diff --git a/extensions/_unitex.cpp b/extensions/_unitex.cpp index 1a39017eba808f04e82ce259653787ec7c40518e..7b154ff164e6dd8227f846e9743dc7f2eb77394c 100644 --- a/extensions/_unitex.cpp +++ b/extensions/_unitex.cpp @@ -39,14 +39,14 @@ This function launches an Unitex command.\n\n\ static PyObject *unitex_tool(PyObject *self, PyObject *args); PyObject *unitex_tool(PyObject *self, PyObject *args) { - char *command; - if (!PyArg_ParseTuple(args, "s", &command)) - return NULL; + char *command; + if (!PyArg_ParseTuple(args, "s", &command)) + return NULL; - unsigned int ret; - ret = UnitexTool_public_run_string(command); + unsigned int ret; + ret = UnitexTool_public_run_string(command); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } @@ -68,24 +68,24 @@ This function loads a dictionary in the persistent space.\n\n\ static PyObject *unitex_load_persistent_dictionary(PyObject *self, PyObject *args); PyObject *unitex_load_persistent_dictionary(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; - PyObject *result = NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; + PyObject *result = NULL; - size_t length = strlen(path)+0x200; + size_t length = strlen(path)+0x200; - char *persistent_path = (char*)malloc(length+1); - if (persistent_path == NULL) { - return NULL; - } + char *persistent_path = (char*)malloc(length+1); + if (persistent_path == NULL) { + return NULL; + } - if (persistence_public_load_dictionary(path, persistent_path, length)) { - result = Py_BuildValue("s", persistent_path); - } - free(persistent_path); + if (persistence_public_load_dictionary(path, persistent_path, length)) { + result = Py_BuildValue("s", persistent_path); + } + free(persistent_path); - return result; + return result; } /* 'unitex_load_persistent_fst2' function */ @@ -101,24 +101,24 @@ This function loads a grammar in the persistent space.\n\n\ static PyObject *unitex_load_persistent_fst2(PyObject *self, PyObject *args); PyObject *unitex_load_persistent_fst2(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; - PyObject *result = NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; + PyObject *result = NULL; - size_t length = strlen(path)+0x200; + size_t length = strlen(path)+0x200; - char *persistent_path = (char*)malloc(length+1); - if (persistent_path == NULL) { - return NULL; - } + char *persistent_path = (char*)malloc(length+1); + if (persistent_path == NULL) { + return NULL; + } - if (persistence_public_load_fst2(path, persistent_path, length)) { - result = Py_BuildValue("s", persistent_path); - } - free(persistent_path); + if (persistence_public_load_fst2(path, persistent_path, length)) { + result = Py_BuildValue("s", persistent_path); + } + free(persistent_path); - return result; + return result; } /* 'unitex_load_persistent_alphabet' function */ @@ -134,24 +134,24 @@ This function loads an alphabet in the persistent space.\n\n\ static PyObject *unitex_load_persistent_alphabet(PyObject *self, PyObject *args); PyObject *unitex_load_persistent_alphabet(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; - PyObject *result = NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; + PyObject *result = NULL; - size_t length = strlen(path)+0x200; + size_t length = strlen(path)+0x200; - char *persistent_path = (char*)malloc(length+1); - if (persistent_path == NULL) { - return NULL; - } + char *persistent_path = (char*)malloc(length+1); + if (persistent_path == NULL) { + return NULL; + } - if (persistence_public_load_alphabet(path, persistent_path, length)) { - result = Py_BuildValue("s", persistent_path); - } - free(persistent_path); + if (persistence_public_load_alphabet(path, persistent_path, length)) { + result = Py_BuildValue("s", persistent_path); + } + free(persistent_path); - return result; + return result; } @@ -167,13 +167,13 @@ This function unloads a dictionary from persistent space.\n\n\ static PyObject *unitex_free_persistent_dictionary(PyObject *self, PyObject *args); PyObject *unitex_free_persistent_dictionary(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - persistence_public_unload_dictionary(path); + persistence_public_unload_dictionary(path); - Py_RETURN_NONE; + Py_RETURN_NONE; } /* 'unitex_free_persistent_fst2' function */ @@ -187,13 +187,13 @@ This function unloads a grammar from persistent space.\n\n\ static PyObject *unitex_free_persistent_fst2(PyObject *self, PyObject *args); PyObject *unitex_free_persistent_fst2(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - persistence_public_unload_fst2(path); + persistence_public_unload_fst2(path); - Py_RETURN_NONE; + Py_RETURN_NONE; } /* 'unitex_free_persistent_alphabet' function */ @@ -207,13 +207,13 @@ This function unloads an alphabet from persistent space.\n\n\ static PyObject *unitex_free_persistent_alphabet(PyObject *self, PyObject *args); PyObject *unitex_free_persistent_alphabet(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - persistence_public_unload_alphabet(path); + persistence_public_unload_alphabet(path); - Py_RETURN_NONE; + Py_RETURN_NONE; } @@ -230,14 +230,14 @@ space.\n\n\ static PyObject *unitex_is_persistent_dictionary(PyObject *self, PyObject *args); PyObject *unitex_is_persistent_dictionary(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - unsigned int ret; - ret = persistence_public_is_persisted_dictionary_filename(path); + unsigned int ret; + ret = persistence_public_is_persisted_dictionary_filename(path); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } /* 'unitex_is_persistent_fst2' function */ @@ -252,14 +252,14 @@ space.\n\n\ static PyObject *unitex_is_persistent_fst2(PyObject *self, PyObject *args); PyObject *unitex_is_persistent_fst2(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - unsigned int ret; - ret = persistence_public_is_persisted_fst2_filename(path); + unsigned int ret; + ret = persistence_public_is_persisted_fst2_filename(path); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } /* 'unitex_is_persistent_alphabet' function */ @@ -274,14 +274,14 @@ space.\n\n\ static PyObject *unitex_is_persistent_alphabet(PyObject *self, PyObject *args); PyObject *unitex_is_persistent_alphabet(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - unsigned int ret; - ret = persistence_public_is_persisted_alphabet_filename(path); + unsigned int ret; + ret = persistence_public_is_persisted_alphabet_filename(path); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } @@ -301,12 +301,12 @@ but should be used for debug purposes only.\n\n\ static PyObject *unitex_enable_stdout(PyObject *self, PyObject *noarg); PyObject *unitex_enable_stdout(PyObject *self, PyObject *noarg) { - enum stdwrite_kind swk = stdwrite_kind_out; + enum stdwrite_kind swk = stdwrite_kind_out; - unsigned int ret; - ret = SetStdWriteCB(swk, 0, NULL, NULL); + unsigned int ret; + ret = SetStdWriteCB(swk, 0, NULL, NULL); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } /* 'unitex_enable_stderr' function */ @@ -320,12 +320,12 @@ but should be used for debug purposes only.\n\n\ static PyObject *unitex_enable_stderr(PyObject *self, PyObject *noarg); PyObject *unitex_enable_stderr(PyObject *self, PyObject *noarg) { - enum stdwrite_kind swk = stdwrite_kind_err; + enum stdwrite_kind swk = stdwrite_kind_err; - unsigned int ret; - ret = SetStdWriteCB(swk, 0, NULL, NULL); + unsigned int ret; + ret = SetStdWriteCB(swk, 0, NULL, NULL); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } /* 'unitex_disable_stdout' function */ @@ -340,12 +340,12 @@ improve performances.\n\n\ static PyObject *unitex_disable_stdout(PyObject *self, PyObject *noarg); PyObject *unitex_disable_stdout(PyObject *self, PyObject *noarg) { - enum stdwrite_kind swk = stdwrite_kind_out; + enum stdwrite_kind swk = stdwrite_kind_out; - unsigned int ret; - ret = SetStdWriteCB(swk, 1, NULL, NULL); + unsigned int ret; + ret = SetStdWriteCB(swk, 1, NULL, NULL); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } /* 'unitex_disable_stderr' function */ @@ -360,12 +360,12 @@ improve performances.\n\n\ static PyObject *unitex_disable_stderr(PyObject *self, PyObject *noarg); PyObject *unitex_disable_stderr(PyObject *self, PyObject *noarg) { - enum stdwrite_kind swk = stdwrite_kind_err; + enum stdwrite_kind swk = stdwrite_kind_err; - unsigned int ret; - ret = SetStdWriteCB(swk, 1, NULL, NULL); + unsigned int ret; + ret = SetStdWriteCB(swk, 1, NULL, NULL); - return Py_BuildValue("O", ret ? Py_True: Py_False); + return Py_BuildValue("O", ret ? Py_True: Py_False); } /* 'unitex_cp' function */ @@ -382,15 +382,15 @@ used to virtualize a file or to dump a virtual file.\n\n\ static PyObject *unitex_cp(PyObject *self, PyObject *args); PyObject *unitex_cp(PyObject *self, PyObject *args) { - char *source_path; - char *target_path; - if (!PyArg_ParseTuple(args, "ss", &source_path, &target_path)) - return NULL; + char *source_path; + char *target_path; + if (!PyArg_ParseTuple(args, "ss", &source_path, &target_path)) + return NULL; - unsigned int ret; - ret = CopyUnitexFile(source_path, target_path); + unsigned int ret; + ret = CopyUnitexFile(source_path, target_path); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } /* 'unitex_rm' function */ @@ -405,14 +405,14 @@ filesystem or the disk filesystem.\n\n\ static PyObject *unitex_rm(PyObject *self, PyObject *args); PyObject *unitex_rm(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - unsigned int ret; - ret = RemoveUnitexFile(path); + unsigned int ret; + ret = RemoveUnitexFile(path); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } /* 'unitex_mv' function */ @@ -428,15 +428,15 @@ virtual filesystem or the disk filesystem.\n\n\ static PyObject *unitex_mv(PyObject *self, PyObject *args); PyObject *unitex_mv(PyObject *self, PyObject *args) { - char *old_path; - char *new_path; - if (!PyArg_ParseTuple(args, "ss", &old_path, &new_path)) - return NULL; + char *old_path; + char *new_path; + if (!PyArg_ParseTuple(args, "ss", &old_path, &new_path)) + return NULL; - unsigned int ret; - ret = RenameUnitexFile(old_path, new_path); + unsigned int ret; + ret = RenameUnitexFile(old_path, new_path); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } /* 'unitex_mkdir' function */ @@ -450,14 +450,14 @@ This function creates a directory on the disk.\n\n\ static PyObject *unitex_mkdir(PyObject *self, PyObject *args); PyObject *unitex_mkdir(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - unsigned int ret; - ret = CreateUnitexFolder(path); + unsigned int ret; + ret = CreateUnitexFolder(path); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } /* 'unitex_rmdir' function */ @@ -471,14 +471,14 @@ This function removes a directory from the disk.\n\n\ static PyObject *unitex_rmdir(PyObject *self, PyObject *args); PyObject *unitex_rmdir(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; - unsigned int ret; - ret = RemoveUnitexFolder(path); + unsigned int ret; + ret = RemoveUnitexFolder(path); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } /* 'unitex_ls' function */ @@ -493,32 +493,32 @@ This function lists (disk or virtual) directory contents.\n\n\ static PyObject *unitex_ls(PyObject *self, PyObject *args); PyObject *unitex_ls(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; - - char **_file_list = GetUnitexFileList(path); - if (_file_list==NULL) - return PyList_New(0); - - unsigned int size = 0; - while ((*(_file_list + size))!=NULL) { - size ++; - } - - PyObject *file_list = PyList_New(size); - for (unsigned int i = 0; i != size; ++i) { - PyList_SET_ITEM(file_list, i, PyUnicode_FromString(_file_list[i])); - } - - char **_file_list_walk=_file_list; - while ((*_file_list_walk)!=NULL) { - free(*_file_list_walk); - _file_list_walk++; - } - free(_file_list); - - return file_list; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; + + char **_file_list = GetUnitexFileList(path); + if (_file_list==NULL) + return PyList_New(0); + + unsigned int size = 0; + while ((*(_file_list + size))!=NULL) { + size ++; + } + + PyObject *file_list = PyList_New(size); + for (unsigned int i = 0; i != size; ++i) { + PyList_SET_ITEM(file_list, i, PyUnicode_FromString(_file_list[i])); + } + + char **_file_list_walk=_file_list; + while ((*_file_list_walk)!=NULL) { + free(*_file_list_walk); + _file_list_walk++; + } + free(_file_list); + + return file_list; } /* 'unitex_read_file' function (UTF-8 encoding only)*/ @@ -533,32 +533,32 @@ This function read a file from the disk or from the virtual filesystem.\n\ static PyObject *unitex_read_file(PyObject *self, PyObject *args); PyObject *unitex_read_file(PyObject *self, PyObject *args) { - char *path; - if (!PyArg_ParseTuple(args, "s", &path)) - return NULL; - PyObject *content = NULL; + char *path; + if (!PyArg_ParseTuple(args, "s", &path)) + return NULL; + PyObject *content = NULL; UNITEXFILEMAPPED *amf; const void *buffer; size_t file_size; - GetUnitexFileReadBuffer(path, &amf, &buffer, &file_size); - const unsigned char* bufchar = (const unsigned char*)buffer; + GetUnitexFileReadBuffer(path, &amf, &buffer, &file_size); + const unsigned char* bufchar = (const unsigned char*)buffer; - size_t bom_size = 0; + size_t bom_size = 0; if (file_size>2) { if (((*(bufchar))==0xef) && ((*(bufchar+1))==0xbb) && ((*(bufchar+2))==0xbf)) { bom_size = 3; } - } + } - char* _content = (char*)malloc(file_size+1); - memcpy(_content, bufchar+bom_size, file_size-bom_size); + char* _content = (char*)malloc(file_size+1); + memcpy(_content, bufchar+bom_size, file_size-bom_size); - *(_content+file_size-bom_size) = '\0'; + *(_content+file_size-bom_size) = '\0'; - content = PyUnicode_FromString(_content); - free(_content); + content = PyUnicode_FromString(_content); + free(_content); CloseUnitexFileReadBuffer(amf, buffer, file_size); @@ -579,25 +579,25 @@ This function writes a file on the disk or on the virtual filesystem.\n\ static PyObject *unitex_write_file(PyObject *self, PyObject *args); PyObject *unitex_write_file(PyObject *self, PyObject *args) { - char *path; - PyObject *ustring; - int *use_bom; - if (!PyArg_ParseTuple(args, "sUi", &path, &ustring, &use_bom)) - return NULL; + char *path; + PyObject *ustring; + int *use_bom; + if (!PyArg_ParseTuple(args, "sUi", &path, &ustring, &use_bom)) + return NULL; - PyObject *bytes; - char *content; - Py_ssize_t length; + PyObject *bytes; + char *content; + Py_ssize_t length; - bytes = PyUnicode_AsUTF8String(ustring); - PyBytes_AsStringAndSize(bytes, &content, &length); + bytes = PyUnicode_AsUTF8String(ustring); + PyBytes_AsStringAndSize(bytes, &content, &length); - const unsigned char UTF8BOM[3] = { 0xef,0xbb,0xbf }; + const unsigned char UTF8BOM[3] = { 0xef,0xbb,0xbf }; - unsigned int ret; - ret = WriteUnitexFile(path, UTF8BOM, use_bom ? 3:0, content, length); + unsigned int ret; + ret = WriteUnitexFile(path, UTF8BOM, use_bom ? 3:0, content, length); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } /* 'unitex_append_to_file' function */ @@ -613,84 +613,84 @@ This function writes at the end of an existing file (virtual or not).\n\ static PyObject *unitex_append_to_file(PyObject *self, PyObject *args); PyObject *unitex_append_to_file(PyObject *self, PyObject *args) { - char *path; - PyObject *ustring; - if (!PyArg_ParseTuple(args, "sU", &path, &ustring)) - return NULL; + char *path; + PyObject *ustring; + if (!PyArg_ParseTuple(args, "sU", &path, &ustring)) + return NULL; - PyObject *bytes; - char *content; - Py_ssize_t length; + PyObject *bytes; + char *content; + Py_ssize_t length; - bytes = PyUnicode_AsUTF8String(ustring); - PyBytes_AsStringAndSize(bytes, &content, &length); + bytes = PyUnicode_AsUTF8String(ustring); + PyBytes_AsStringAndSize(bytes, &content, &length); - unsigned int ret; - ret = AppendUnitexFile(path, content, length); + unsigned int ret; + ret = AppendUnitexFile(path, content, length); - return Py_BuildValue("O", ret ? Py_False: Py_True); + return Py_BuildValue("O", ret ? Py_False: Py_True); } static PyMethodDef unitex_methods[] = { - /* Unitex Tool function */ - {"unitex_tool", unitex_tool, METH_VARARGS, unitex_tool_docstring}, - - /* Persistence functions */ - {"unitex_load_persistent_dictionary", unitex_load_persistent_dictionary, METH_VARARGS, unitex_load_persistent_dictionary_docstring}, - {"unitex_load_persistent_fst2", unitex_load_persistent_fst2, METH_VARARGS, unitex_load_persistent_fst2_docstring}, - {"unitex_load_persistent_alphabet", unitex_load_persistent_alphabet, METH_VARARGS, unitex_load_persistent_alphabet_docstring}, - - {"unitex_free_persistent_dictionary", unitex_free_persistent_dictionary, METH_VARARGS, unitex_free_persistent_dictionary_docstring}, - {"unitex_free_persistent_fst2", unitex_free_persistent_fst2, METH_VARARGS, unitex_free_persistent_fst2_docstring}, - {"unitex_free_persistent_alphabet", unitex_free_persistent_alphabet, METH_VARARGS, unitex_free_persistent_alphabet_docstring}, - - {"unitex_is_persistent_dictionary", unitex_is_persistent_dictionary, METH_VARARGS, unitex_is_persistent_dictionary_docstring}, - {"unitex_is_persistent_fst2", unitex_is_persistent_fst2, METH_VARARGS, unitex_is_persistent_fst2_docstring}, - {"unitex_is_persistent_alphabet", unitex_is_persistent_alphabet, METH_VARARGS, unitex_is_persistent_alphabet_docstring}, - - /* I/O functions */ - {"unitex_enable_stdout", unitex_enable_stdout, METH_NOARGS, unitex_enable_stdout_docstring}, - {"unitex_disable_stdout", unitex_disable_stdout, METH_NOARGS, unitex_disable_stdout_docstring}, - {"unitex_enable_stderr", unitex_enable_stderr, METH_NOARGS, unitex_enable_stderr_docstring}, - {"unitex_disable_stderr", unitex_disable_stderr, METH_NOARGS, unitex_disable_stderr_docstring}, - - {"unitex_cp", unitex_cp, METH_VARARGS, unitex_cp_docstring}, - {"unitex_rm", unitex_rm, METH_VARARGS, unitex_rm_docstring}, - {"unitex_mv", unitex_mv, METH_VARARGS, unitex_mv_docstring}, - {"unitex_mkdir", unitex_mkdir, METH_VARARGS, unitex_mkdir_docstring}, - {"unitex_rmdir", unitex_rmdir, METH_VARARGS, unitex_rmdir_docstring}, - {"unitex_ls", unitex_ls, METH_VARARGS, unitex_ls_docstring}, - - {"unitex_read_file", unitex_read_file, METH_VARARGS, unitex_read_file_docstring}, - {"unitex_write_file", unitex_write_file, METH_VARARGS, unitex_write_file_docstring}, - {"unitex_append_to_file", unitex_append_to_file, METH_VARARGS, unitex_append_to_file_docstring}, - - {NULL, NULL, 0, NULL} + /* Unitex Tool function */ + {"unitex_tool", unitex_tool, METH_VARARGS, unitex_tool_docstring}, + + /* Persistence functions */ + {"unitex_load_persistent_dictionary", unitex_load_persistent_dictionary, METH_VARARGS, unitex_load_persistent_dictionary_docstring}, + {"unitex_load_persistent_fst2", unitex_load_persistent_fst2, METH_VARARGS, unitex_load_persistent_fst2_docstring}, + {"unitex_load_persistent_alphabet", unitex_load_persistent_alphabet, METH_VARARGS, unitex_load_persistent_alphabet_docstring}, + + {"unitex_free_persistent_dictionary", unitex_free_persistent_dictionary, METH_VARARGS, unitex_free_persistent_dictionary_docstring}, + {"unitex_free_persistent_fst2", unitex_free_persistent_fst2, METH_VARARGS, unitex_free_persistent_fst2_docstring}, + {"unitex_free_persistent_alphabet", unitex_free_persistent_alphabet, METH_VARARGS, unitex_free_persistent_alphabet_docstring}, + + {"unitex_is_persistent_dictionary", unitex_is_persistent_dictionary, METH_VARARGS, unitex_is_persistent_dictionary_docstring}, + {"unitex_is_persistent_fst2", unitex_is_persistent_fst2, METH_VARARGS, unitex_is_persistent_fst2_docstring}, + {"unitex_is_persistent_alphabet", unitex_is_persistent_alphabet, METH_VARARGS, unitex_is_persistent_alphabet_docstring}, + + /* I/O functions */ + {"unitex_enable_stdout", unitex_enable_stdout, METH_NOARGS, unitex_enable_stdout_docstring}, + {"unitex_disable_stdout", unitex_disable_stdout, METH_NOARGS, unitex_disable_stdout_docstring}, + {"unitex_enable_stderr", unitex_enable_stderr, METH_NOARGS, unitex_enable_stderr_docstring}, + {"unitex_disable_stderr", unitex_disable_stderr, METH_NOARGS, unitex_disable_stderr_docstring}, + + {"unitex_cp", unitex_cp, METH_VARARGS, unitex_cp_docstring}, + {"unitex_rm", unitex_rm, METH_VARARGS, unitex_rm_docstring}, + {"unitex_mv", unitex_mv, METH_VARARGS, unitex_mv_docstring}, + {"unitex_mkdir", unitex_mkdir, METH_VARARGS, unitex_mkdir_docstring}, + {"unitex_rmdir", unitex_rmdir, METH_VARARGS, unitex_rmdir_docstring}, + {"unitex_ls", unitex_ls, METH_VARARGS, unitex_ls_docstring}, + + {"unitex_read_file", unitex_read_file, METH_VARARGS, unitex_read_file_docstring}, + {"unitex_write_file", unitex_write_file, METH_VARARGS, unitex_write_file_docstring}, + {"unitex_append_to_file", unitex_append_to_file, METH_VARARGS, unitex_append_to_file_docstring}, + + {NULL, NULL, 0, NULL} }; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef unitex_module_def = { - PyModuleDef_HEAD_INIT, - "_unitex", - unitex_docstring, - -1, - unitex_methods + PyModuleDef_HEAD_INIT, + "_unitex", + unitex_docstring, + -1, + unitex_methods }; PyMODINIT_FUNC PyInit__unitex(void) { - PyObject *module = PyModule_Create(&unitex_module_def); + PyObject *module = PyModule_Create(&unitex_module_def); - if (module == NULL) - return NULL; - return module; + if (module == NULL) + return NULL; + return module; } #else PyMODINIT_FUNC init_unitex(void) { - PyObject *module = Py_InitModule3("_unitex", unitex_methods, unitex_docstring); + PyObject *module = Py_InitModule3("_unitex", unitex_methods, unitex_docstring); - if (module == NULL) - return; + if (module == NULL) + return; } #endif diff --git a/setup.py b/setup.py index 20761be75cbaae3ed2c66bdc24b51d531897dc64..562b1176aae7de6cb9a907ce31214fa52358f5d8 100644 --- a/setup.py +++ b/setup.py @@ -27,13 +27,13 @@ class CustomBuild(build): def run(self): # Unitex library compilation. command = "cd %s && make 64BITS=yes LIBRARY=yes TRE_DIRECT_COMPILE=yes DEBUG=yes" % os.path.join(UNITEX_INC, "build") - + try: process = subprocess.Popen(command, stderr=subprocess.PIPE, shell=True) except Exception as e: sys.stderr.write("Error in command: %s\n" % command) raise e - + process.wait() if process.returncode != 0: @@ -57,7 +57,7 @@ class CustomBuild(build): except Exception as e: sys.stderr.write("Error in command: %s\n" % command) raise e - + process.wait() if process.returncode != 0: @@ -79,7 +79,7 @@ class CustomClean(clean): except Exception as e: sys.stderr.write("Error in command: %s\n" % command) raise e - + process.wait() if process.returncode != 0: @@ -91,36 +91,36 @@ setup(name = "unitex", version = "1.0", description = "Python 3 binding for the Unitex library", long_description = open('README.md').read(), - + author = "Patrick Watrin", author_email = "patrick.watrin@gmail.com", - + # https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers = ["License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Programming Language :: Python", "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Topic :: Scientific/Engineering :: Information Analysis"], - + keywords = "Unitex, Finite-States Transducers, Natural Language Processing", - + license = "GPLv3", install_requires = [], - + package_dir = {"unitex": "unitex", - "unitex.utils": "unitex/utils"}, + "unitex.utils": "unitex/utils"}, packages = ["unitex", - "unitex.utils"], - + "unitex.utils"], + data_files = [], - + ext_modules=[Extension("_unitex", include_dirs = [UNITEX_INC, get_python_inc()], libraries=["unitex"], library_dirs=['/usr/local/lib'], sources = ["extensions/_unitex.cpp"])], - + cmdclass = { "build": CustomBuild, "clean": CustomClean diff --git a/tests/04_test_processor.py b/tests/04_test_processor.py index 526841bc5e9d13615ef7bfe4537dee0f37298f32..361c4e8210e4e8f2c029da805b8f11eee95f4e7e 100644 --- a/tests/04_test_processor.py +++ b/tests/04_test_processor.py @@ -1,9 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import os, shutil, unittest +import os +import shutil +import unittest +import yaml from unitex import UnitexConstants +from unitex.config import UnitexConfig from unitex.tools import compress, grf2fst2 from unitex.processor import UnitexProcessor @@ -86,7 +90,14 @@ class TestUnitexIO(unittest.TestCase): os.remove(self._arguments["xml"]) def test_01_processor_txt(self): - processor = UnitexProcessor(self._arguments["config"]) + options = None + with open(self._arguments["config"], "r") as f: + options = yaml.load(f) + + config = UnitexConfig() + config.load(options) + + processor = UnitexProcessor(config) processor.open(self._arguments["txt"], mode="srtlf", tagged=False) kwargs = {} @@ -98,7 +109,14 @@ class TestUnitexIO(unittest.TestCase): self.assertTrue(ret, "Tagging process failed (txt format)!") def test_02_processor_xml(self): - processor = UnitexProcessor(self._arguments["config"]) + options = None + with open(self._arguments["config"], "r") as f: + options = yaml.load(f) + + config = UnitexConfig() + config.load(options) + + processor = UnitexProcessor(config) processor.open(self._arguments["txt"], mode="srtlf", tagged=False) kwargs = {} diff --git a/unitex/io.py b/unitex/io.py index d3872ddf824eac57db237d8019c4e16720d16450..01565062b8380703330bb41b65d85e2fb1f549fe 100644 --- a/unitex/io.py +++ b/unitex/io.py @@ -154,7 +154,7 @@ class UnitexFile(object): manipulate files on the disk and the virtual filesystems. It's mainly useful to read files from virtual filesystem whithout having to copy them to the disk. - + **WARNING: the encoding must be UTF-8 and the data Unicode strings.** """ @@ -162,7 +162,7 @@ class UnitexFile(object): def __init__(self): self.__use_bom = None - self.__file = None + self.__path = None self.__mode = None def open(self, file, mode=None, use_bom=False): @@ -189,11 +189,11 @@ class UnitexFile(object): *No return.* """ - if self.__file is not None: - raise UnitexException("You must close the current file (%s) before open another one..." % self.__file) + if self.__path is not None: + raise UnitexException("You must close the current file (%s) before open another one..." % self.__path) self.__use_bom = use_bom - self.__file = file + self.__path = file if mode is None: mode = "r" @@ -204,9 +204,9 @@ class UnitexFile(object): This function close the opened file and reset all the internal parameters. """ - if self.__file is None: + if self.__path is None: raise UnitexException("There is no file to close...") - self.__file = None + self.__path = None self.__mode = None def write(self, data): @@ -220,16 +220,16 @@ class UnitexFile(object): *No return.* """ - if self.__file is None: + if self.__path is None: raise UnitexException("You must open a file before writing...") if self.__mode not in ("w", "a"): - raise UnitexException("File '%s' is opened in read mode..." % self.__file) + raise UnitexException("File '%s' is opened in read mode..." % self.__path) if self.__mode == "w": bom = 1 if self.__use_bom is True else 0 - _unitex.unitex_write_file(self.__file, data, bom) + _unitex.unitex_write_file(self.__path, data, bom) else: - _unitex.unitex_append_to_file(self.__file, data) + _unitex.unitex_append_to_file(self.__path, data) def read(self): """ @@ -242,8 +242,8 @@ class UnitexFile(object): The data read are returned as a unicode string. """ - if self.__file is None: + if self.__path is None: raise UnitexException("You must open a file before reading...") if self.__mode != "r": - raise UnitexException("File '%s' is opened in write/append mode..." % self.__file) - return _unitex.unitex_read_file(self.__file) + raise UnitexException("File '%s' is opened in write/append mode..." % self.__path) + return _unitex.unitex_read_file(self.__path) diff --git a/unitex/processor.py b/unitex/processor.py index 47527a1e8fa54d8079a45187b217f4e68fcc99ea..f0a813755e90bb2e7c8d68c3ab911421bd0afaf4 100644 --- a/unitex/processor.py +++ b/unitex/processor.py @@ -6,7 +6,6 @@ from __future__ import absolute_import import logging import os import re -import yaml # Compatibility Python 2/3 from io import open @@ -14,10 +13,10 @@ from io import open from xml.sax.saxutils import escape from unitex import * -from unitex.config import UnitexConfig from unitex.io import * from unitex.resources import * from unitex.tools import * +from unitex.utils.formats import TextFST _LOGGER = logging.getLogger(__name__) @@ -40,7 +39,7 @@ class UnitexProcessor(object): """ def __init__(self, config): - self.__options = None + self.__config = config self.__persisted_objects = None @@ -48,73 +47,58 @@ class UnitexProcessor(object): self.__snt = None self.__dir = None - self.init(config) - - def init(self, config): - options = None - with open(config, "r") as f: - options = yaml.load(f) - - self.__options = UnitexConfig() - self.__options.load(options) - - verbose = self.__options["verbose"] - debug = self.__options["debug"] - log = self.__options["log"] + verbose = self.__config["verbose"] + debug = self.__config["debug"] + log = self.__config["log"] init_log_system(verbose, debug, log) self._load() - def get_option(self, name): - if not name in self.__options: - raise UnitexException("Unkown option '%s'" % name) - return self.__options[name] - def _load(self): - if self.__options["persistence"] is False: + if self.__config["persistence"] is False: return self.__persisted_objects = [] - if self.__options["resources"]["alphabet"] is not None: + if self.__config["resources"]["alphabet"] is not None: _type = UnitexConstants.RESOURCE_ALPHABET - _object = load_persistent_alphabet(self.__options["resources"]["alphabet"]) + _object = load_persistent_alphabet(self.__config["resources"]["alphabet"]) self.__persisted_objects.append((_type, _object)) - self.__options["resources"]["alphabet"] = _object + self.__config["resources"]["alphabet"] = _object - if self.__options["resources"]["alphabet-sorted"] is not None: + if self.__config["resources"]["alphabet-sorted"] is not None: _type = UnitexConstants.RESOURCE_ALPHABET - _object = load_persistent_alphabet(self.__options["resources"]["alphabet-sorted"]) + _object = load_persistent_alphabet(self.__config["resources"]["alphabet-sorted"]) self.__persisted_objects.append((_type, _object)) - self.__options["resources"]["alphabet-sorted"] = _object + self.__config["resources"]["alphabet-sorted"] = _object - if self.__options["resources"]["sentence"] is not None: + if self.__config["resources"]["sentence"] is not None: _type = UnitexConstants.RESOURCE_GRAMMAR - _object = load_persistent_fst2(self.__options["resources"]["sentence"]) + _object = load_persistent_fst2(self.__config["resources"]["sentence"]) self.__persisted_objects.append((_type, _object)) - self.__options["resources"]["sentence"] = _object + self.__config["resources"]["sentence"] = _object - if self.__options["resources"]["replace"] is not None: + if self.__config["resources"]["replace"] is not None: _type = UnitexConstants.RESOURCE_GRAMMAR - _object = load_persistent_fst2(self.__options["resources"]["replace"]) + _object = load_persistent_fst2(self.__config["resources"]["replace"]) self.__persisted_objects.append((_type, _object)) - self.__options["resources"]["replace"] = _object + self.__config["resources"]["replace"] = _object - if self.__options["resources"]["dictionaries"] is not None: + if self.__config["resources"]["dictionaries"] is not None: _objects = [] _type = UnitexConstants.RESOURCE_DICTIONARY - for dictionary in self.__options["resources"]["dictionaries"]: + for dictionary in self.__config["resources"]["dictionaries"]: _object = load_persistent_dictionary(dictionary) self.__persisted_objects.append((_type, _object)) _objects.append(_object) - self.__options["resources"]["dictionaries"] = _objects + self.__config["resources"]["dictionaries"] = _objects def _free(self): if self.__persisted_objects is None: @@ -133,35 +117,36 @@ class UnitexProcessor(object): _LOGGER.error("Unable to clean processor. No file opened!") return - if self.__options["virtualization"] is True: + if self.__config["virtualization"] is True: if self.__dir is not None: for vf in ls("%s%s" % (UnitexConstants.VFS_PREFIX, self.__dir)): rm(vf) rm(self.__snt) rm(self.__txt) else: - rmdir(self.__dir) rm(self.__snt) + rmdir(self.__dir) + def _normalize(self): - kwargs = self.__options["tools"]["normalize"] + kwargs = self.__config["tools"]["normalize"] ret = normalize(self.__txt, **kwargs) if ret is False: raise UnitexException("Text normalization failed!") def _segment(self): - grammar = self.__options["resources"]["sentence"] + grammar = self.__config["resources"]["sentence"] if grammar is None: raise UnitexException("Unable to segment text. No sentence grammar provided.") - alphabet = self.__options["resources"]["alphabet"] + alphabet = self.__config["resources"]["alphabet"] if alphabet is None: raise UnitexException("Unable to segment text. No alphabet file provided.") kwargs = {} - kwargs["start_on_space"] = self.__options["tools"]["fst2txt"]["start_on_space"] - kwargs["char_by_char"] = self.__options["tools"]["fst2txt"]["char_by_char"] + kwargs["start_on_space"] = self.__config["tools"]["fst2txt"]["start_on_space"] + kwargs["char_by_char"] = self.__config["tools"]["fst2txt"]["char_by_char"] kwargs["merge"] = True ret = fst2txt(grammar, self.__snt, alphabet, **kwargs) @@ -169,17 +154,17 @@ class UnitexProcessor(object): raise UnitexException("Text segmentation failed!") def _replace(self): - grammar = self.__options["resources"]["replace"] + grammar = self.__config["resources"]["replace"] if grammar is None: raise UnitexException("Unable to normalize text. No replace grammar provided.") - alphabet = self.__options["resources"]["alphabet"] + alphabet = self.__config["resources"]["alphabet"] if alphabet is None: raise UnitexException("Unable to normalize text. No alphabet file provided.") kwargs = {} - kwargs["start_on_space"] = self.__options["tools"]["fst2txt"]["start_on_space"] - kwargs["char_by_char"] = self.__options["tools"]["fst2txt"]["char_by_char"] + kwargs["start_on_space"] = self.__config["tools"]["fst2txt"]["start_on_space"] + kwargs["char_by_char"] = self.__config["tools"]["fst2txt"]["char_by_char"] kwargs["merge"] = False ret = fst2txt(grammar, self.__snt, alphabet, **kwargs) @@ -187,47 +172,47 @@ class UnitexProcessor(object): raise UnitexException("Text normalization failed!") def _tokenize(self): - alphabet = self.__options["resources"]["alphabet"] + alphabet = self.__config["resources"]["alphabet"] if alphabet is None: raise UnitexException("Unable to tokenize text. No alphabet file provided.") - kwargs = self.__options["tools"]["tokenize"] + kwargs = self.__config["tools"]["tokenize"] ret = tokenize(self.__snt, alphabet, **kwargs) if ret is False: raise UnitexException("Text tokenization failed!") def _lexicalize(self): - dictionaries = self.__options["resources"]["dictionaries"] + dictionaries = self.__config["resources"]["dictionaries"] if not dictionaries: raise UnitexException("Unable to lexicalize text. No dictionaries provided.") - alphabet = self.__options["resources"]["alphabet"] + alphabet = self.__config["resources"]["alphabet"] if alphabet is None: raise UnitexException("Unable to tokenize text. No alphabet file provided.") - kwargs = self.__options["tools"]["dico"] + kwargs = self.__config["tools"]["dico"] ret = dico(dictionaries, self.__snt, alphabet, **kwargs) if ret is False: raise UnitexException("Text lexicalization failed!") def _locate(self, grammar, match_mode, output_mode): - alphabet = self.__options["resources"]["alphabet"] + alphabet = self.__config["resources"]["alphabet"] if alphabet is None: raise UnitexException("Unable to locate pattern. No alphabet file provided.") kwargs = {} - kwargs["morpho"] = self.__options["tools"]["locate"]["morpho"] - kwargs["start_on_space"] = self.__options["tools"]["locate"]["start_on_space"] - kwargs["char_by_char"] = self.__options["tools"]["locate"]["char_by_char"] - kwargs["korean"] = self.__options["tools"]["locate"]["korean"] - kwargs["arabic_rules"] = self.__options["tools"]["locate"]["arabic_rules"] - kwargs["negation_operator"] = self.__options["tools"]["locate"]["negation_operator"] - kwargs["stop_token_count"] = self.__options["tools"]["locate"]["stop_token_count"] - kwargs["protect_dic_chars"] = self.__options["tools"]["locate"]["protect_dic_chars"] - kwargs["variable"] = self.__options["tools"]["locate"]["variable"] - kwargs["variable_error"] = self.__options["tools"]["locate"]["variable_error"] + kwargs["morpho"] = self.__config["tools"]["locate"]["morpho"] + kwargs["start_on_space"] = self.__config["tools"]["locate"]["start_on_space"] + kwargs["char_by_char"] = self.__config["tools"]["locate"]["char_by_char"] + kwargs["korean"] = self.__config["tools"]["locate"]["korean"] + kwargs["arabic_rules"] = self.__config["tools"]["locate"]["arabic_rules"] + kwargs["negation_operator"] = self.__config["tools"]["locate"]["negation_operator"] + kwargs["stop_token_count"] = self.__config["tools"]["locate"]["stop_token_count"] + kwargs["protect_dic_chars"] = self.__config["tools"]["locate"]["protect_dic_chars"] + kwargs["variable"] = self.__config["tools"]["locate"]["variable"] + kwargs["variable_error"] = self.__config["tools"]["locate"]["variable_error"] kwargs["sntdir"] = None kwargs["number_of_matches"] = None @@ -249,7 +234,7 @@ class UnitexProcessor(object): raise UnitexException("Locate failed!") index = os.path.join(self.__dir, "concord.ind") - if self.__options["virtualization"] is True: + if self.__config["virtualization"] is True: index = "%s%s" % (UnitexConstants.VFS_PREFIX, index) if exists(index) is False: @@ -257,7 +242,7 @@ class UnitexProcessor(object): return index def _concord(self, index, merge=False, output=None): - alphabet = self.__options["resources"]["alphabet"] + alphabet = self.__config["resources"]["alphabet"] if alphabet is None: raise UnitexException("Unable to build concordance. No alphabet file provided.") @@ -272,7 +257,7 @@ class UnitexProcessor(object): kwargs["offsets"] = None kwargs["unxmlize"] = None kwargs["directory"] = None - kwargs["thai"] = self.__options["tools"]["concord"]["thai"] + kwargs["thai"] = self.__config["tools"]["concord"]["thai"] result = None @@ -291,7 +276,7 @@ class UnitexProcessor(object): kwargs["only_matches"] = False result = os.path.join(self.__dir, "concord.txt") - if self.__options["virtualization"] is True: + if self.__config["virtualization"] is True: result = "%s%s" % (UnitexConstants.VFS_PREFIX, result) ret = concord(index, alphabet, **kwargs) @@ -335,16 +320,15 @@ class UnitexProcessor(object): self.__snt = os.path.join(directory, "%s.snt" % name) self.__dir = os.path.join(directory, "%s_snt" % name) - if self.__options["virtualization"] is True: + if self.__config["virtualization"] is True: txt = "%s%s" % (UnitexConstants.VFS_PREFIX, self.__txt) cp(self.__txt, txt) self.__txt = txt self.__snt = "%s%s" % (UnitexConstants.VFS_PREFIX, self.__snt) - else: - if os.path.exists(self.__dir) is False: - mkdir(self.__dir) + if os.path.exists(self.__dir) is False: + mkdir(self.__dir) self._normalize() @@ -396,9 +380,35 @@ class UnitexProcessor(object): *Return [TextFST]:* - The function returns a TextFST object. + The function returns a TextFST object. The object uses the + text.tfst and text.tind files which are cleaned (i.e. erased) + when the processor is closed. """ - pass + kwargs = self.__config["tools"]["normalize"] + + alphabet = self.__config["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to segment text. No alphabet file provided.") + + ret = txt2tfst(self.__snt, alphabet, **kwargs) + if ret is False: + raise UnitexException("Text normalization failed!") + + # To avoid the copy process, the UnitexFile must be modified! + tfst = os.path.join(self.__dir, "text.tfst") + if self.__config["virtualization"] is True: + _tfst = "%s%s" % (UnitexConstants.VFS_PREFIX, tfst) + mv(_tfst, tfst) + + tind = os.path.join(self.__dir, "text.tind") + if self.__config["virtualization"] is True: + _tind = "%s%s" % (UnitexConstants.VFS_PREFIX, tind) + mv(_tind, tind) + + fst = TextFST() + fst.load(tfst, tind, "utf-8") + + return fst def iter(self, grammar, **kwargs): """ @@ -490,7 +500,7 @@ class UnitexProcessor(object): return True _output = os.path.join(self.__dir, "concord-merge-temp.txt") - if self.__options["virtualization"] is True: + if self.__config["virtualization"] is True: _output = "%s%s" % (UnitexConstants.VFS_PREFIX, _output) self._concord(index, merge=True, output=_output) diff --git a/unitex/utils/formats.py b/unitex/utils/formats.py index 9e0a6bc3593d76bf2b6fe1b5322a122e4d6c2a3c..b9f4f97c13729c7c89ee664c6f36058bb14d8901 100644 --- a/unitex/utils/formats.py +++ b/unitex/utils/formats.py @@ -393,7 +393,7 @@ class SentenceFST(Automaton): self.__sentence = None self.__tokens = None - self.__labels = None + self.__entries = None def get_sentence(self): return self.__sentence @@ -402,16 +402,26 @@ class SentenceFST(Automaton): return self.__tokens def get_token(self, i): - return self.__tokens[i] + try: + return self.__tokens[i] + except IndexError: + raise UnitexException("SentenceFST token index out of range (size: %s)." %\ + len(self.__tokens)) - def get_label(self, i): - return self.__labels[i] + def get_entries(self): + return self.__entries + + def get_label(self, key): + try: + return self.__entries[key] + except KeyError: + raise UnitexException("SentenceFST label key error.") - def load(self, sentence, tokens, states, labels): + def load(self, sentence, tokens, states, entries): self.__sentence = sentence self.__tokens = [] - self.__labels = {} + self.__entries = {} start = 0 for index, length in tokens: @@ -436,14 +446,14 @@ class SentenceFST(Automaton): break for lid, tid in states[i]: - entry = labels[lid][0] + entry = entries[lid][0] - p1 = labels[lid][1][0][0] - p2 = labels[lid][1][1][0] + p1 = entries[lid][1][0][0] + p2 = entries[lid][1][1][0] - if p1 not in self.__labels: - self.__labels[p1] = [] - self.__labels[p1].append((entry, p2)) + if p1 not in self.__entries: + self.__entries[p1] = [] + self.__entries[p1].append((entry, p2)) transitions.append((sid, lid, tid)) @@ -456,7 +466,7 @@ class TextFST: def __init__(self): self.__tfst = None - self.__tind = None + self.__tind = [] def __del__(self): self.__tfst.close() @@ -465,9 +475,10 @@ class TextFST: return len(self.__tind) def __getitem__(self, i): - if i >= len(self): + try: + position = self.__tind[i] + except IndexError: raise UnitexException("TextFST index out of range (size: %s)." % len(self)) - position = self.__tind[i] self.__tfst.seek(position) diff --git a/unitex/utils/fsa.py b/unitex/utils/fsa.py index de6250e4150ab03b4b21f9833d151c43b663392d..1b14b6e5d587054a6a12b3540696f175384c460b 100644 --- a/unitex/utils/fsa.py +++ b/unitex/utils/fsa.py @@ -519,7 +519,7 @@ class Automaton: if not self[tid].is_visited(): L += self.__expand(self[tid]) - def todot(self, file, encoding=None): + def save(self, file, encoding=None): if encoding is None: encoding = UnitexConstants.DEFAULT_ENCODING