diff --git a/documentation/_unitex.rst b/documentation/_unitex.rst new file mode 100644 index 0000000000000000000000000000000000000000..9a40567010442f736adbaabcf7ba843f17a9ff3d --- /dev/null +++ b/documentation/_unitex.rst @@ -0,0 +1,38 @@ +The `_unitex` C++ extension +=========================== + + +Summary +------- +.. currentmodule:: _unitex +.. autosummary:: + _unitex.unitex_tool + _unitex.unitex_load_persistent_dictionary + _unitex.unitex_load_persistent_fst2 + _unitex.unitex_load_persistent_alphabet + _unitex.unitex_free_persistent_dictionary + _unitex.unitex_free_persistent_fst2 + _unitex.unitex_free_persistent_alphabet + _unitex.unitex_is_persistent_dictionary + _unitex.unitex_is_persistent_fst2 + _unitex.unitex_is_persistent_alphabet + _unitex.unitex_enable_stdout + _unitex.unitex_disable_stdout + _unitex.unitex_enable_stderr + _unitex.unitex_disable_stderr + _unitex.unitex_cp + _unitex.unitex_rm + _unitex.unitex_mv + _unitex.unitex_mkdir + _unitex.unitex_rmdir + _unitex.unitex_ls + _unitex.unitex_read_file + _unitex.unitex_write_file + _unitex.unitex_append_to_file + + +Contents +-------- +.. automodule:: _unitex + :members: + diff --git a/documentation/conf.py b/documentation/conf.py index 9a78d1f279e3e2da0604b69feed6f12d071c6bef..af213a507c0a948554d2bfc6ad984a562512f7aa 100644 --- a/documentation/conf.py +++ b/documentation/conf.py @@ -114,7 +114,13 @@ todo_include_todos = False # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'alabaster' +#html_theme = 'alabaster' +html_theme = 'nature' +#html_theme = "classic" +#html_theme_options = { +# "rightsidebar": "true", +# "relbarbgcolor": "black" +#} # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/documentation/index.rst b/documentation/index.rst index 2aaa9132281e76dd5284d2c98e6c6bbf834f53a3..b9ea08dd39c5877afea292506fbcbef78f5e66b7 100644 --- a/documentation/index.rst +++ b/documentation/index.rst @@ -11,6 +11,7 @@ Contents: .. toctree:: :maxdepth: 1 + _unitex unitex Indices and tables diff --git a/documentation/modules.rst b/documentation/modules.rst deleted file mode 100644 index 404b584145c6da47cfa32a8717c4c54bb75922bb..0000000000000000000000000000000000000000 --- a/documentation/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -unitex -====== - -.. toctree:: - :maxdepth: 4 - - unitex diff --git a/documentation/unitex-config.rst b/documentation/unitex-config.rst new file mode 100644 index 0000000000000000000000000000000000000000..eccfcb949078bb69fa766ac1acb1e311456ecb85 --- /dev/null +++ b/documentation/unitex-config.rst @@ -0,0 +1,29 @@ +The `unitex.config` package +=========================== + + +Summary +------- +.. currentmodule:: unitex.config +.. autosummary:: + unitex.config.Options + unitex.config.CheckDicOptions + unitex.config.CompressOptions + unitex.config.ConcordOptions + unitex.config.DicoOptions + unitex.config.ExtractOptions + unitex.config.Fst2TxtOptions + unitex.config.Grf2Fst2Options + unitex.config.LocateOptions + unitex.config.NormalizeOptions + unitex.config.SortTxtOptions + unitex.config.TokenizeOptions + unitex.config.Txt2TFstOptions + unitex.config.ResourcesOptions + unitex.config.UnitexConfig + + +Contents +-------- +.. automodule:: unitex.config + :members: diff --git a/documentation/unitex-io.rst b/documentation/unitex-io.rst new file mode 100644 index 0000000000000000000000000000000000000000..0d055d6d9543fa4e5f335306095ee37f7f7cfd1e --- /dev/null +++ b/documentation/unitex-io.rst @@ -0,0 +1,22 @@ +The `unitex.io` package +======================= + + +Summary +------- +.. currentmodule:: unitex.io +.. autosummary:: + unitex.io.UnitexFile + unitex.io.cp + unitex.io.rm + unitex.io.mv + unitex.io.mkdir + unitex.io.rmdir + unitex.io.ls + unitex.io.exists + + +Contents +-------- +.. automodule:: unitex.io + :members: diff --git a/documentation/unitex-processor.rst b/documentation/unitex-processor.rst new file mode 100644 index 0000000000000000000000000000000000000000..da266edc4eebd9013bc49fe44f0486d723225e88 --- /dev/null +++ b/documentation/unitex-processor.rst @@ -0,0 +1,15 @@ +The `unitex.processor` package +============================== + + +Summary +------- +.. currentmodule:: unitex.processor +.. autosummary:: + unitex.processor.UnitexProcessor + + +Contents +-------- +.. automodule:: unitex.processor + :members: diff --git a/documentation/unitex-resources.rst b/documentation/unitex-resources.rst new file mode 100644 index 0000000000000000000000000000000000000000..5ee19f66c336d3040ca7631bb959edee17d58af3 --- /dev/null +++ b/documentation/unitex-resources.rst @@ -0,0 +1,23 @@ +The `unitex.resources` package +============================== + + +Summary +------- +.. currentmodule:: unitex.resources +.. autosummary:: + unitex.resources.load_persistent_dictionary + unitex.resources.is_persistent_dictionary + unitex.resources.free_persistent_dictionary + unitex.resources.load_persistent_fst2 + unitex.resources.is_persistent_fst2 + unitex.resources.free_persistent_fst2 + unitex.resources.load_persistent_alphabet + unitex.resources.is_persistent_alphabet + unitex.resources.free_persistent_alphabet + + +Contents +-------- +.. automodule:: unitex.resources + :members: diff --git a/documentation/unitex-tools.rst b/documentation/unitex-tools.rst new file mode 100644 index 0000000000000000000000000000000000000000..1e2e6f667e8e3b1a57693fc3cb15a1ea994b503e --- /dev/null +++ b/documentation/unitex-tools.rst @@ -0,0 +1,26 @@ +The `unitex.tools` package +========================== + + +Summary +------- +.. currentmodule:: unitex.tools +.. autosummary:: + unitex.tools.check_dic + unitex.tools.compress + unitex.tools.concord + unitex.tools.dico + unitex.tools.extract + unitex.tools.fst2txt + unitex.tools.grf2fst2 + unitex.tools.locate + unitex.tools.normalize + unitex.tools.sort_txt + unitex.tools.tokenize + unitex.tools.txt2tfst + + +Contents +-------- +.. automodule:: unitex.tools + :members: diff --git a/documentation/unitex.rst b/documentation/unitex.rst index 4bb710f4bd54b109026e7f060f1379486ed5352b..38fb3719a53ebfe87501a5140e345fca52901fa7 100644 --- a/documentation/unitex.rst +++ b/documentation/unitex.rst @@ -1,9 +1,32 @@ -unitex module -============= +The `unitex` package +==================== -Module contents ---------------- +Summary +------- +.. currentmodule:: unitex +.. autosummary:: + unitex.UnitexConstants + unitex.disable_stderr + unitex.disable_stdout + unitex.enable_stderr + unitex.enable_stdout + unitex.init_log_system + +Submodules +---------- +.. toctree:: + :maxdepth: 1 + + unitex.tools <unitex-tools> + unitex.resources <unitex-resources> + unitex.io <unitex-io> + + unitex.processor <unitex-processor> + unitex.config <unitex-config> + + +Contents +-------- .. automodule:: unitex :members: - :undoc-members: diff --git a/setup.py b/setup.py index d93d9ad7b4426934bbdfd2d06f8f9a01d5c0ca76..e613c514b1db449603a1fed536e346c8b86858d9 100644 --- a/setup.py +++ b/setup.py @@ -118,8 +118,8 @@ setup(name = "unitex", library_dirs=['/usr/local/lib'], sources = ["extensions/_unitex.cpp"])], - cmdclass = { - "build": CustomBuild, - "clean": CustomClean - } +# cmdclass = { +# "build": CustomBuild, +# "clean": CustomClean +# } ) diff --git a/unitex/__init__.py b/unitex/__init__.py index 07a65b9bcd834356e15c571cbd874f5c99e9e414..e11948c6fab7db153091b6b11bebe9c4fb5f0d60 100644 --- a/unitex/__init__.py +++ b/unitex/__init__.py @@ -18,6 +18,9 @@ class UnitexException(Exception): class UnitexConstants(object): + """ + This class lists all the constants used by the Unitex processor. + """ DEFAULT_ENCODING="utf-8" diff --git a/unitex/tools.py b/unitex/tools.py index 44ce8b400e2d2a1e334c661d05a549ac279ab1ca..d753a7c738c1323179dedc5a83b4651af83e3ab6 100644 --- a/unitex/tools.py +++ b/unitex/tools.py @@ -119,8 +119,8 @@ def compress(dictionary, **kwargs): dictionary (default: False). - **version [str]** -- Possible values are: + - UnitexConstants.DICTIONARY_VERSION_1: produces an old style .bin - UnitexConstants.DICTIONARY_VERfile; - UnitexConstants.DICTIONARY_VERSION_2: produces a new style .bin file, with no file size limitation to 16 Mb and a smaller size (default). @@ -796,21 +796,25 @@ def locate(grammar, text, alphabet, **kwargs): compatibility with previous versions of Unitex. - *Search limit options:* + - **number_of_matches [int]** -- stops after the first N matches (default: all matches). - *Maximum iterations per token options:* + - **stop_token_count [list(int_1, int_2)]** -- emits a warning after 'int_1' iterations on a token and stops after 'int_2' iterations. - *Matching mode options:* + - **match_mode [str]** -- Possible values are: - UnitexConstants.MATCH_MODE_SHORTEST - UnitexConstants.MATCH_MODE_LONGEST (default) - UnitexConstants.MATCH_MODE_ALL - Output options: + - **output_mode [str]** -- Possible values are: - UnitexConstants.OUTPUT_MODE_IGNORE (default) - UnitexConstants.OUTPUT_MODE_MERGE @@ -826,6 +830,7 @@ def locate(grammar, text, alphabet, **kwargs): named str_1 with content str_2. Note that str_2 must be ASCII. - *Ambiguous output options:* + - **ambiguous_outputs [bool]** -- allows the production of several matches with same input but different outputs. If False, in case of ambiguous outputs, one will be arbitrarily chosen and kept,