diff --git a/unitex/tools.py b/unitex/tools.py
index 82c5df758e5635a75349009daf532a63d2a07f08..e2104267f9e0e9ad28500ee098dfadbc4c5b8e82 100644
--- a/unitex/tools.py
+++ b/unitex/tools.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+# NOTE: The documentation for each function is adapted from the Unitex
+#       manual.
 
 import logging
 
@@ -117,7 +119,7 @@ def compress(dictionary, **kwargs):
                                smaller size (default).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
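+
+    Example (an illustrative sketch; the dictionary path is
+    hypothetical):
+
+        ok = compress("dela/mydict.dic")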
     """
     options = CompressOptions()
     options.load(kwargs)
@@ -154,101 +156,172 @@ def compress(dictionary, **kwargs):
 
 def concord(index, alphabet, **kwargs):
     """
-    This function takes a concordance index file produced by the function Locate and
-    produces a concordance. It is also possible to produce a modified text version taking
-    into account the transducer outputs associated to the occurrences. 
-
-    The result of the application of this function is a file called concord.txt if the concordance
-    was constructed in text mode, a file called concord.html if the output
-    mode was --html, --glossanet or --script, and a text file with the name de-
-    fined by the user of the function if the function has constructed a modified version
-    of the text.
-
-    In --html mode, the occurrence is coded as a hypertext link. The reference associated
-    to this link is of the form <a href="X Y Z">. X et Y represent the beginning
-    and ending positions of the occurrence in characters in the file text_name.snt. Z
-    represents the number of the sentence in which the occurrence was found.
+    This function takes a concordance index file produced by the
+    function 'locate' and produces a concordance. It is also possible to
+    produce a modified text version taking into account the transducer
+    outputs associated to the occurrences.
+
+    The result of the application of this function is a file called
+    concord.txt if the concordance was constructed in text mode, a file
+    called concord.html if 'format' is UnitexConstants.FORMAT_HTML,
+    UnitexConstants.FORMAT_GLOSSANET or UnitexConstants.FORMAT_SCRIPT,
+    and a text file with the name defined by the user of the function
+    if the function has constructed a modified version of the text.
+
+    In UnitexConstants.FORMAT_HTML mode, the occurrence is coded as a
+    hypertext link. The reference associated with this link is of the
+    form <a href="X Y Z">. X and Y represent the beginning and ending
+    positions of the occurrence in characters in the file
+    text_name.snt. Z represents the number of the sentence in which the
+    occurrence was found.
 
     Arguments:
-        index [str]     -- the index file path (produced by the 'locate' function)
-        alphabet [str]  -- alphabet file used for sorting
+        index [str] -- the index file path (produced by the 'locate'
+            function).
+
+        alphabet [str] -- alphabet file used for sorting.
 
     Keyword arguments:
 
       - Generic options:
-            font [str]            -- the name of the font to use if the output is an HTML
-                                     file.
-            fontsize [int]        -- the font size to use if the output is an HTML file.
-            only_ambiguous [bool] -- Only displays identical occurrences with ambiguous
-                                     outputs, in text order (default: False)
-            only_matches [bool]   -- this option will force empty right and left contexts. Moreover,
-                                     if used with -t/–text, Concord will not surround matches with
-                                     tabulations (default: False)
-            left [str]            -- number of characters on the left of the occurrences (default=0).
-                                     In Thai mode, this means the number of non-diacritic characters.
-            right [str]           -- number of characters (non-diacritic ones in Thai mode) on
-                                     the right of the occurrences (default=0). If the occurrence is
-                                     shorter than this value, the concordance line is completed up to
-                                     right. If the occurrence is longer than the length defined by
-                                     right, it is nevertheless saved as whole.
-
-            NOTE: For both --left and --right, you can add the s character to stop at
-            the first {S} tag. For instance, if you set 40s for the left value, the left context
-            will end at 40 characters at most, less if the {S} tag is found before.
+            font [str] -- the name of the font to use if the output is
+                an HTML file.
+
+            fontsize [int] -- the font size to use if the output is an
+                HTML file.
+
+            only_ambiguous [bool] -- only displays identical occurrences
+                with ambiguous outputs, in text order (default: False).
+
+            only_matches [bool] -- this option will force empty right
+                and left contexts. Moreover, if used with
+                UnitexConstants.FORMAT_TEXT, the function will not
+                surround matches with tabulations (default: False).
+
+            left [str] -- number of characters on the left of the
+                occurrences (default=0). In Thai mode, this means the
+                number of non-diacritic characters.
+
+            right [str] -- number of characters (non-diacritic ones in
+                Thai mode) on the right of the occurrences (default=0).
+                If the occurrence is shorter than this value, the
+                concordance line is completed up to 'right'. If the
+                occurrence is longer than the length defined by
+                'right', it is nevertheless saved as a whole.
+
+            NOTE: For both 'left' and 'right', you can add the 's'
+            character to stop at the first {S} tag. For instance, if you
+            set '40s' for the left value, the left context will end at
+            40 characters at most, less if the {S} tag is found before.
 
       - Sort options:
-            sort [str] -- 'UnitexConstants.SORT_TEXT_ORDER': order in which the occurrences appear in the text (default)
-                          'UnitexConstants.SORT_LEFT_CENTER': left context for primary sort, then occurrence for secondary sort
-                          'UnitexConstants.SORT_LEFT_RIGHT': left context, then right context
-                          'UnitexConstants.SORT_CENTER_LEFT': occurrence, then left context
-                          'UnitexConstants.SORT_CENTER_RIGHT': occurrence, then right context
-                          'UnitexConstants.SORT_RIGHT_LEFT': right context, then left context
-                          'UnitexConstants.SORT_RIGHT_CENTER': left context, then occurrence
+            sort [str] -- specifies the sort order. Possible values:
+
+                - UnitexConstants.SORT_TEXT_ORDER: order in which the
+                    occurrences appear in the text (default);
+
+                - UnitexConstants.SORT_LEFT_CENTER: left context for
+                    primary sort, then occurrence for secondary sort;
+
+                - UnitexConstants.SORT_LEFT_RIGHT: left context, then
+                    right context;
+
+                - UnitexConstants.SORT_CENTER_LEFT: occurrence, then
+                    left context;
+
+                - UnitexConstants.SORT_CENTER_RIGHT: occurrence, then
+                    right context;
+
+                - UnitexConstants.SORT_RIGHT_LEFT: right context, then
+                    left context;
+
+                - UnitexConstants.SORT_RIGHT_CENTER: right context,
+                    then occurrence.
 
       - Output options:
-            format [str]   -- UnitexConstants.FORMAT_HTML: produces a concordance in HTML format encoded in UTF-8 (default)
-                              UnitexConstants.FORMAT_TEXT: produces a concordance in Unicode text format
-                              UnitexConstants.FORMAT_GLOSSANET: produces a concordance for GlossaNet in HTML format where
-                                                                occurrences are links described by the 'script' argument
-                                                                (cf. Unitex manual p. 268). The HTML file is encoded in UTF-8
-                              UnitexConstants.FORMAT_SCRIPT: produces a HTML concordance file where occurrences are links
-                                                             described by the 'script' argument
-                              UnitexConstants.FORMAT_INDEX: produces an index of the concordance, made of the content of the
-                                                            occurrences (with the grammar outputs, if any), preceded by the
-                                                            positions of the occurrences in the text file given in characters
-                              UnitexConstants.FORMAT_UIMA: produces an index of the concordance relative to the original text
-                                                           file, before any Unitex operation. The 'offsets' argument must be
-                                                           provided
-                              UnitexConstants.FORMAT_PRLG: produces a concordance for PRLG corpora where each line is prefixed
-                                                           by information extracted with Unxmlize’s 'prlg' option. You must
-                                                           provide both the 'offsets' and the 'unxmlize' argument
-                              UnitexConstants.FORMAT_XML: produces an xml index of the concordance
-                              UnitexConstants.FORMAT_XML_WITH_HEADER: produces an xml index of the concordance with full xml header
-                              UnitexConstants.FORMAT_AXIS: quite the same as 'index', but the numbers represent the median
-                                                           character of each occurrence
-                              UnitexConstants.FORMAT_XALIGN: another index file, used by the text alignment module. Each line is
-                                                             made of 3 integers X Y Z followed by the content of the occurrence.
-                                                             X is the sentence number, starting from 1. Y and Z are the starting
-                                                             and ending positions of the occurrence in the sentence, given in
-                                                             characters
-                              UnitexConstants.FORMAT_MERGE: indicates to the function that it is supposed to produce a modified
-                                                            version of the text and save it in a file. The filename must be
-                                                            provided with the 'output' argument
-            script [str]   -- string describing the links format for 'glossanet' and 'script' output. For instance,
-                              if you use 'http://www.google.com/search?q=', you will obtain a HTML concordance
-                              file where occurrences are hyperlinks to Google queries
-            offsets [str]  -- the file produced by Tokenize’s output_offsets option (needed by the 'uima' and the
-                              'prlg' format)
-            unxmlize [str] -- file produced by Unxmlize’s 'prlg' option (needed by the 'prlg' format)
-            output [str]   -- the output filename (needed by the 'merge' format)
+            format [str] -- specifies the output format. Possible values:
+
+                - UnitexConstants.FORMAT_HTML: produces a concordance in
+                    HTML format encoded in UTF-8 (default);
+
+                - UnitexConstants.FORMAT_TEXT: produces a concordance in
+                    Unicode text format;
+
+                - UnitexConstants.FORMAT_GLOSSANET: produces a
+                    concordance for GlossaNet in HTML format where
+                    occurrences are links described by the 'script'
+                    argument (cf. Unitex manual p. 268). The HTML file
+                    is encoded in UTF-8;
+
+                - UnitexConstants.FORMAT_SCRIPT: produces an HTML
+                    concordance file where occurrences are links
+                    described by the 'script' argument;
+
+                - UnitexConstants.FORMAT_INDEX: produces an index of the
+                    concordance, made of the content of the occurrences
+                    (with the grammar outputs, if any), preceded by the
+                    positions of the occurrences in the text file given
+                    in characters;
+
+                - UnitexConstants.FORMAT_UIMA: produces an index of the
+                    concordance relative to the original text file,
+                    before any Unitex operation. The 'offsets' argument
+                    must be provided;
+
+                - UnitexConstants.FORMAT_PRLG: produces a concordance
+                    for PRLG corpora where each line is prefixed by
+                    information extracted with Unxmlize’s 'prlg' option.
+                    You must provide both the 'offsets' and the
+                    'unxmlize' arguments;
+
+                - UnitexConstants.FORMAT_XML: produces an XML index of
+                    the concordance;
+
+                - UnitexConstants.FORMAT_XML_WITH_HEADER: produces an
+                    XML index of the concordance with a full XML header;
+
+                - UnitexConstants.FORMAT_AXIS: quite the same as
+                    'index', but the numbers represent the median
+                    character of each occurrence;
+
+                - UnitexConstants.FORMAT_XALIGN: another index file,
+                    used by the text alignment module. Each line is made
+                    of 3 integers X Y Z followed by the content of the
+                    occurrence. X is the sentence number, starting from
+                    1. Y and Z are the starting and ending positions of
+                    the occurrence in the sentence, given in characters;
+
+                - UnitexConstants.FORMAT_MERGE: indicates to the
+                    function that it is supposed to produce a modified
+                    version of the text and save it in a file.
+                    The filename must be provided with the 'output'
+                    argument.
+
+            script [str] -- string describing the links format for
+                'glossanet' and 'script' output. For instance, if you
+                use 'http://www.google.com/search?q=', you will obtain
+                an HTML concordance file where occurrences are
+                hyperlinks to Google queries.
+
+            offsets [str] -- the file produced by the 'tokenize'
+                function's output_offsets option (needed by the 'uima'
+                and 'prlg' formats).
+
+            unxmlize [str] -- file produced by Unxmlize’s 'prlg' option
+                (needed by the 'prlg' format).
+
+            output [str] -- the output filename (needed by the 'merge'
+                format).
 
       - Other options:
-            directory [str] -- indicates to the function that it must not work in the same directory
-                               than <index> but in 'directory'
-            thai [bool]     -- option to use for Thai concordances (default: False)
+            directory [str] -- indicates to the function that it must
+                not work in the same directory as <index> but in
+                'directory'.
+
+            thai [bool] -- option to use for Thai concordances
+                (default: False).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
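+
+    Example (an illustrative sketch; file paths are hypothetical and
+    UnitexConstants is assumed to be imported):
+
+        ok = concord("corpus_snt/concord.ind", "Alphabet.txt",
+                     format=UnitexConstants.FORMAT_TEXT,
+                     left="40s", right="55s")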
     """
     options = ConcordOptions()
     options.load(kwargs)
@@ -335,42 +408,54 @@ def concord(index, alphabet, **kwargs):
 
 def dico(dictionaries, text, alphabet, **kwargs):
     """
-    This function applies dictionaries to a text. The text must have been cut up into
-    lexical units by the 'tokenize' function.
-
-    The function 'dico' produces the following files, and saves them in the directory of
-    the text:
-        - dlf: dictionary of simple words in the text
-        - dlc: dictionary of compound words in the text
-        - err: list of unknown words in the text
-        - tags_err: unrecognized simple words that are not matched by the tags.ind
-                    file
-        - tags.ind: sequences to be inserted in the text automaton (see section 3.8.3,
-                    page 69)
-        - stat_dic.n: file containing the number of simple words, the number of compound
-                      words, and the number of unknown words in the text
-
-    NOTE: Files dlf, dlc, err and tags_err are not sorted. Use the function sort_txt
-    to sort them
+    This function applies dictionaries to a text. The text must have
+    been cut up into lexical units by the 'tokenize' function.
+
+    The function 'dico' produces the following files, and saves them in
+    the directory of the text:
+
+        - dlf: dictionary of simple words in the text;
+        - dlc: dictionary of compound words in the text;
+        - err: list of unknown words in the text;
+        - tags_err: unrecognized simple words that are not matched by
+                    the tags.ind file;
+        - tags.ind: sequences to be inserted in the text automaton (see
+                    section 3.8.3, page 69);
+        - stat_dic.n: file containing the number of simple words, the
+                      number of compound words, and the number of
+                      unknown words in the text.
+
+    NOTE: Files dlf, dlc, err and tags_err are not sorted. Use the
+    function 'sort_txt' to sort them.
 
     Arguments:
-        dictionaries [list(str)] -- list of dictionary pathes ('bin' or 'fst2' formats)
-        text     [str]           -- text (snt format) file path
-        alphabet [str]           -- alphabet file path
+        dictionaries [list(str)] -- list of dictionary paths ('bin' or
+            'fst2' formats).
+
+        text [str] -- text (snt format) file path.
+
+        alphabet [str] -- alphabet file path.
 
     Keyword arguments:
-        morpho [list(str)] -- this optional argument indicates which morphological mode
-                              dictionaries are to be used, if needed by some .fst2
-                              dictionaries. The argument is a list of dictionary path
-                              (bin format)
-        korean [bool]      -- specify the dictionary is in korean (default: False)
-        semitic [bool]     -- specify the dictionary is in a semitic language (default: False)
-        arabic_rules [str] -- specifies the Arabic typographic rule configuration file path
-        raw [str]          -- alternative output file path containing both simple and compound
-                              words, without requiring a text directory
+        morpho [list(str)] -- this optional argument indicates which
+            morphological mode dictionaries are to be used, if needed by
+            some .fst2 dictionaries. The argument is a list of
+            dictionary paths (bin format).
+
+        korean [bool] -- specifies that the dictionary is in Korean
+            (default: False).
+
+        semitic [bool] -- specifies that the dictionary is in a Semitic
+            language (default: False).
+
+        arabic_rules [str] -- specifies the Arabic typographic rule
+            configuration file path.
+
+        raw [str] -- alternative output file path containing both simple
+            and compound words, without requiring a text directory.
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
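+
+    Example (an illustrative sketch; file paths are hypothetical):
+
+        ok = dico(["dela/dela-fr.bin"], "corpus.snt", "Alphabet.txt")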
     """
     options = DicoOptions()
     options.load(kwargs)
@@ -414,21 +499,25 @@ def dico(dictionaries, text, alphabet, **kwargs):
 
 def extract(text, output, index, **kwargs):
     """
-    This function extracts from the given text all sentences that contain at least one
-    occurrence from the concordance. The parameter <text> represents the complete
-    path of the text file, without omitting the extension .snt.
+    This function extracts from the given text all sentences that
+    contain at least one occurrence from the concordance. The parameter
+    <text> represents the complete path of the text file, without
+    omitting the extension .snt.
 
     Arguments:
-        text [str]   -- the text file (.snt format)
-        output [str] -- the output text file
-        index [str]  -- the index file path (produced by the 'locate' function)
+        text [str] -- the text file (.snt format).
+
+        output [str] -- the output text file.
+
+        index [str] -- the index file path (produced by the 'locate'
+            function).
 
     Keyword arguments:
-        non_matching_sentences [bool] -- extracts all sentences that don’t contain matching
-                                         units (default: False)
+        non_matching_sentences [bool] -- extracts all sentences that
+            don’t contain matching units (default: False).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
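+
+    Example (an illustrative sketch; file paths are hypothetical):
+
+        ok = extract("corpus.snt", "matches.txt",
+                     "corpus_snt/concord.ind")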
     """
     options = ExtractOptions()
     options.load(kwargs)
@@ -463,29 +552,34 @@ def extract(text, output, index, **kwargs):
 
 def fst2txt(grammar, text, alphabet, **kwargs):
     """
-    This function applies a transducer to a text in longest match mode at the preprocessing
-    stage, when the text has not been cut into lexical units yet. This function modifies the input
-    text file.
+    This function applies a transducer to a text in longest match mode
+    at the preprocessing stage, when the text has not been cut into
+    lexical units yet.
 
-    This function modifies the input text file.
+    NOTE: This function modifies the input text file.
 
     Arguments:
-        grammar [str]  -- The fst2 to apply on the text
-        text [str]     -- the text file to be modified, with extension .snt
-        alphabet [str] -- the alphabet file of the language of the text
+        grammar [str] -- the fst2 to apply to the text.
+
+        text [str] -- the (.snt) text file to be modified.
+
+        alphabet [str] -- the alphabet file of the language of the text.
 
     Keyword arguments:
-        start_on_space [bool] -- this parameter indicates that the search will start at
-                                 any position in the text, even before a space. This parameter
-                                 should only be used to carry out morphological searches
-                                 (default: False)
-        char_by_char [bool]   -- works in character by character tokenization mode.
-                                 This is useful for languages like Thai (default: False)
-        merge [bool]          -- merge (instead of replace) transducer outputs with text inputs
-                                 (default: True)
+        start_on_space [bool] -- this parameter indicates that the
+            search will start at any position in the text, even before a
+            space. This parameter should only be used to carry out
+            morphological searches (default: False).
+
+        char_by_char [bool] -- works in character by character
+            tokenization mode. This is useful for languages like Thai
+            (default: False).
+
+        merge [bool] -- merge (instead of replace) transducer outputs
+            with text inputs (default: True).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
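+
+    Example (an illustrative sketch; file paths are hypothetical; the
+    .snt file is modified in place):
+
+        ok = fst2txt("norm.fst2", "corpus.snt", "Alphabet.txt",
+                     merge=True)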
     """
     options = Fst2TxtOptions()
     options.load(kwargs)
@@ -532,45 +626,58 @@ def fst2txt(grammar, text, alphabet, **kwargs):
 
 def grf2fst2(grammar, alphabet, **kwargs):
     """
-    This function compiles a grammar into a .fst2 file (for more details see section
-    6.2). The parameter <grf> denotes the complete path of the main graph of the
-    grammar, without omitting the extension .grf.
+    This function compiles a grammar into a .fst2 file (for more details
+    see section 6.2). The parameter <grf> denotes the complete path of
+    the main graph of the grammar, without omitting the extension .grf.
 
-    The result is a file with the same name as the graph passed to the function as a
-    parameter, but with extension .fst2. This file is saved in the same directory as
-    <grf>.
+    The result is a file with the same name as the graph passed to the
+    function as a parameter, but with extension .fst2. This file is
+    saved in the same directory as <grf>.
 
     Arguments:
-        grammar [str]  -- The grf to compile
-        alphabet [str] -- specifies the alphabet file to be used for tokenizing the content of
-                          the grammar boxes into lexical units
+        grammar [str] -- the grf to compile.
+
+        alphabet [str] -- specifies the alphabet file to be used for
+            tokenizing the content of the grammar boxes into lexical
+            units.
 
     Keyword arguments:
-        loop_check [bool]              -- enables error (loop) checking (default: False)
-        char_by_char [bool]            -- tokenization will be done character by character.
-                                          If neither -c nor -a option is used, lexical units
-                                          will be sequences of any Unicode letters (default: False)
-        pkgdir [str]                   -- specifies the repository directory to use (see section
-                                          5.2.2, page 99)
-        no_empty_graph_warning [bool]  -- no warning will be emitted when a graph matches the
-                                          empty word. This option is used by MultiFlex in order
-                                          not to scare users with meaningless error messages when
-                                          they design an inflection grammar that matches the
-                                          empty word (default: False)
-        tfst_check [bool]              -- checks wether the given graph can be considered as a
-                                          valid sentence automaton or not (default: False)
-        silent_grf_name [bool]         -- does not print the graph names (needed for consistent
-                                          log files across several systems; default: True)
-        named_repositories [list(str)] -- declaration of named repositories. This argument is made
-                                          of one or more X=Y sequences, separated by ‘;’, where X is
-                                          the name of the repository denoted by pathname Y. You can
-                                          use this option several times
-        debug [bool]                   -- compile graphs in debug mode (default: False)
-        check_variables [bool]         -- check output validity to avoid malformed variable
-                                          expressions (default: True)
+        loop_check [bool] -- enables error (loop) checking
+            (default: False).
+
+        char_by_char [bool] -- tokenization will be done character by
+            character. If neither this option nor an alphabet file is
+            used, lexical units will be sequences of any Unicode
+            letters (default: False).
+
+        pkgdir [str] -- specifies the repository directory to use (see
+            section 5.2.2, page 99).
+
+        no_empty_graph_warning [bool] -- no warning will be emitted when
+            a graph matches the empty word. This option is used by
+            MultiFlex in order not to scare users with meaningless error
+            messages when they design an inflection grammar that matches
+            the empty word (default: False).
+
+        tfst_check [bool] -- checks whether the given graph can be
+            considered as a valid sentence automaton or not
+            (default: False).
+
+        silent_grf_name [bool] -- does not print the graph names
+            (default: True).
+
+        named_repositories [list(str)] -- declaration of named
+            repositories. This argument is made of one or more X=Y
+            sequences, separated by ‘;’, where X is the name of the
+            repository denoted by pathname Y.
+
+        debug [bool] -- compile graphs in debug mode (default: False).
+
+        check_variables [bool] -- check output validity to avoid
+            malformed variable expressions (default: True).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
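+
+    Example (an illustrative sketch; file paths are hypothetical; the
+    result is saved as grammars/main.fst2):
+
+        ok = grf2fst2("grammars/main.grf", "Alphabet.txt")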
     """
     options = Grf2Fst2Options()
     options.load(kwargs)
@@ -621,75 +728,98 @@ def grf2fst2(grammar, alphabet, **kwargs):
 
 def locate(grammar, text, alphabet, **kwargs):
     """
-    This function applies a grammar to a text and constructs an index of the occurrences
-    found.
+    This function applies a grammar to a text and constructs an index of
+    the occurrences found.
 
-    This function saves the references to the found occurrences in a file called concord.ind.
-    The number of occurrences, the number of units belonging to those occurrences, as
-    well as the percentage of recognized units within the text are saved in a file called
+    This function saves the references to the found occurrences in a
+    file called concord.ind. The number of occurrences, the number of
+    units belonging to those occurrences, as well as the percentage of
+    recognized units within the text are saved in a file called
     concord.n. These two files are stored in the directory of the text.
 
     Arguments:
-        grammar [str]  -- The fst2 to apply on the text
-        text [str]     -- the text file, with extension .snt
-        alphabet [str] -- the alphabet file of the language of the text
+        grammar [str] -- the fst2 to apply to the text.
+
+        text [str] -- the text file, with extension .snt.
+
+        alphabet [str] -- the alphabet file of the language of the text.
 
     Keyword arguments:
       - Generic options:
-            start_on_space [bool]   -- this parameter indicates that the search will start at
-                                       any position in the text, even before a space. This parameter
-                                       should only be used to carry out morphological searches
-                                       (default: False)
-            char_by_char [bool]     -- works in character by character tokenization mode.
-                                       This is useful for languages like Thai (default: False)
-            morpho [list(str)]      -- this optional argument indicates which morphological mode
-                                       dictionaries are to be used, if needed by some .fst2
-                                       dictionaries. The argument is a list of dictionary path
-                                       (bin format)
-            korean [bool]           -- specify the dictionary is in korean (default: False)
-            arabic_rules [str]      -- specifies the Arabic typographic rule configuration file path
-            sntdir [str]            -- puts produced files in 'sntdir' instead of the text directory
-                                       Note that 'sntdir' must end with a file separator (\ or /);
-            negation_operator [str] -- specifies the negation operator to be used in Locate patterns.
-                                       The two legal values for X are minus and tilde (default).
-                                       Using minus provides backward compatibility with previous versions
-                                       of Unitex.
+            start_on_space [bool] -- this parameter indicates that the
+                search will start at any position in the text, even
+                before a space. This parameter should only be used to
+                carry out morphological searches (default: False).
+
+            char_by_char [bool] -- works in character by character
+                tokenization mode. This is useful for languages like
+                Thai (default: False).
+
+            morpho [list(str)] -- this optional argument indicates which
+                morphological mode dictionaries are to be used, if
+                needed by some .fst2 dictionaries. The argument is a
+                list of dictionary paths (bin format).
+
+            korean [bool] -- specifies that the dictionary is in Korean
+                (default: False).
+
+            arabic_rules [str] -- specifies the Arabic typographic rule
+                configuration file path.
+
+            sntdir [str] -- puts produced files in 'sntdir' instead of
+                the text directory. Note that 'sntdir' must end with a
+                file separator (\ or /).
+
+            negation_operator [str] -- specifies the negation operator
+                to be used in Locate patterns. The two legal values are
+                'minus' and 'tilde' (default). Using 'minus' provides
+                backward compatibility with previous versions of Unitex.
 
       - Search limit options:
-            number_of_matches [int] -- stops after the first N matches (default: all matches)
+            number_of_matches [int] -- stops after the first N matches
+                (default: all matches).
 
       - Maximum iterations per token options:
-            stop_token_count [list(int_1, int_2)] -- emits a warning after 'int_1' iterations on a
-                                                     token and stops after 'int_2' iterations.
+            stop_token_count [list(int_1, int_2)] -- emits a warning
+                after 'int_1' iterations on a token and stops after
+                'int_2' iterations.
 
       - Matching mode options:
-            match_mode [str] -- UnitexConstants.MATCH_MODE_SHORTEST: shortest match mode
-                                UnitexConstants.MATCH_MODE_LONGEST: longest match mode (default)
-                                UnitexConstants.MATCH_MODE_ALL: all match mode
+            match_mode [str] -- Possible values are:
+                - UnitexConstants.MATCH_MODE_SHORTEST: shortest match
+                    mode;
+                - UnitexConstants.MATCH_MODE_LONGEST: longest match
+                    mode (default);
+                - UnitexConstants.MATCH_MODE_ALL: all match mode.
 
       - Output options:
-            output_mode [str]             -- UnitexConstants.OUTPUT_MODE_IGNORE: ignore outputs (default)
-                                             UnitexConstants.OUTPUT_MODE_MERGE: merge outputs with text
-                                             UnitexConstants.OUTPUT_MODE_REPLACE: replace texts inputs with
-                                                                                  corresponding transducer outputs
-            protect_dic_chars [bool]      -- when 'merge' or 'replace' mode is used, this option protects some
-                                             input characters with a backslash. This is useful when Locate is
-                                             called by Dico in order to avoid producing bad lines like: 3,14,.PI.NUM
-                                             (default: True)
-            variable [list(str_1, str_2)] -- sets an output variable named str_1 with content str_2. Note that str_2
-                                             must be ASCII.
+            output_mode [str] -- Possible values are:
+                - UnitexConstants.OUTPUT_MODE_IGNORE: ignore outputs
+                    (default);
+                - UnitexConstants.OUTPUT_MODE_MERGE: merge outputs with
+                    text inputs;
+                - UnitexConstants.OUTPUT_MODE_REPLACE: replace text
+                    inputs with the corresponding transducer outputs.
+
+            protect_dic_chars [bool] -- when 'merge' or 'replace' mode
+                is used, this option protects some input characters with
+                a backslash. This is useful when Locate is called by
+                'dico' in order to avoid producing bad lines like:
+                3,14,.PI.NUM (default: True).
+
+            variable [list(str_1, str_2)] -- sets an output variable
+                named str_1 with content str_2. Note that str_2 must be
+                ASCII.
 
       - Ambiguous output options:
-            ambiguous_outputs [bool] -- allows the production of several matches with same input but different
-                                        outputs. If False, in case of ambiguous outputs, one will be arbitrarily
-                                        chosen and kept, depending on the internal state of the function
-                                        (default: True)
-            variable_error [str]     -- UnitexConstants.ON_ERROR_EXIT: kills the function if variable has an empty content
-                                        UnitexConstants.ON_ERROR_IGNORE: ignore the errors (default)
-                                        UnitexConstants.ON_ERROR_BACKTRACK: stop the current path exploration
+            ambiguous_outputs [bool] -- allows the production of several
+                matches with same input but different outputs. If False,
+                in case of ambiguous outputs, one will be arbitrarily
+                chosen and kept, depending on the internal state of the
+                function (default: True).
+
+            variable_error [str] -- Possible values are:
+                - UnitexConstants.ON_ERROR_EXIT: kills the function if
+                    a variable has an empty content;
+                - UnitexConstants.ON_ERROR_IGNORE: ignores the errors
+                    (default);
+                - UnitexConstants.ON_ERROR_BACKTRACK: stops the current
+                    path exploration.
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
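+
+    Example (an illustrative sketch; file paths are hypothetical and
+    UnitexConstants is assumed to be imported):
+
+        ok = locate("grammars/main.fst2", "corpus.snt", "Alphabet.txt",
+                    match_mode=UnitexConstants.MATCH_MODE_LONGEST,
+                    output_mode=UnitexConstants.OUTPUT_MODE_MERGE)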
     """
     options = LocateOptions()
     options.load(kwargs)
@@ -786,39 +916,45 @@ def locate(grammar, text, alphabet, **kwargs):
 
 def normalize(text, **kwargs):
     """
-    This function carries out a normalization of text separators. The separators are
-    space, tab, and newline. Every sequence of separators that contains at least one
-    newline is replaced by a unique newline. All other sequences of separators are replaced
-    by a single space.
+    This function carries out a normalization of text separators. The
+    separators are space, tab, and newline. Every sequence of separators
+    that contains at least one newline is replaced by a unique newline.
+    All other sequences of separators are replaced by a single space.
 
-    This function also checks the syntax of lexical tags found in the text. All sequences in
-    curly brackets should be either the sentence delimiter {S}, the stop marker {STOP},
-    or valid entries in the DELAF format ({aujourd’hui,.ADV}).
+    This function also checks the syntax of lexical tags found in the
+    text. All sequences in curly brackets should be either the sentence
+    delimiter {S}, the stop marker {STOP}, or valid entries in the DELAF
+    format ({aujourd’hui,.ADV}).
 
-    Parameter <text> represents the complete path of the text file. The function creates
-    a modified version of the text that is saved in a file with extension .snt.
+    NOTE: The function creates a modified version of the text that is
+          saved in a file with extension .snt.
 
-    WARNING: if you specify a normalization rule file, its rules will be applied prior to
-    anything else. So, you have to be very careful if you manipulate separators in such
-    rules.
+    WARNING: if you specify a normalization rule file, its rules will be
+             applied prior to anything else. So, you have to be very
+             careful if you manipulate separators in such rules.
 
     Arguments:
-        text [str] -- The text file to normalize
+        text [str] -- the text file to normalize.
 
     Keyword arguments:
-        no_carriage_return [bool]         -- every separator sequence will be turned into a single
-                                             space (default: False)
-        input_offsets [str]               -- base offset file to be used
-        output_offsets [str]              -- offset file to be produced
-        replacement_rules [str]           -- specifies the normalization rule file
-                                             to be used. See section 14.13.6 for details about the
-                                             format of this file. By default, the function only
-                                             replaces { and } by [ and ]
-        no_separator_normalization [bool] -- only applies replacement rules specified with the 'replacement_rules'
-                                             option (default: False)
+        no_carriage_return [bool] -- every separator sequence will be
+            turned into a single space (default: False).
+
+        input_offsets [str] -- base offset file to be used.
+
+        output_offsets [str] -- offset file to be produced.
+
+        replacement_rules [str] -- specifies the normalization rule file
+            to be used. See section 14.13.6 for details about the format
+            of this file. By default, the function only replaces { and }
+            by [ and ].
+
+        no_separator_normalization [bool] -- only applies replacement
+            rules specified with the 'replacement_rules' option
+            (default: False).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
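+
+    Example (an illustrative sketch; the file path is hypothetical; the
+    result is saved as corpus.snt):
+
+        ok = normalize("corpus.txt")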
     """
     options = NormalizeOptions()
     options.load(kwargs)
@@ -857,31 +993,37 @@ def normalize(text, **kwargs):
 
 def sort_txt(text, **kwargs):
     """
-    This function carries out a lexicographical sorting of the lines of file <txt>. <txt>
-    represents the complete path of the file to be sorted.
+    This function carries out a lexicographical sorting of the lines of
+    file <txt>. <txt> represents the complete path of the file to be
+    sorted.
 
-    The input text file is modified. By default, the sorting is performed in the order of
-    Unicode characters, removing duplicate lines.
+    The input text file is modified. By default, the sorting is
+    performed in the order of Unicode characters, removing duplicate
+    lines.
 
     Arguments:
-        text [str] -- The text file to sort
+        text [str] -- the text file to sort.
 
     Keyword arguments:
-        duplicates [bool]                   -- keep duplicate lines (default: False)
-        reverse [bool]                      -- sort in descending order (default: False)
-        sort_order [str]                    -- sorts using the alphabet order defined in this
-                                               file. If this parameter is missing, the sorting
-                                               is done according to the order of Unicode
-                                               characters
-        line_info [str]                     -- backup the number of lines of the result file
-                                               in this file
-        thai [bool]                         -- option for sorting Thai text (default: False)
-        factorize_inflectional_codes [bool] -- makes two entries XXX,YYY.ZZZ:A and XXX,YYY.ZZZ:B
-                                               become a single entry XXX,YYY.ZZZ:A:B
-                                               (default: False)
+        duplicates [bool] -- keep duplicate lines (default: False).
+
+        reverse [bool] -- sort in descending order (default: False).
+
+        sort_order [str] -- sorts using the alphabet order defined in
+            this file. If this parameter is missing, the sorting is done
+            according to the order of Unicode characters.
+
+        line_info [str] -- saves the number of lines of the result file
+            in this file.
+
+        thai [bool] -- option for sorting Thai text (default: False).
+
+        factorize_inflectional_codes [bool] -- makes two entries X,Y.Z:A
+            and X,Y.Z:B become a single entry X,Y.Z:A:B
+            (default: False).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
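+
+    Example (an illustrative sketch; file paths are hypothetical;
+    sorting the dlf file produced by 'dico'):
+
+        ok = sort_txt("corpus_snt/dlf", sort_order="Alphabet_sort.txt")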
     """
     options = SortTxtOptions()
     options.load(kwargs)
@@ -922,51 +1064,62 @@ def sort_txt(text, **kwargs):
 
 def tokenize(text, alphabet, **kwargs):
     """
-    This function tokenizes a tet text into lexical units. <txt> the complete path of the
-    text file, without omitting the .snt extension.
-
-    The function codes each unit as a whole. The list of units is saved in a text file called
-    tokens.txt. The sequence of codes representing the units now allows the coding
-    of the text. This sequence is saved in a binary file named text.cod. The function
-    also produces the following four files:
-        - tok_by_freq.txt: text file containing the units sorted by frequency
-        - tok_by_alph.txt: text file containing the units sorted alphabetically
-        - stats.n: text file containing information on the number of sentence separators,
-                   the number of units, the number of simple words and the number of
-                   numbers
-        - enter.pos: binary file containing the list of newline positions in the text. The
-                     coded representation of the text does not contain newlines, but spaces.
-                     Since a newline counts as two characters and a space as a single one,
-                     it is necessary to know where newlines occur in the text when the
-                     positions of occurrences located by the 'locate' function are to be
-                     synchronized with the text file. File enter.pos is used for this by
-                     the 'concord' function. Thanks to this, when clicking on an occurrence in
-                     a concordance, it is correctly selected in the text. File enter.pos is
-                     a binary file containing the list of the positions of newlines in the
-                     text.
+    This function tokenizes a text into lexical units. <txt> represents
+    the complete path of the text file, without omitting the .snt
+    extension.
+
+    The function codes each unit as a whole. The list of units is saved
+    in a text file called tokens.txt. The sequence of codes representing
+    the units now allows the coding of the text. This sequence is saved
+    in a binary file named text.cod. The function also produces the
+    following four files:
+
+        - tok_by_freq.txt: text file containing the units sorted by
+                           frequency.
+        - tok_by_alph.txt: text file containing the units sorted
+                           alphabetically.
+        - stats.n: text file containing information on the number of
+                   sentence separators, the number of units, the number
+                   of simple words and the number of numbers.
+        - enter.pos: binary file containing the list of newline
+                     positions in the text. The coded representation of
+                     the text does not contain newlines, but spaces.
+                     Since a newline counts as two characters and a
+                     space as a single one, it is necessary to know
+                     where newlines occur in the text when the positions
+                     of occurrences located by the 'locate' function are
+                     to be synchronized with the text file. File
+                     enter.pos is used for this by the 'concord'
+                     function. Thanks to this, when clicking on an
+                     occurrence in a concordance, it is correctly
+                     selected in the text. File enter.pos is a binary
+                     file containing the list of the positions of
+                     newlines in the text.
 
     All produced files are saved in the text directory
 
     Arguments:
-        text [str]     -- the text file to tokenize (snt format)
-        alphabet [str] -- the alphabet file
+        text [str] -- the text file to tokenize (.snt format).
+
+        alphabet [str] -- the alphabet file.
 
     Keyword arguments:
       - Generic options:
-            char_by_char [bool] -- indicates whether the function is applied character by
-                                   character, with the exceptions of the sentence delimiter
-                                   {S}, the stop marker {STOP} and lexical tags like
-                                   {today,.ADV} which are considered to be single units
-                                   (default: False)
-            tokens [str]        -- specifies a tokens.txt file to load and modify, instead
-                                   of creating a new one from scratch
+            char_by_char [bool] -- indicates whether the function is
+                applied character by character, with the exceptions of
+                the sentence delimiter {S}, the stop marker {STOP} and
+                lexical tags like {today,.ADV} which are considered to
+                be single units (default: False).
+
+            tokens [str] -- specifies a tokens.txt file to load and
+                modify, instead of creating a new one from scratch.
 
       - Offsets options:
-            input_offsets [str]  -- base offset file to be used;
-            output_offsets [str] -- offset file to be produced;
+            input_offsets [str] -- base offset file to be used.
+
+            output_offsets [str] -- offset file to be produced.
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
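+
+    Example (an illustrative sketch; file paths are hypothetical):
+
+        ok = tokenize("corpus.snt", "Alphabet.txt")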
     """
     options = TokenizeOptions()
     options.load(kwargs)
@@ -1010,29 +1163,35 @@ def txt2tfst(text, alphabet, **kwargs):
     """
     This function constructs an automaton of a text.
 
-    If the text is separated into sentences, the function constructs an automaton for each
-    sentence. If this is not the case, the function arbitrarily cuts the text into sequences
-    of 2000 tokens and produces an automaton for each of these sequences.
+    If the text is separated into sentences, the function constructs an
+    automaton for each sentence. If this is not the case, the function
+    arbitrarily cuts the text into sequences of 2000 tokens and produces
+    an automaton for each of these sequences.
 
-    The result is a file called text.tfst which is saved in the directory of the text.
-    Another file named text.tind is also produced.
+    The result is a file called text.tfst which is saved in the
+    directory of the text. Another file named text.tind is also
+    produced.
 
     Arguments:
-        text [str]     -- the path to the text file in snt format.
-        alphabet [str] -- the alphabet file
+        text [str] -- the path to the text file in .snt format.
+
+        alphabet [str] -- the alphabet file.
 
     Keyword arguments:
-        clean [bool]                -- indicates whether the rule of conservation of the best
-                                       paths (see section 7.2.4) should be applied
-                                       (default: False)
-        normalization_grammar [str] -- name of a normalization grammar that is to be applied
-                                       to the text automaton
-        tagset [str]                -- Elag tagset file to use to normalize dictionary entries
-        korean [bool]               -- tells the function that it works on Korean
-                                       (default: False)
+        clean [bool] -- indicates whether the rule of conservation of
+            the best paths (see section 7.2.4) should be applied
+            (default: False).
+
+        normalization_grammar [str] -- name of a normalization grammar
+            that is to be applied to the text automaton.
+
+        tagset [str] -- Elag tagset file to use to normalize dictionary
+            entries.
+
+        korean [bool] -- tells the function that it works on Korean
+            (default: False).
 
     Return [bool]:
-        The function return 'True' if it succeeds and 'False' otherwise.
+        True if it succeeds and False otherwise.
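+
+    Example (an illustrative sketch; file paths are hypothetical):
+
+        ok = txt2tfst("corpus.snt", "Alphabet.txt", clean=True)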
     """
     options = Txt2TFstOptions()
     options.load(kwargs)