Skip to content
Extraits de code Groupes Projets
Valider a1c97108 rédigé par Patrick Watrin's avatar Patrick Watrin
Parcourir les fichiers

PEP 0008 cosmetics

parent ef167363
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
# Do not modify this file. Use the 'build-config-file.py' script to generate a
# working version adapted to your local Unitex installation or copy this file
# before editing.
# Do not modify this file. Use the 'build-config-file.py' script to
# generate a working version adapted to your local Unitex installation
# or copy this file before editing.
# The 'global' section contains the global configuration parameters.
global:
# There are 3 'debug' levels:
# 0: the error output is disabled;
# 1: the error output is limited to the logging system implemented in the
# bindings;
# 2: the error output is activated for both the bindings and the Unitex
# processor.
# NOTE: if you activate the debug for level >= 1, the verbose level is
# automatically activated at level 2.
# 1: the error output is limited to the logging system implemented
# in the bindings;
# 2: the error output is activated for both the bindings and the
# Unitex processor.
# NOTE: if you activate the debug for level >= 1, the verbose level
# is automatically activated at level 2.
debug: 0
# There are 4 'verbose' levels:
# 0: the standard output is disabled;
# 1: the standard output shows 'warnings' emitted by the bindings logging
# system;
# 1: the standard output shows 'warnings' emitted by the bindings
# logging system;
# 2: the standard output shows 'warnings' and various processing
# information emitted by the bindings logging system;
# 3: the full standard output is activated for both the bindings and the
# Unitex processor.
# 3: the full standard output is activated for both the bindings and
# the Unitex processor.
verbose: 0
# If not 'null', the error and standard outputs are redirected to the file
# specified by this parameter. Be sure to have write access to this file.
# If not 'null', the error and standard outputs are redirected to
# the file specified by this parameter. Be sure to have write
# access to this file.
#log: /var/log/unitex.log
log: null
# If you are using the high-level 'Processor' class, this parameter
# activates or deactivates the resource persistence. If persistence is
# activated, dictionaries, grammar and alphabet are loaded during the
# object initialization and kept in memory in order to improve
# activated, dictionaries, grammar and alphabet are loaded during
# the object initialization and kept in memory in order to improve
# performances.
# NOTE: you can manually activate the persistence by using the
# 'load_persistent_X' functions from 'unitex.resources'.
persistence: True
# The Unitex library implements a virtual filesystem which avoids a lot
# of I/O and improves the performance. If this parameter is set to True,
# The high-level 'Processor' class will activate automatically this virtual
# filesystem.
# NOTE: as for the persistence, you can activate manually the VFS by using
# the functions from 'unitex.io'.
# The Unitex library implements a virtual filesystem which avoids a
# lot of I/O and improves the performance. If this parameter is set
# to True, the high-level 'Processor' class will activate
# automatically this virtual filesystem.
# NOTE: as for the persistence, you can activate manually the VFS by
# using the functions from 'unitex.io'.
virtualization: True
# The 'resources' section is automatically filled by the 'build-config-file.py'
# script. If you want to do it manually, be sure to give the absolute path of
# each resource as shown below.
# NOTE: the 'dictionaries' parameter is a list of paths. As required by the YAML
# format, each item must be prefixed by the '-' character (cf. example).
# resources:
# language: fr
# alphabet: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Alphabet.txt
# alphabet-sorted: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Alphabet_sort.txt
# sentence: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Graphs/Preprocessing/Sentence/Sentence.fst2
# replace: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Graphs/Preprocessing/Replace/Replace.fst2
# dictionaries:
# - /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Dela/dela-fr-public.bin
# - /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Dela/ajouts80jours.bin
# - /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Dela/motsGramf-.bin
# The 'resources' section is automatically filled by the
# 'build-config-file.py' script. If you want to do it manually, be sure
# to give the absolute path of each resource as shown below.
# NOTE: the 'dictionaries' parameter is a list of paths. As required by
# the YAML format, each item must be prefixed by the '-' character
# (cf. example).
#resources:
# language: fr
# alphabet: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Alphabet.txt
# alphabet-sorted: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Alphabet_sort.txt
# sentence: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Graphs/Preprocessing/Sentence/Sentence.fst2
# replace: /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Graphs/Preprocessing/Replace/Replace.fst2
# dictionaries:
# - /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Dela/dela-fr-public.bin
# - /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Dela/ajouts80jours.bin
# - /home/dev/projects/python-unitex/dependencies/Unitex-GramLab-3.1rc/French/Dela/motsGramf-.bin
resources:
language: null
......@@ -71,25 +73,25 @@ resources:
dictionaries: null
# The 'tools' section can contain any of the arguments used by the unitex
# tools.
# Most of the time, these parameters are the same as the ones used by the
# original Unitex tools (as described in the Unitex manual). Changes are
# explained in the comments of this file.
# NOTE: if you use the 'Processor' high-level class some parameters will be
# overridden to fit the 'tag' functions behaviour. For instance, there is
# no point to define a font or a context for 'concord'.
# The 'tools' section can contain any of the arguments used by the
# unitex tools.
# Most of the time, these parameters are the same as the ones used by
# the original Unitex tools (as described in the Unitex manual). Changes
# are explained in the comments of this file.
# NOTE: if you use the 'Processor' high-level class some parameters will
# be overridden to fit the 'tag' functions behaviour. For instance,
# there is no point to define a font or a context for 'concord'.
# NOTE: ALL FILE PATH MUST BE ABSOLUTE!!!
tools:
# CheckDic command (Unitex manual, p.266)
check_dic:
# If set to True, the function will use a strict syntax checking
# If set to True, the function will use a strict syntax checking
# against unprotected dot and comma.
strict: False
# If set to True, the function will tolerate spaces in grammatical,
# semantic and inflectional codes.
# If set to True, the function will tolerate spaces in
# grammatical, semantic and inflectional codes.
no_space_warning: False
# Compress command (Unitex manual, p.266)
......@@ -98,9 +100,10 @@ tools:
# produce a file xxx.bin.
output: null
# If set to True, 'flip' indicates that the inflected and canonical
# forms should be swapped in the compressed dictionary. This option is
# used to construct an inverse dictionary.
# If set to True, 'flip' indicates that the inflected and
# canonical forms should be swapped in the compressed
# dictionary. This option is used to construct an inverse
# dictionary.
flip: False
# If set to True, the function will use the semitic compression
......@@ -108,45 +111,48 @@ tools:
semitic: False
# 'version: v1' produces an old style .bin file.
# 'version: v2' produces a new style .bin file, with no file size
# limitation to 16 Mb and a smaller resulting size.
# 'version: v2' produces a new style .bin file, with no file
# size limitation to 16 Mb and a smaller resulting
# size.
version: v2
# Concord command (Unitex manual, p.267)
concord:
# 'font' specifies the name of the font to use if the output is an
# HTML file.
# 'font' specifies the name of the font to use if the output is
# an HTML file.
#font: "Courier new"
font: null
# 'fontsize' specifies the font size to use if the output is an HTML
# file.
# 'fontsize' specifies the font size to use if the output is an
# HTML file.
#fontsize: 12
fontsize: null
# If 'only_ambiguous' is set to True, the function will only display
# identical occurrences with ambiguous outputs, in text order.
# If 'only_ambiguous' is set to True, the function will only
# display identical occurrences with ambiguous outputs, in text
# order.
only_ambiguous: False
# If 'only_matches' is set to True, the function will force empty right
# and left contexts. Moreover, if used with -t/–text, the function will
# not surround matches with tabulations.
# If 'only_matches' is set to True, the function will force
# empty right and left contexts. Moreover, if used with 'text',
# the function will not surround matches with tabulations.
only_matches: False
# 'left' specifies the number of characters on the left of the
# occurrences. In Thai mode, this means the number of non-diacritic
# characters. For both 'left' and 'right' parameters, you can add the
# 's' character to stop at the first {S} tag. For instance, if you set
# '40s' for the left value, the left context will end at 40 characters
# at most, less if the {S} tag is found before.
# occurrences. In Thai mode, this means the number of
# non-diacritic characters. For both 'left' and 'right'
# parameters, you can add the 's' character to stop at the first
# {S} tag. For instance, if you set '40s' for the left value,
# the left context will end at 40 characters at most, less if
# the {S} tag is found before.
# NOTE: the number must be quoted to avoid integer conversion.
left: "0"
# 'right' specifies the number of characters (non-diacritic ones in
# Thai mode) on the right of the occurrences. If the occurrence is
# shorter than this value, the concordance line is completed up to
# right. If the occurrence is longer than the length defined by right,
# it is nevertheless saved as whole.
# 'right' specifies the number of characters (non-diacritic ones
# in Thai mode) on the right of the occurrences. If the
# occurrence is shorter than this value, the concordance line is
# completed up to right. If the occurrence is longer than the
# length defined by right, it is nevertheless saved as whole.
# NOTE: the number must be quoted to avoid integer conversion.
right: "0"
......@@ -164,101 +170,107 @@ tools:
# 'format' specifies the output format. Possible values are:
# - html: produces a concordance in HTML format;
# - text: produces a concordance in text format;
# - glossanet: produces a concordance for GlossaNet in HTML format
# where occurrences are links described by the 'script'
# parameter;
# - script: produces a HTML concordance file where occurrences are
# links described by the 'script' parameter;
# - index: produces an index of the concordance, made of the content
# of the occurrences (with the grammar outputs, if any),
# preceded by the positions of the occurrences in the text
# file given in characters;
# - glossanet: produces a concordance for GlossaNet in HTML
# format where occurrences are links described by
# the 'script' parameter;
# - script: produces a HTML concordance file where occurrences
# are links described by the 'script' parameter;
# - index: produces an index of the concordance, made of the
# content of the occurrences (with the grammar
# outputs, if any), preceded by the positions of the
# occurrences in the text file given in characters;
# - uima: produces an index of the concordance relative to the
# original text file, before any Unitex operation. The
# 'offsets' parameter must be provided;
# - prlg: produces a concordance for PRLG corpora where each line is
# prefixed by information extracted with Unxmlize’s 'prlg'
# option. You must provide both the 'offsets' and the
# 'unxmlize' parameter;
# - prlg: produces a concordance for PRLG corpora where each
# line is prefixed by information extracted with
# Unxmlize’s 'prlg' option. You must provide both the
# 'offsets' and the 'unxmlize' parameter;
# - xml: produces xml index of the concordance;
# - xml-with-header: produces an xml index of the concordance with
# full xml header;
# - axis: quite the same as 'index', but the numbers represent the
# median character of each occurrence;
# - xalign: another index file, used by the text alignment module.
# Each line is made of 3 integers X Y Z followed by the
# content of the occurrence. X is the sentence number,
# starting from 1. Y and Z are the starting and ending
# positions of the occurrence in the sentence, given in
# characters;
# - merge: indicates to the function that it is supposed to produce
# a modified version of the text and save it in a file. The
# filename must be provided with the 'output' parameter.
# - xml-with-header: produces an xml index of the concordance
# with full xml header;
# - axis: quite the same as 'index', but the numbers represent
# the median character of each occurrence;
# - xalign: another index file, used by the text alignment
# module. Each line is made of 3 integers X Y Z
# followed by the content of the occurrence. X is
# the sentence number, starting from 1. Y and Z are
# the starting and ending positions of the
# occurrence in the sentence, given in characters;
# - merge: indicates to the function that it is supposed to
# produce a modified version of the text and save it
# in a file. The filename must be provided with the
# 'output' parameter.
format: "text"
# 'script' describes the links format for 'glossanet' and 'script'
# output. For instance, if you use 'http://www.google.com/search?q=',
# you will obtain a HTML concordance file where occurrences are
# hyperlinks to Google queries.
# 'script' describes the links format for 'glossanet' and
# 'script' output. For instance, if you use
# 'http://www.google.com/search?q=', you will obtain a
# HTML concordance file where occurrences are hyperlinks to
# Google queries.
script: null
# 'offsets' provides the file produced by tokenize’s output_offsets
# option (needed by the 'uima' and the 'prlg' format).
# 'offsets' provides the file produced by tokenize’s
# output_offsets option (needed by the 'uima' and the 'prlg'
# format).
offsets: null
# 'unxmlize' provides the file produced by Unxmlize’s 'prlg' option
# (needed by the 'prlg' format).
# 'unxmlize' provides the file produced by Unxmlize’s 'prlg'
# option (needed by the 'prlg' format).
unxmlize: null
# 'directory' indicates to the function that it must not work in the
# same directory than <index> but in 'directory'
# 'directory' indicates to the function that it must not work in
# the same directory than <index> but in 'directory'.
directory: null
# If set to True, 'thai' indicates that the input text is in Thai
# language
# If set to True, 'thai' indicates that the input text is in
# Thai language.
thai: False
# Dico command (Unitex manual, p.272)
dico:
# 'morpho' lists dictionaries to load in morphological mode, if needed
# by some .fst2 dictionaries.
# 'morpho' lists dictionaries to load in morphological mode, if
# needed by some .fst2 dictionaries.
morpho: null
# If set to True, 'korean' indicates that the input text is in korean
# language.
# If set to True, 'korean' indicates that the input text is in
# korean language.
korean: False
# If set to True, 'semitic' indicates that the input text is in a
# semitic language.
# If set to True, 'semitic' indicates that the input text is in
# a semitic language.
semitic: False
# 'arabic_rules' specifies the Arabic typographic rule configuration
# file path.
# 'arabic_rules' specifies the Arabic typographic rule
# configuration file path.
arabic_rules: null
# 'raw' specifies an alternative output file path containing both
# simple and compound words, without requiring a text directory.
# 'raw' specifies an alternative output file path containing
# both simple and compound words, without requiring a text
# directory.
raw: null
# Extract command (Unitex manual, p.277)
extract:
# If set to True, 'non_matching_sentences' indicates to the function
# to extract all sentences that don’t contain matching units.
# If set to True, 'non_matching_sentences' indicates to the
# function to extract all sentences that don’t contain matching
# units.
non_matching_sentences: False
# Fst2Txt command (Unitex manual, p.280)
fst2txt:
# If set to True, the search will start at any position in the text,
# even before a space. This parameter should only be used to carry out
# morphological searches.
# If set to True, the search will start at any position in the
# text, even before a space. This parameter should only be used
# to carry out morphological searches.
start_on_space: False
# If set to True, the function will work in character by character
# tokenization mode. This is useful for languages like Thai.
# If set to True, the function will work in character by
# character tokenization mode. This is useful for languages like
# Thai.
word_by_word: False
# If set to True, the function merge (instead of replace) transducer
# outputs with text inputs.
# If set to True, the function merge (instead of replace)
# transducer outputs with text inputs.
merge: True
# Grf2Fst2 command (Unitex manual, p.280)
......@@ -266,26 +278,28 @@ tools:
# If set to True, 'loop_check' enables error (loop) checking.
loop_check: False
# If set to True, tokenization will be done character by character.
# If set to True, tokenization will be done character by
# character.
char_by_char: False
# 'pkgdir' specifies the repository directory to use (see section
# 5.2.2, p.99).
# 'pkgdir' specifies the repository directory to use (see
# section 5.2.2, p.99).
pkgdir: null
# If set to True, no warning will be emitted when a graph matches the
# empty word.
# If set to True, no warning will be emitted when a graph
# matches the empty word.
no_empty_graph_warning: False
# If set to True, the function checks whether the given graph can be
# considered as a valid sentence automaton or not.
# If set to True, the function checks whether the given graph can
# be considered as a valid sentence automaton or not.
tfst_check: False
# If set to True, the function does not print the graph names.
silent_grf_name: True
# 'named_repository' must be a list of X=Y sequences, separated by ‘;’,
# where X is the name of the repository denoted by pathname Y.
# 'named_repository' must be a list of X=Y sequences, separated
# by ‘;’, where X is the name of the repository denoted by
# pathname Y.
named_repository: null
# If set to True, the graph is compiled in debug mode.
......@@ -297,95 +311,104 @@ tools:
# Locate command (Unitex manual, p.283)
locate:
# If set to True, the search will start at any position in the text,
# even before a space. This parameter should only be used to carry out
# morphological searches.
# If set to True, the search will start at any position in the
# text, even before a space. This parameter should only be used
# to carry out morphological searches.
start_on_space: False
# If set to True, tokenization will be done character by character.
# If set to True, tokenization will be done character by
# character.
char_by_char: False
# 'morpho' lists dictionaries to load in morphological mode, if needed
# by some .fst2 dictionaries.
# 'morpho' lists dictionaries to load in morphological mode, if
# needed by some .fst2 dictionaries.
morpho: null
# If set to True, 'korean' indicates that the input text is in korean
# language.
# If set to True, 'korean' indicates that the input text is in
# korean language.
korean: False
# 'arabic_rules' specifies the Arabic typographic rule configuration
# file path.
# 'arabic_rules' specifies the Arabic typographic rule
# configuration file path.
arabic_rules: null
# If not null, the function puts produced files in 'sntdir' instead
# of the text directory. Note that 'sntdir' must end with a file
# separator (\ or /).
# If not null, the function puts produced files in 'sntdir'
# instead of the text directory. Note that 'sntdir' must end
# with a file separator (\ or /).
sntdir: null
# This parameter specifies the negation operator to be used in Locate
# patterns. The two legal values for X are 'minus' and 'tilde'.
# This parameter specifies the negation operator to be used in
# Locate patterns. The two legal values for X are 'minus' and
# 'tilde'.
negation_operator: "tilde"
# If not null, the function stops after the first N matches. By
# default, the function searches for all matches.
number_of_matches: null
# 'stop_token_count' is a list of two integers. If specified, the
# function will emit a warning after 'int_1' iterations on a token and
# stops after 'int_2' iterations.
# 'stop_token_count' is a list of two integers. If specified,
# the function will emit a warning after 'int_1' iterations on a
# token and stops after 'int_2' iterations.
#stop_token_count=[3,5]
stop_token_count: null
# Possible values for 'match_mode' are: 'longest', 'shortest' and 'all'
# Possible values for 'match_mode' are: 'longest', 'shortest'
# and 'all'.
match_mode: "longest"
# Possible values for 'output_mode' are:
# - 'ignore': the transducer outputs will be ignored;
# - 'merge': the transducer outputs will be merged with the input
# text;
# - 'replace': the transducer outputs replaces the matching text.
# - 'merge': the transducer outputs will be merged with the
# input text;
# - 'replace': the transducer outputs replaces the matching
# text.
output_mode: "merge"
# If set to True, this parameter enables special characters protection
# when 'merge' or 'replace' mode is used. This is useful when Locate is
# called by Dico in order to avoid producing bad lines like:
# If set to True, this parameter enables special characters
# protection when 'merge' or 'replace' mode is used. This is
# useful when Locate is called by Dico in order to avoid
# producing bad lines like:
# 3,14,.PI.NUM
protect_dic_chars: True
# If not null, this parameter must be a list of two strings, where:
# 'str_1' is a variable name with content 'str_2'.
# If not null, this parameter must be a list of two strings,
# where: 'str_1' is a variable name with content 'str_2'.
# NOTE: 'str_2' must be ASCII.
variable: null
# If set to True, the function allows the production of several
# matches with same input but different outputs. If False, in case of
# ambiguous outputs, one will be arbitrarily chosen and kept,
# depending on the internal state of the function.
# matches with same input but different outputs. If False, in
# case of ambiguous outputs, one will be arbitrarily chosen and
# kept, depending on the internal state of the function.
ambiguous_outputs: True
# Possible values are:
# - 'exit': kills the function if variable has an empty content;
# - 'exit': kills the function if variable has an empty
# content;
# - 'ignore': ignore the errors;
# - 'backtrack': stop the current path exploration.
variable_error: "ignore"
# Normalize command (Unitex manual, p.287)
normalize:
# If set to True, every separator sequence will be turned into a single
# space.
# If set to True, every separator sequence will be turned into a
# single space.
no_carriage_return: False
# 'input_offsets' specifies the base offset file path to be used.
# 'input_offsets' specifies the base offset file path to be
# used.
input_offsets: null
# 'output_offsets' specifies the offset file path to be produced.
# 'output_offsets' specifies the offset file path to be
# produced.
output_offsets: null
# 'replacement_rules' specifies the normalization rule file to be used.
# 'replacement_rules' specifies the normalization rule file to
# be used.
replacement_rules: null
# If set to True, the function only applies replacement rules specified
# with the 'replacement_rules' parameter.
# If set to True, the function only applies replacement rules
# specified with the 'replacement_rules' parameter.
no_separator_normalization: False
# SortTxt command (Unitex manual, p.291)
......@@ -397,54 +420,57 @@ tools:
reverse: False
# The 'sort_order' parameter specifies the file path of the
# 'Alphabet_sort.txt' file or any other file defining the alphabet
# order.
# 'Alphabet_sort.txt' file or any other file defining the
# alphabet order.
sort_order: null
# If not null, the function backups the number of lines of the result
# file in the file specified by this parameter.
# If not null, the function backups the number of lines of the
# result file in the file specified by this parameter.
line_info: null
# If set to True, 'thai' indicates that the input text is in Thai
# language
# If set to True, 'thai' indicates that the input text is in
# Thai language.
thai: False
# If set to True, the function makes two entries XXX,YYY.ZZZ:A and
# XXX,YYY.ZZZ:B become a single entry: XXX,YYY.ZZZ:A:B
# If set to True, the function makes two entries X,Y.Z:A and
# X,Y.Z:B become a single entry: X,Y.Z:A:B
factorize_inflectional_codes: False
# Tokenize command (Unitex manual, p.294)
tokenize:
# If set to True, the function is applied character by character, with
# the exceptions of the sentence delimiter {S}, the stop marker {STOP}
# and lexical tags like {today,.ADV} which are considered to be single
# units.
# If set to True, the function is applied character by
# character, with the exceptions of the sentence delimiter {S},
# the stop marker {STOP} and lexical tags like {today,.ADV}
# which are considered to be single units.
char_by_char: False
# 'tokens' specifies the path of the 'tokens.txt' file to load and
# modify, instead of creating a new one from scratch.
# 'tokens' specifies the path of the 'tokens.txt' file to load
# and modify, instead of creating a new one from scratch.
tokens: null
# 'input_offsets' specifies the base offset file path to be used.
# 'input_offsets' specifies the base offset file path to be
# used.
input_offsets: null
# 'output_offsets' specifies the offset file path to be produced.
# 'output_offsets' specifies the offset file path to be
# produced.
output_offsets: null
# Txt2Tfst command (Unitex manual, p.296)
txt2tfst:
# If set to True, 'clean' indicates whether the rule of conservation of
# the best paths (see section 7.2.4) should be applied.
# If set to True, 'clean' indicates whether the rule of
# conservation of the best paths (see section 7.2.4) should be
# applied.
clean: False
# This parameter specifies the file path of a normalization grammar
# that is to be applied to the text automaton.
# This parameter specifies the file path of a normalization
# grammar that is to be applied to the text automaton.
normalization_grammar: null
# This parameter specifies the Elag tagset file to use to normalize
# dictionary entries.
# This parameter specifies the Elag tagset file to use to
# normalize dictionary entries.
tagset: null
# If set to True, 'korean' indicates that the input text is in korean
# language.
# If set to True, 'korean' indicates that the input text is
# in korean language.
korean: False
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter