Adaptation of the tools function arguments in the unit tests

59d7ab92 · Patrick Watrin · cfcd9ff7 · cfcd9ff7 · 59d7ab92 · 59d7ab92
--- a/config/processor.yaml
+++ b/config/processor.yaml
-global:
-    debug: 1
-    verbose: 1
-    tempdir: "/tmp"
-    persistence: True
-    virtualization: True
-resources:
-    language: fr
-    alphabet: /home/resources/media/fr/unitex/preprocessing/Alphabet.txt
-    alphabet-sorted: /home/resources/media/fr/unitex/preprocessing/Alphabet_sort.txt
-    sentence: /home/resources/media/fr/unitex/preprocessing/sentence/Sentence.fst2
-    replace: /home/resources/media/fr/unitex/preprocessing/replace/Replace.fst2
-    dictionaries:
-        - /home/resources/media/fr/unitex/dictionary/delaf-short.bin
-        - /home/resources/media/fr/unitex/dictionary/delacf-light.bin
-        - /home/resources/media/fr/unitex/dictionary/toponyms.bin
-# The 'options' section can contain any of the argument used by the unitex tools
-# functions. Note that some argument will be overriden to fit the 'tag' and 'extract'
-# behaviour. For intance, there is not point to define a font or a context for
-# 'concord'.
-options:
-    locate:
-        match_mode: longest
--- a/config/unitex.yaml
+++ b/config/unitex.yaml
+global:
+    debug: 1
+    verbose: 1
+    tempdir: "/tmp"
+    persistence: True
+    virtualization: True
+resources:
+    language: "fr"
+    alphabet: "/home/resources/media/fr/unitex/preprocessing/Alphabet.txt"
+    alphabet-sorted: "/home/resources/media/fr/unitex/preprocessing/Alphabet_sort.txt"
+    sentence: "/home/resources/media/fr/unitex/preprocessing/sentence/Sentence.fst2"
+    replace: "/home/resources/media/fr/unitex/preprocessing/replace/Replace.fst2"
+    dictionaries:
+        - "/home/resources/media/fr/unitex/dictionary/delaf-short.bin"
+        - "/home/resources/media/fr/unitex/dictionary/delacf-light.bin"
+        - "/home/resources/media/fr/unitex/dictionary/toponyms.bin"
+# The 'options' section can contain any of the arguments used by the unitex tools
+# functions. Note that, if you use the 'Processor' high-level class, some arguments
+# could be overridden to fit the behaviour of the 'tag', 'extract' and 'search'
+# functions. For instance, there is no point in defining a font or a context for
+# 'concord'.
+options:
+    check_dic:
+        strict: False
+        no_space_warning: False
+    compress:
+        output: null
+        flip: False
+        semitic: False
+        version: "v2"
+    concord:
+        font: null
+        fontsize: null
+        only_ambiguous: False
+        only_matches: False
+        left: 0
+        right: 0
+        sort: "TO"
+        format: "text"
+        script: null
+        offsets: null
+        unxmlize: null
+        directory: null
+        thai: False
+    dico:
+        morpho: null
+        korean: False
+        semitic: False
+        arabic_rules: null
+        raw: null
+    extract:
+        non_matching_sentences: False
+    fst2txt:
+        start_on_space: False
+        char_by_char: False
+        merge: True
+    grf2fst2:
+        loop_check: False
+        char_by_char: False
+        pkgdir: null
+        no_empty_graph_warning: False
+        tfst_check: False
+        silent_grf_name: True
+        named_repositories: null
+        debug: False
+        check_variables: True
+    locate:
+        start_on_space: False
+        char_by_char: False
+        morpho: null
+        korean: False
+        arabic_rules: null
+        sntdir: null
+        negation_operator: "tilde"
+        number_of_matches: null
+        stop_token_count: null
+        match_mode: "longest"
+        output_mode: "merge"
+        protect_dic_chars: True
+        variable: null
+        ambiguous_outputs: True
+        variable_error: "ignore"
+    normalize:
+        no_carriage_return: False
+        input_offsets: null
+        output_offsets: null
+        no_separator_normalization: False
+        replacement_rules: null
+    sort_txt:
+        duplicates: False
+        revers: False
+        sort_order: null
+        line_info: null
+        thai: False
+        factorize_inflectional_codes: False
+    tokenize:
+        char_by_char: False
+        tokens: null
+        input_offsets: null
+        output_offsets: null
+    txt2tfst:
+        clean: False
+        normalization_grammar: null
+        tagset: null
+        korean: False
--- a/setup.py
+++ b/setup.py
@@ -87,46 +87,39 @@ class CustomClean(clean):
-setup(
+setup(name = "unitex",
-    name = "unitex",
+      version = "1.0",
-    version = "1.0",
+      description = "Python 3 binding for the Unitex library",
-    description = "Python 3 binding for the Unitex library",
+      long_description = open('README.md').read(),
-    long_description = open('README.md').read(),
+      author = "Patrick Watrin",
-    author = "Patrick Watrin",
+      author_email = "patrick.watrin@gmail.com",
-    author_email = "patrick.watrin@gmail.com",
+      # https://pypi.python.org/pypi?%3Aaction=list_classifiers
-    # https://pypi.python.org/pypi?%3Aaction=list_classifiers
+      classifiers = ["License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
-    classifiers = [
+                     "Programming Language :: Python",
-        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+                     "Development Status :: 4 - Beta",
-        "Programming Language :: Python",
+                     "Intended Audience :: Developers",
-        "Development Status :: 4 - Beta",
+                     "Topic :: Scientific/Engineering :: Information Analysis"],
-        "Intended Audience :: Developers",
-        "Topic :: Scientific/Engineering :: Information Analysis",
+      keywords = "Unitex, Finite-States Transducers, Natural Language Processing",
-    ],
-    keywords = "Unitex, Finite-States Transducers, Natural Language Processing",
+      license = "GPLv3",
+      install_requires = [],
-    license = "GPLv3",
-    install_requires = [
+      package_dir = {"unitex":"unitex"},
-        # TO FILL
+      packages = ["unitex"],
-    ],
+      data_files = [],
-    packages = ["unitex"],
-    package_dir = {'unitex': 'unitex'},
+      ext_modules=[Extension("_unitex",
+                             include_dirs = [UNITEX_INC, get_python_inc()],
-    data_files = [
+                             libraries=["unitex"],
-    ],
+                             library_dirs=['/usr/local/lib'],
+                             sources = ["extensions/_unitex.cpp"])],
-    ext_modules=[
-        Extension("_unitex",
+       #cmdclass = {
-                  include_dirs = [UNITEX_INC, get_python_inc()],
+       #    "build": CustomBuild,
-                  libraries=["unitex"],
+       #    "clean": CustomClean
-                  library_dirs=['/usr/local/lib'],
+       #}
-                  sources = ["extensions/_unitex.cpp"])
-    ],
-#    cmdclass = {
-#        "build": CustomBuild,
-#        "clean": CustomClean
-#    }
 )
--- a/tests/01_test_tools.py
+++ b/tests/01_test_tools.py
@@ -5,17 +5,17 @@ import os
 import shutil
 import unittest
+from unitex import *
 from unitex.tools import *
 class Arguments:
    def __init__(self, language=None):
        self.__arguments = {}
        self.__arguments["dic"] = "data/dictionary.dic" 
-        self.__arguments["dic_type"] = "delaf"
+        self.__arguments["dic_type"] = UnitexConstants.DELAF
        self.__arguments["dic_check"] = "data/CHECK_DIC.TXT" 
        self.__arguments["bin"] = "data/dictionary.bin" 
@@ -29,6 +29,7 @@ class Arguments:
        self.__arguments["txt"] = "data/corpus.txt" 
        self.__arguments["snt"] = "data/corpus.snt" 
        self.__arguments["dir"] = "data/corpus_snt" 
+        self.__arguments["xtr"] = "data/corpus.xtr" 
        self.__arguments["text.cod"] = os.path.join(self.__arguments["dir"], "text.cod")
        self.__arguments["tok_by_freq.txt"] = os.path.join(self.__arguments["dir"], "tok_by_freq.txt")
@@ -90,37 +91,42 @@ class TestUnitexTools(unittest.TestCase):
        if os.path.exists(self._arguments["fst"]):
            os.remove(self._arguments["fst"])
+        # Removing output file from the 'extract' command.
+        if os.path.exists(self._arguments["xtr"]):
+            os.remove(self._arguments["xtr"])
    def test_01_check_dic(self):
-        args = [self._arguments["dic"]]
+        dictionary = self._arguments["dic"]
+        dtype = self._arguments["dic_type"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
-        kwargs["type"] = self._arguments["dic_type"]
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["strict"] = False
        kwargs["no_space_warning"] = True
-        ret = check_dic(*args, **kwargs)
+        ret = check_dic(dictionary, dtype, alphabet, **kwargs)
        ok = os.path.exists(self._arguments["dic_check"]) and ret
        self.assertTrue(ok, "Dictionary checking failed!")
    def test_02_compress(self):
-        args = [self._arguments["dic"]]
+        dictionary = self._arguments["dic"]
        kwargs = {}
+        kwargs["output"] = None
        kwargs["flip"] = False
        kwargs["semitic"] = False
-        kwargs["version"] = "v2"
+        kwargs["version"] = UnitexConstants.DICTIONARY_VERSION_1
-        ret = compress(*args, **kwargs)
+        ret = compress(dictionary, **kwargs)
        ok = os.path.exists(self._arguments["bin"]) and os.path.exists(self._arguments["inf"]) and ret
        self.assertTrue(ok, "Compression failed!")
    def test_03_normalize(self):
-        args = [self._arguments["txt"]]
+        text = self._arguments["txt"]
        kwargs = {}
        kwargs["no_carriage_return"] = False
@@ -129,23 +135,23 @@ class TestUnitexTools(unittest.TestCase):
        kwargs["replacement_rules"] = None
        kwargs["no_separator_normalization"] = False
-        ret = normalize(*args, **kwargs)
+        ret = normalize(text, **kwargs)
        ok = os.path.exists(self._arguments["snt"]) and ret
        self.assertTrue(ok, "Normalisation failed!")
    def test_04_fst2txt(self):
-        args = [self._arguments["sentence"]]
+        grammar = self._arguments["sentence"]
+        text = self._arguments["snt"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
-        kwargs["text"] = self._arguments["snt"]
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["start_on_space"] = False
        kwargs["char_by_char"] = False
        kwargs["merge"] = True
-        ret = fst2txt(*args, **kwargs)
+        ret = fst2txt(grammar, text, alphabet, **kwargs)
        ok = ret
@@ -155,16 +161,16 @@ class TestUnitexTools(unittest.TestCase):
        if not os.path.exists(self._arguments["dir"]):
            os.mkdir(self._arguments["dir"])
-        args = [self._arguments["snt"]]
+        text = self._arguments["snt"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["char_by_char"] = False
        kwargs["tokens"] = None
        kwargs["input_offsets"] = None
        kwargs["output_offsets"] = None
-        ret = tokenize(*args, **kwargs)
+        ret = tokenize(text, alphabet, **kwargs)
        ok = ret
        ok = ok and os.path.exists(self._arguments["text.cod"])
@@ -176,18 +182,18 @@ class TestUnitexTools(unittest.TestCase):
        self.assertTrue(ok, "Tokenisation failed!")
    def test_06_dico(self):
-        args = [self._arguments["bin"]]
+        dictionaries = [self._arguments["bin"]]
+        text = self._arguments["snt"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
-        kwargs["text"] = self._arguments["snt"]
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["morpho"] = None
        kwargs["korean"] = False
        kwargs["semitic"] = False
        kwargs["arabic_rules"] = None
        kwargs["raw"] = None
-        ret = dico(*args, **kwargs)
+        ret = dico(dictionaries, text, alphabet, **kwargs)
        ok = ret
        ok = ok and os.path.exists(self._arguments["dlf"])
@@ -216,21 +222,19 @@ class TestUnitexTools(unittest.TestCase):
        ok = True
-        for f in files:
+        for text in files:
-            args = [f]
+            ret = sort_txt(text, **kwargs)
-            ret = sort_txt(*args, **kwargs)
            ok = ok and ret
        self.assertTrue(ok, "Sorting failed!")
    def test_08_grf2fst2(self):
-        args = [self._arguments["grf"]]
+        grammar = self._arguments["grf"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
        kwargs["loop_check"] = False
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["char_by_char"] = False
        kwargs["pkgdir"] = None
        kwargs["no_empty_graph_warning"] = False
@@ -240,47 +244,48 @@ class TestUnitexTools(unittest.TestCase):
        kwargs["debug"] = False
        kwargs["check_variables"] = False
-        ret = grf2fst2(*args, **kwargs)
+        ret = grf2fst2(grammar, alphabet, **kwargs)
        ok = os.path.exists(self._arguments["fst"]) and ret
        self.assertTrue(ok, "Grammar compilation failed!")
    def test_09_locate(self):
-        args = [self._arguments["fst"]]
+        grammar = self._arguments["fst"]
+        text = self._arguments["snt"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
-        kwargs["text"] = self._arguments["snt"]
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["start_on_space"] = False
        kwargs["char_by_char"] = False
        kwargs["morpho"] = None
        kwargs["korean"] = False
        kwargs["arabic_rules"] = None
        kwargs["sntdir"] = None
-        kwargs["negation_operator"] = None
+        kwargs["negation_operator"] = UnitexConstants.NEGATION_OPERATOR
        kwargs["number_of_matches"] = None
        kwargs["stop_token_count"] = None
-        kwargs["match_mode"] = "longest"
+        kwargs["match_mode"] = UnitexConstants.MATCH_MODE_LONGEST
-        kwargs["output_mode"] = "merge"
+        kwargs["output_mode"] = UnitexConstants.OUTPUT_MODE_MERGE
        kwargs["protect_dic_chars"] = True
        kwargs["variable"] = None
        kwargs["ambiguous_outputs"] = True
-        kwargs["variable_error"] = "ignore"
+        kwargs["variable_error"] = UnitexConstants.ON_ERROR_IGNORE
-        ret = locate(*args, **kwargs)
+        ret = locate(grammar, text, alphabet, **kwargs)
        ok = os.path.exists(self._arguments["ind"]) and os.path.exists(self._arguments["concord.n"]) and ret
        self.assertTrue(ok, "Locate failed!")
    def test_10_concord(self):
-        args = [self._arguments["ind"]]
+        index = self._arguments["ind"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
        kwargs["font"] = None
@@ -290,35 +295,34 @@ class TestUnitexTools(unittest.TestCase):
        kwargs["left"] = "1000s"
        kwargs["right"] = "1000s"
-        kwargs["sort"] = "CR"
+        kwargs["sort"] = UnitexConstants.SORT_CENTER_RIGHT
-        kwargs["format"] = "text"
+        kwargs["format"] = UnitexConstants.FORMAT_TEXT
        kwargs["script"] = None
        kwargs["offsets"] = None
        kwargs["unxmlize"] = None
        kwargs["output"] = None
        kwargs["directory"] = None
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["thai"] = False
-        ret = concord(*args, **kwargs)
+        ret = concord(index, alphabet, **kwargs)
        ok = os.path.exists(self._arguments["concordances"]) and ret
        self.assertTrue(ok, "Concord failed!")
    def test_11_txt2tfst(self):
-        args = [self._arguments["snt"]]
+        text = self._arguments["snt"]
+        alphabet = self._arguments["alphabet"]
        kwargs = {}
-        kwargs["alphabet"] = self._arguments["alphabet"]
        kwargs["clean"] = False
        kwargs["normalization_grammar"] = None
        kwargs["tagset"] = None
        kwargs["korean"] = False
-        ret = txt2tfst(*args, **kwargs)
+        ret = txt2tfst(text, alphabet, **kwargs)
        ok = ret
        ok = ok and os.path.exists(self._arguments["text.tfst"])
@@ -326,6 +330,20 @@ class TestUnitexTools(unittest.TestCase):
        self.assertTrue(ok, "Txt2Tfst failed!")
+    def test_12_extract(self):
+        text = self._arguments["snt"]
+        output = self._arguments["xtr"]
+        index = self._arguments["ind"]
+        kwargs = {}
+        kwargs["non_matching_sentences"] = False
+        ret = extract(text, output, index, **kwargs)
+        ok = ret
+        ok = ok and os.path.exists(self._arguments["xtr"])
+        self.assertTrue(ok, "Extract failed!")
 if __name__ == '__main__':

--- a/unitex/__init__.py
+++ b/unitex/__init__.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-__all__ = ["io", "tools", "processor"]
 import logging
 import os
 import sys
@@ -93,7 +91,7 @@ else:
    if DEBUG not in (0, 1):
        raise UnitexException( "Wrong $UNITEX_DEBUG value..." )
-# If a log file is specified, the log will be duplicated
+# If a log file is specified, the log will be redirected
 # to this file
 LOG = os.path.expandvars('$UNITEX_LOG')
 if LOG != '$UNITEX_LOG':
@@ -101,37 +99,22 @@ if LOG != '$UNITEX_LOG':
 else:
    LOG = None
-LOGGER = logging.getLogger("unitex")
+kwargs = {}
-ch = logging.StreamHandler()
 if DEBUG == 1:
-    ch.setLevel(logging.DEBUG)
+    kwargs["level"] = logging.DEBUG
 elif VERBOSE == 1:
-    ch.setLevel(logging.WARNING)
+    kwargs["level"] = logging.WARNING
 elif VERBOSE == 2:
-    ch.setLevel(logging.INFO)
+    kwargs["level"] = logging.INFO
 else:
-    ch.setLevel(logging.ERROR)
+    kwargs["level"] = logging.ERROR
-cf = logging.Formatter("%(name)-12s: %(levelname)-8s %(message)s")
-ch.setFormatter(cf)
-LOGGER.addHandler(ch)
 if LOG is not None:
-    fh = logging.FileHandler(LOG)
+    kwargs["format"] = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    kwargs["filename"] = LOG
-    if DEBUG == 1:
+    kwargs["filemode"] = "a"
-        fh.setLevel(logging.DEBUG)
+else:
-    elif VERBOSE == 1:
+    kwargs["format"] = "%(name)-12s: %(levelname)-8s %(message)s"
-        fh.setLevel(logging.WARNING)
-    elif VERBOSE == 2:
-        fh.setLevel(logging.INFO)
-    else:
-        fh.setLevel(logging.ERROR)
-    ff = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-    fh.setFormatter(ff)
-    LOGGER.addHandler(fh)
+logging.basicConfig(**kwargs)
--- a/unitex/config.py
+++ b/unitex/config.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import logging
 import os
 import tempfile
 from unitex import *
 from unitex.io import exists
+LOGGER = logging.getLogger(__name__)
 class Options(object):
-    def __init__(self):
+    def __init__(self, options=None):
        self.__options = {}
+        if options is not None:
+            self.load(options)
    def __contains__(self, key):
        return key in self.__options
@@ -180,10 +186,11 @@ class ConcordOptions(Options):
            self["output"] = output
        directory = options.get("directory", None)
-        if directory is not None and isinstance(directory, str) is False:
+        if directory is not None:
-            raise UnitexException("[CONCORD] Wrong value for the 'directory' option. String required.")
+            if isinstance(directory, str) is False:
-        if exists(directory) is False:
+                raise UnitexException("[CONCORD] Wrong value for the 'directory' option. String required.")
-            raise UnitexException("[CONCORD] The text 'directory' doesn't exist.")
+            if exists(directory) is False:
+                raise UnitexException("[CONCORD] The text 'directory' doesn't exist.")
        self["directory"] = directory
        thai = options.get("thai", False)
@@ -218,7 +225,7 @@ class DicoOptions(Options):
            raise UnitexException("[DICO] Wrong value for the 'semitic' option. Boolean required.")
        self["semitic"] = semitic
-        arabic_rules = options.get("arabic_rules", False)
+        arabic_rules = options.get("arabic_rules", None)
        if arabic_rules is not None:
            if isinstance(arabic_rules, str) is False:
                raise UnitexException("[DICO] Wrong value for the 'arabic_rules' option. String required.")
@@ -226,7 +233,7 @@ class DicoOptions(Options):
                raise UnitexException("[DICO] Rules file '%s' doesn't exist." % arabic_rules)
        self["arabic_rules"] = arabic_rules
-        raw = options.get("raw", False)
+        raw = options.get("raw", None)
        if raw is not None and isinstance(raw, str) is False:
            raise UnitexException("[DICO] Wrong value for the 'raw' option. String required.")
        self["raw"] = raw
@@ -257,10 +264,10 @@ class Fst2TxtOptions(Options):
            raise UnitexException("[FST2TXT] Wrong value for the 'start_on_space' option. Boolean required.")
        self["start_on_space"] = start_on_space
-        word_by_word = options.get("word_by_word", False)
+        char_by_char = options.get("char_by_char", False)
-        if isinstance(word_by_word, bool) is False:
+        if isinstance(char_by_char, bool) is False:
-            raise UnitexException("[FST2TXT] Wrong value for the 'word_by_word' option. Boolean required.")
+            raise UnitexException("[FST2TXT] Wrong value for the 'char_by_char' option. Boolean required.")
-        self["word_by_word"] = word_by_word
+        self["char_by_char"] = char_by_char
        merge = options.get("merge", True)
        if isinstance(merge, bool) is False:
@@ -306,7 +313,7 @@ class Grf2Fst2Options(Options):
        self["silent_grf_name"] = silent_grf_name
        named_repositories = options.get("named_repositories", None)
-        if isinstance(named_repositories, str) is False:
+        if named_repositories is not None and isinstance(named_repositories, str) is False:
            raise UnitexException("[GRF2FST2] Wrong value for the 'named_repositories' option. String required.")
        self["named_repositories"] = named_repositories
@@ -315,7 +322,7 @@ class Grf2Fst2Options(Options):
            raise UnitexException("[GRF2FST2] Wrong value for the 'debug' option. Boolean required.")
        self["debug"] = debug
-        check_variables = options.get("check_variables", False)
+        check_variables = options.get("check_variables", True)
        if isinstance(check_variables, bool) is False:
            raise UnitexException("[GRF2FST2] Wrong value for the 'check_variables' option. Boolean required.")
        self["check_variables"] = check_variables
@@ -368,8 +375,8 @@ class LocateOptions(Options):
                raise UnitexException("[LOCATE] Directory '%s' doesn't exist." % sntdir)
        self["sntdir"] = sntdir
-        negation_operator = options.get("negation_operator", None)
+        negation_operator = options.get("negation_operator", UnitexConstants.NEGATION_OPERATOR)
-        if negation_operator is not None and negation_operator not in (UnitexConstants.NEGATION_OPERATOR, UnitexConstants.NEGATION_OPERATOR_OLD):
+        if negation_operator not in (UnitexConstants.NEGATION_OPERATOR, UnitexConstants.NEGATION_OPERATOR_OLD):
            raise UnitexException("[LOCATE] Wrong value for the 'negation_operator' option. UnitexConstants.NEGATION_OPERATOR(_OLD) required.")
        self["negation_operator"] = negation_operator
@@ -469,7 +476,7 @@ class NormalizeOptions(Options):
            raise UnitexException("[NORMALIZE] Wrong value for the 'no_separator_normalization' option. Boolean required.")
        self["no_separator_normalization"] = no_separator_normalization
-        replacement_rules = options.get("replacement_rules", False)
+        replacement_rules = options.get("replacement_rules", None)
        if replacement_rules is not None:
            if isinstance(replacement_rules, str) is False:
                raise UnitexException("[NORMALIZE] Wrong value for the 'replacement_rules' option. String required.")
@@ -536,7 +543,7 @@ class TokenizeOptions(Options):
            if isinstance(tokens, str) is False:
                raise UnitexException("[TOKENIZE] Wrong value for the 'tokens' option. String required.")
            if exists(tokens) is False:
-                raise UnitexException("[TOKENIZE] Offsets file '%s' doesn't exist." % tokens)
+                raise UnitexException("[TOKENIZE] Tokens file '%s' doesn't exist." % tokens)
        self["tokens"] = tokens
        input_offsets = options.get("input_offsets", None)
@@ -664,6 +671,8 @@ class UnitexConfig(Options):
        super(UnitexConfig, self).__init__()
    def load(self, settings):
+        options = settings.get("global", {})
        verbose = options.get("verbose", VERBOSE)
        if verbose not in (0, 1, 2):
            raise UnitexException("Wrong value for the 'verbose' global option.")
@@ -689,25 +698,19 @@ class UnitexConfig(Options):
            raise UnitexException("Wrong value for the 'virtualization' global option.")
        self["virtualization"] = bool(virtualization)
-        resources = ResourcesOptions()
+        self["resources"] = ResourcesOptions(settings.get("resources", {}))
-        self["resources"] = resources.load(settings.get("resources", {}))
        options = settings.get("options", {})
+        # One entry per tool: the key is the tool function name and the value is
+        # built from the sub-section of 'options' that carries the same name.
-        normalize = NormalizeOptions()
+        self["check_dic"] = CheckDicOptions(options.get("check_dic", {}))
-        self["normalize"] = normalize.load(options.get("normalize", {}))
+        self["compress"] = CompressOptions(options.get("compress", {}))
+        self["concord"] = ConcordOptions(options.get("concord", {}))
-        tokenize = TokenizeOptions()
+        self["dico"] = DicoOptions(options.get("dico", {}))
-        self["tokenize"] = current.load(options.get("tokenize", {}))
+        self["extract"] = ExtractOptions(options.get("extract", {}))
+        self["fst2txt"] = Fst2TxtOptions(options.get("fst2txt", {}))
-        dico = DicoOptions()
+        self["grf2fst2"] = Grf2Fst2Options(options.get("grf2fst2", {}))
-        self["dico"] = current.load(options.get("dico", {}))
+        self["locate"] = LocateOptions(options.get("locate", {}))
+        self["normalize"] = NormalizeOptions(options.get("normalize", {}))
-        locate = LocateOptions()
+        self["sort_txt"] = SortTxtOptions(options.get("sort_txt", {}))
-        self["locate"] = current.load(options.get("locate", {}))
+        self["tokenize"] = TokenizeOptions(options.get("tokenize", {}))
+        self["txt2tfst"] = Txt2TFstOptions(options.get("txt2tfst", {}))
-        concord = ConcordOptions()
-        self["concord"] = current.load(options.get("concord", {}))
-        extract = ExtractOptions()
-        self["extract"] = current.load(options.get("extract", {}))
--- a/unitex/io.py
+++ b/unitex/io.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import logging
 import os
 from _unitex import *
-from unitex import UnitexException, UnitexConstants, LOGGER
+from unitex import UnitexException, UnitexConstants
+LOGGER = logging.getLogger(__name__)
@@ -168,6 +171,14 @@ def ls(path):
    return unitex_ls(path)
 def exists(path):
+    """This function verifies whether a path exists (on disk or in the virtual filesystem).
+    Argument:
+        path [str] -- path to check
+    Return [bool]:
+        The function returns 'True' if the path exists and 'False' otherwise.
+    """
    if path.startswith(UnitexConstants.VFS_PREFIX) is False:
        return os.path.exists(path)
    return path in ls(path)

--- a/unitex/processor.py
+++ b/unitex/processor.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import logging
 from unitex import *
 from unitex.resources import *
 from unitex.tools import *
+LOGGER = logging.getLogger(__name__)
 class UnitexProcessor(object):

--- a/unitex/resources.py
+++ b/unitex/resources.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import logging
 from _unitex import *
-from unitex import LOGGER
+LOGGER = logging.getLogger(__name__)

--- a/unitex/tools.py
+++ b/unitex/tools.py