diff --git a/setup.py b/setup.py
index c40ce57d0a000203ba3bf2dca86825f0e530a036..20761be75cbaae3ed2c66bdc24b51d531897dc64 100644
--- a/setup.py
+++ b/setup.py
@@ -107,8 +107,11 @@ setup(name = "unitex",
       license = "GPLv3",
       install_requires = [],
       
-      package_dir = {"unitex":"unitex"},
-      packages = ["unitex"],
+      package_dir = {"unitex": "unitex",
+					 "unitex.utils": "unitex/utils"},
+
+      packages = ["unitex",
+				  "unitex.utils"],
       
       data_files = [],
       
diff --git a/tests/05_test_utils.py b/tests/05_test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..682ee92970daa4c82b386cfc8751407b587e8f0e
--- /dev/null
+++ b/tests/05_test_utils.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import shutil
+import unittest
+
+from unitex.utils.fsa import Automaton
+
+
+
+class Arguments:
+
+    def __init__(self, language=None):
+        self.__arguments = {}
+
+        self.__arguments["raw"] = "data/grf-raw.dot"
+        self.__arguments["determinized"] = "data/grf-determinized.dot"
+        self.__arguments["minimized"] = "data/grf-minimized.dot"
+
+        self.__arguments["automaton"] = None
+
+    def __getitem__(self, key):
+        if key not in self.__arguments:
+            raise KeyError("Argument '%s' not found ..." % key)
+        return self.__arguments[key]
+
+    def __setitem__(self, key, value):
+        self.__arguments[key] = value
+
+
+
+class TestUnitexUtils(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(self):
+        self._arguments = Arguments()
+
+    @classmethod
+    def tearDownClass(self):
+        if os.path.exists(self._arguments["raw"]):
+            os.remove(self._arguments["raw"])
+
+        if os.path.exists(self._arguments["determinized"]):
+            os.remove(self._arguments["determinized"])
+
+        if os.path.exists(self._arguments["minimized"]):
+            os.remove(self._arguments["minimized"])
+
+    def test_01_automaton_build(self):
+        self._arguments["automaton"] = Automaton("MWU Test")
+
+        path1 = "président français de la république"
+        path2 = "président de la république"
+        path3 = "ministre islandais de la défense"
+        path4 = "ministre islandais à la défense"
+        path5 = "secrétaire d'état à la défense"
+        path6 = "secrétaire d'état"
+        path7 = "secrétaire"
+        path8 = "adjoint au secrétaire d'état"
+        path9 = "adjoint au secrétaire d'état à la défense"
+
+        self._arguments["automaton"].add_path(path1.split())
+        self._arguments["automaton"].add_path(path2.split())
+        self._arguments["automaton"].add_path(path3.split())
+        self._arguments["automaton"].add_path(path4.split())
+        self._arguments["automaton"].add_path(path5.split())
+        self._arguments["automaton"].add_path(path6.split())
+        self._arguments["automaton"].add_path(path7.split())
+        self._arguments["automaton"].add_path(path8.split())
+        self._arguments["automaton"].add_path(path9.split())
+
+        self._arguments["automaton"].todot(self._arguments["raw"])
+        self.assertTrue(os.path.exists(self._arguments["raw"]), "Automaton building failed!")
+
+    def test_02_automaton_determinize(self):
+        self._arguments["automaton"].determinize()
+        self._arguments["automaton"].todot(self._arguments["determinized"])
+
+        self.assertTrue(os.path.exists(self._arguments["determinized"]), "Automaton determinization failed!")
+
+
+    def test_03_automaton_minimize(self):
+        self._arguments["automaton"].minimize()
+        self._arguments["automaton"].todot(self._arguments["minimized"])
+
+        self.assertTrue(os.path.exists(self._arguments["minimized"]), "Automaton minimization failed!")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/06_test_formats.py b/tests/06_test_formats.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9fd0512f0988ab523afcc3d394508451199ecd6
--- /dev/null
+++ b/tests/06_test_formats.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import shutil
+import unittest
+
+from unitex.utils.formats import *
+
+
+
+class Arguments:
+
+    def __init__(self, language=None):
+        self.__arguments = {}
+
+        self.__arguments["bin-v1"] = "data/dictionary-v1.bin"
+        self.__arguments["inf-v1"] = "data/dictionary-v1.inf"
+        self.__arguments["enc-v1"] = "utf-16-le"
+
+        self.__arguments["grf"] = "data/automaton.grf"
+
+    def __getitem__(self, key):
+        if key not in self.__arguments:
+            raise KeyError("Argument '%s' not found ..." % key)
+        return self.__arguments[key]
+
+
+
+class TestUnitexUtils(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(self):
+        self._arguments = Arguments()
+
+    @classmethod
+    def tearDownClass(self):
+        if os.path.exists(self._arguments["grf"]):
+            os.remove(self._arguments["grf"])
+
+    def test_01_grf_build(self):
+        grf = GRF("GRF")
+
+        path1 = "président français de la république"
+        path2 = "président de la république"
+        path3 = "ministre islandais de la défense"
+        path4 = "ministre islandais à la défense"
+        path5 = "secrétaire d'état à la défense"
+        path6 = "secrétaire d'état"
+        path7 = "secrétaire"
+        path8 = "adjoint au secrétaire d'état"
+        path9 = "adjoint au secrétaire d'état à la défense"
+
+        grf.add_path(path1.split())
+        grf.add_path(path2.split())
+        grf.add_path(path3.split())
+        grf.add_path(path4.split())
+        grf.add_path(path5.split())
+        grf.add_path(path6.split())
+        grf.add_path(path7.split())
+        grf.add_path(path8.split())
+        grf.add_path(path9.split())
+
+        grf.save(self._arguments["grf"])
+        self.assertTrue(os.path.exists(self._arguments["grf"]), "GRF creation failed!")
+
+    def test_02_old_dictionary(self):
+        dictionary = OldCompiledDictionary()
+        dictionary.load(self._arguments["bin-v1"],\
+                        self._arguments["inf-v1"],\
+                        self._arguments["enc-v1"])
+
+        ret = True if dictionary.find("Sébastien") else False
+
+        self.assertTrue(ret, "Dictionary lookup failed!")
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/data/dictionary-v1.bin b/tests/data/dictionary-v1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1fdbe060565129d4b7f737e3598c05ac24caa528
Binary files /dev/null and b/tests/data/dictionary-v1.bin differ
diff --git a/tests/data/dictionary-v1.inf b/tests/data/dictionary-v1.inf
new file mode 100644
index 0000000000000000000000000000000000000000..1a4d8e80f6c0d1c4db237d8846e236f0dbadf236
Binary files /dev/null and b/tests/data/dictionary-v1.inf differ
diff --git a/tests/data/text.tfst b/tests/data/text.tfst
new file mode 100644
index 0000000000000000000000000000000000000000..4fb9e17e4d7cdbb5f22acc50d0ed790c5e9d5c11
--- /dev/null
+++ b/tests/data/text.tfst
@@ -0,0 +1,273 @@
+0000000002
+$1
+Dans son jardin de Norvège, Sébastien joue du biniou près de son verger. 
+0/4 1/1 2/3 1/1 3/6 1/1 4/2 1/1 5/7 6/1 1/1 7/9 1/1 8/4 1/1 9/2 1/1 10/6 1/1 11/4 1/1 4/2 1/1 2/3 1/1 12/6 13/1 1/1
+0_0
+: 1 1
+: 2 2 3 2 4 2
+: 5 5 6 3
+: 7 4 8 5 9 5
+: 11 5 12 5 13 5
+: 10 6
+: 14 7
+: 15 8
+: 16 9 17 9
+: 18 10 19 11
+: 21 11
+: 20 12
+: 22 15 23 13 24 13
+: 25 14 26 15 27 15
+: 31 15 32 15 33 15
+: 28 16 29 16 30 16
+: 34 17 35 17
+: 36 18
+t
+f
+@<E>
+.
+@STD
+@{Dans,dans.PREP+Dnom+z1}
+@0.0.0-0.3.0
+.
+@STD
+@{son,son.N+Conc+[Veg]+z1:ms}
+@2.0.0-2.2.0
+.
+@STD
+@{son,son.N+[Bruit]+z1:ms}
+@2.0.0-2.2.0
+.
+@STD
+@{son,son.DET+Dposs3s+z1:ms:fs}
+@2.0.0-2.2.0
+.
+@STD
+@{jardin de,jardin de.NDET+Dnom7}
+@4.0.0-6.1.0
+.
+@STD
+@{jardin,jardin.N+z1:ms}
+@4.0.0-4.5.0
+.
+@STD
+@{de,.PREP+z1}
+@6.0.0-6.1.0
+.
+@STD
+@{de,de.PREP+z1}
+@6.0.0-6.1.0
+.
+@STD
+@{de,de.DET+Dind+z1:ms:fs:mp:fp}
+@6.0.0-6.1.0
+.
+@STD
+@Norvège
+@8.0.0-8.6.0
+.
+@STD
+@{du,.DET+Dind+z1:ms}
+@6.0.0-6.1.0
+.
+@STD
+@{des,un.DET+Dind+z1:mp:fp}
+@6.0.0-6.1.0
+.
+@STD
+@{de la,du.DET+Dind+z1:fs}
+@6.0.0-6.1.0
+.
+@STD
+@,
+@9.0.0-9.0.0
+.
+@STD
+@Sébastien
+@11.0.0-11.8.0
+.
+@STD
+@{joue,jouer.V+z1:P1s:P3s:S1s:S3s:Y2s}
+@13.0.0-13.3.0
+.
+@STD
+@{joue,joue.N+z1:fs}
+@13.0.0-13.3.0
+.
+@STD
+@{de,.PREP+z1}
+@15.0.0-15.1.0
+.
+@STD
+@{du,du.DET+Dind+z1:ms}
+@15.0.0-15.1.0
+.
+@STD
+@{biniou,biniou.N+z2:ms}
+@17.0.0-17.5.0
+.
+@STD
+@{le,.DET+Ddef+z1:ms}
+@15.0.0-15.1.0
+.
+@STD
+@{près de,près de.PREP+EPCPQ+z1}
+@19.0.0-21.1.0
+.
+@STD
+@{près,près.PREP+Dnom+z1}
+@19.0.0-19.3.0
+.
+@STD
+@{près,près.ADV}
+@19.0.0-19.3.0
+.
+@STD
+@{de,.PREP+z1}
+@21.0.0-21.1.0
+.
+@STD
+@{de,de.PREP+z1}
+@21.0.0-21.1.0
+.
+@STD
+@{de,de.DET+Dind+z1:ms:fs:mp:fp}
+@21.0.0-21.1.0
+.
+@STD
+@{son,son.N+Conc+[Veg]+z1:ms}
+@23.0.0-23.2.0
+.
+@STD
+@{son,son.N+[Bruit]+z1:ms}
+@23.0.0-23.2.0
+.
+@STD
+@{son,son.DET+Dposs3s+z1:ms:fs}
+@23.0.0-23.2.0
+.
+@STD
+@{du,.DET+Dind+z1:ms}
+@21.0.0-21.1.0
+.
+@STD
+@{des,un.DET+Dind+z1:mp:fp}
+@21.0.0-21.1.0
+.
+@STD
+@{de la,du.DET+Dind+z1:fs}
+@21.0.0-21.1.0
+.
+@STD
+@{verger,verger.V+z1:W}
+@25.0.0-25.5.0
+.
+@STD
+@{verger,verger.N+z1:ms}
+@25.0.0-25.5.0
+.
+@STD
+@.
+@26.0.0-26.0.0
+.
+f
+$2
+Il est heureux Monsieur Paumier et ce n'est pas dommage. 
+15/2 1/1 16/3 1/1 17/7 1/1 18/8 1/1 19/7 1/1 20/2 1/1 21/2 1/1 22/1 23/1 16/3 1/1 24/3 1/1 25/7 13/1 1/1
+29_77
+: 1 1
+: 2 2 3 2 4 2
+: 5 3 6 3
+: 7 4
+: 8 5
+: 9 6
+: 10 7 11 7 12 7
+: 13 8
+: 14 9 15 9 16 9
+: 17 10 18 10
+: 19 11
+: 20 12
+t
+f
+@<E>
+.
+@STD
+@{Il,il.PRO+PpvIL+z1:3ms}
+@0.0.0-0.1.0
+.
+@STD
+@{est,être.V+z1:P3s}
+@2.0.0-2.2.0
+.
+@STD
+@{est,est.N+z1:ms}
+@2.0.0-2.2.0
+.
+@STD
+@{est,est.A+z1:ms:fs:mp:fp}
+@2.0.0-2.2.0
+.
+@STD
+@{heureux,heureux.N+z1:ms:mp}
+@4.0.0-4.6.0
+.
+@STD
+@{heureux,heureux.A+z1:ms:mp}
+@4.0.0-4.6.0
+.
+@STD
+@{Monsieur,monsieur.N+z1:ms}
+@6.0.0-6.7.0
+.
+@STD
+@{Paumier,paumier.N:ms}
+@8.0.0-8.6.0
+.
+@STD
+@{et,et.CONJC}
+@10.0.0-10.1.0
+.
+@STD
+@{ce,ce.PRO+PpvIL+z1:3ms:3mp}
+@12.0.0-12.1.0
+.
+@STD
+@{ce,ce.PRO+Pdem+z1:ms}
+@12.0.0-12.1.0
+.
+@STD
+@{ce,ce.DET+Ddem+z1:ms}
+@12.0.0-12.1.0
+.
+@STD
+@{ne,.XI+z1}
+@14.0.0-15.0.0
+.
+@STD
+@{est,être.V+z1:P3s}
+@16.0.0-16.2.0
+.
+@STD
+@{est,est.N+z1:ms}
+@16.0.0-16.2.0
+.
+@STD
+@{est,est.A+z1:ms:fs:mp:fp}
+@16.0.0-16.2.0
+.
+@STD
+@{pas,pas.N+z1:ms:mp}
+@18.0.0-18.2.0
+.
+@STD
+@{pas,pas.ADV+z1}
+@18.0.0-18.2.0
+.
+@STD
+@{dommage,dommage.N+z1:ms}
+@20.0.0-20.6.0
+.
+@STD
+@.
+@21.0.0-21.0.0
+.
+f
diff --git a/tests/data/text.tind b/tests/data/text.tind
new file mode 100644
index 0000000000000000000000000000000000000000..f3f1dca1d4a84042d99df8e84574e2756812fc1f
Binary files /dev/null and b/tests/data/text.tind differ
diff --git a/unitex/processor.py b/unitex/processor.py
index 665d070885d60c878b8c7f9b2c1ddd3c09b8bdaf..50e06bbbbaf63b368e3c199eae808ba0e90ea3a7 100644
--- a/unitex/processor.py
+++ b/unitex/processor.py
@@ -390,6 +390,9 @@ class UnitexProcessor(object):
         self.__snt = None
         self.__dir = None
 
+    def tofst(self):
+        pass
+
     def iter(self, grammar, **kwargs):
         """
         This function iters over the grammar matches.
diff --git a/unitex/utils/__init__.py b/unitex/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..586a5fd94ea213fa09f346a788b78c5db0b9646b
--- /dev/null
+++ b/unitex/utils/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
diff --git a/unitex/utils/formats.py b/unitex/utils/formats.py
new file mode 100644
index 0000000000000000000000000000000000000000..e312fe127b3c325d34351b1a72842524feaaa2d5
--- /dev/null
+++ b/unitex/utils/formats.py
@@ -0,0 +1,601 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import array
+import logging
+import re
+import struct
+
+from unitex import UnitexException, UnitexConstants
+from unitex.utils.fsa import FSAConstants, Automaton
+from unitex.utils.types import Tag, Entry, ENTRY
+
+_LOGGER = logging.getLogger(__name__)
+
+
+
+class CompressedEntry(Entry):
+
+    SEPARATORS = (" ", "-")
+    SPLITTER = re.compile("([-\s])")
+
+    def __init__(self):
+        super(CompressedEntry, self).__init__()
+
+    def compute(self, lemma, form):
+        n, i = "", 0
+
+        while i < len(lemma) and lemma[i].isdigit():
+            n = n + lemma[i]
+            i = i + 1
+
+        if i > 0:
+            prefix = form[:len(form)-int(n)]
+        else:
+            prefix = form
+
+        suffix = lemma[i:]
+
+        return "%s%s" % (prefix, suffix)
+
+    def uncompress(self, lemma):
+        form = self.get_form()
+        if not lemma:
+            return form
+
+        # If two words don't have the same number of elements
+        # the compressed lemma is preceded by '_'
+        if lemma[0] == '_':
+            return self.compute(lemma[1:], form)
+
+        wtab = self.SPLITTER.split(form)
+        ltab = self.SPLITTER.split(lemma)
+
+        l = []
+        for i in range(len(ltab)):
+            if not ltab[i]:
+                continue
+            elif ltab[i] in self.SEPARATORS:
+                l.append(ltab[i])
+            else:
+                l.append(self.compute(ltab[i], wtab[i]))
+
+        return "".join(l)
+
+    def load(self, form, data, lemmatize=True):
+        data = data.rstrip()
+
+        self.set_form(form)
+        lemma = ""
+
+        i = 0
+
+        lemma, escaped = "", False
+        try:
+            while True:
+                if data[i] == "." and escaped is False:
+                    break
+                elif data[i] == "\\":
+                    if escaped is True:
+                        lemma += data[i]
+                        escaped = False
+                    else:
+                        lemma += data[i]
+                        escaped = True
+                else:
+                    lemma += data[i]
+                    escaped = False
+                i += 1
+        except IndexError:
+            raise UnitexException("Wrong lemma for entry '%s' ..." % data)
+
+        if lemmatize is True:
+            self.set_lemma(self.uncompress(lemma))
+
+        Tag.load(self, data[i+1:])
+
+
+
+class OldCompiledDictionary:
+
+    INITIAL_STATE_OFFSET=4
+    INF_SEPARATOR=re.compile(r"(?<![\\]),")
+
+    def __init__(self):
+        self.__bin = None
+        self.__inf = None
+
+        self.__buffer = None
+
+    def lookup(self, token, i=None, pos=None):
+        if i is None:
+            i = 0
+
+        if pos is None:
+            pos = self.INITIAL_STATE_OFFSET
+        tnbr = self.__bin[pos] * 256 + self.__bin[pos+1]
+        pos = pos + 2
+
+        _LOGGER.debug("Lookup Start: token[%s|%s] -- pos(%s) -- tnbr(%s)\n" % (token[:i], token[i:], pos, tnbr))
+
+        if i == len(token):
+            data = []
+
+            _LOGGER.debug("   Check Final State: pos(%s) -- tnbr(%s)\n" % (pos, tnbr))
+            if not (tnbr & 32768):
+                _LOGGER.debug("      -> Final\n")
+                index = self.__bin[pos] * 256 * 256 + self.__bin[pos+1] * 256 + self.__bin[pos+2]
+
+                for inf in self.INF_SEPARATOR.split(self.__inf[index]):
+                    E = CompressedEntry()
+                    E.load(token, inf)
+
+                    data.append(E)
+            else:
+                _LOGGER.debug("      -> Not final\n")
+
+            return data, pos-2
+        elif tnbr & 32768:
+            tnbr = tnbr - 32768
+        else:
+            pos = pos + 3
+
+        for j in range(tnbr):
+            char = chr(self.__bin[pos] * 256 + self.__bin[pos+1])
+            _LOGGER.debug("   Matching char[%s] -- pos(%s) -> current[%s]\n" % (token[i], pos, char))
+
+            pos = pos + 2
+
+            offset = self.__bin[pos] * 256 * 256 + self.__bin[pos+1] * 256 + self.__bin[pos+2]
+            pos = pos + 3
+
+            if char == token[i]:
+                _LOGGER.debug("      -> Char found\n")
+                return self.lookup(token, i+1, offset)
+
+            # WEIRD... Objective: handle whitespaces in MWU dictionaries for the match function
+            #               -> ["Conseil", "d'", "administration"] == "Conseil d'administration"
+            elif char == u" " and i == 0:
+                _LOGGER.debug("   -> Char is whitespace [pass]\n")
+                return self.lookup(token, i, offset)
+
+        return None, pos
+
+    def find(self, token):
+        entries, pos = self.lookup(token)
+        return entries
+
+    def match(self, sequence, i=None, mode=None, separator=None):
+        if i is None:
+            i = 0
+
+        if mode is None:
+            mode = UnitexConstants.MATCH_MODE_LONGEST
+        elif mode not in [UnitexConstants.MATCH_MODE_LONGEST,\
+                          UnitexConstants.MATCH_MODE_SHORTEST,\
+                          UnitexConstants.MATCH_MODE_ALL]:
+            raise UnitexException("Wrong match mode: %s ..." % mode)
+
+        matches = []
+
+        buffer, pos, tnbr = [], None, None
+        for j in range(i, len(sequence)):
+            _LOGGER.debug("Match Token: '%s'\n" % sequence[j])
+
+            entries, pos = self.lookup(sequence[j], pos=pos)
+            if entries is None:
+                _LOGGER.debug("   -> No entry found ...\n")
+                break
+            _LOGGER.debug("   -> Entries found: pos[%s] -- tnbr[%s]\n" % (pos, tnbr))
+
+            buffer.append(j)
+
+            if entries:
+                matches.append((entries, buffer[:]))
+                if mode == UnitexConstants.MATCH_MODE_SHORTEST:
+                    return matches
+
+            if separator is not None:
+                _LOGGER.debug("Match Separator: '%s'\n" % separator)
+                entries, pos = self.lookup(separator, pos=pos)
+                if entries is None:
+                    _LOGGER.debug("   -> No separator found ...\n")
+                    break
+                _LOGGER.debug("   -> Separator found\n")
+
+        if not matches:
+            return None
+        elif mode == UnitexConstants.MATCH_MODE_LONGEST:
+            return [matches[-1]]
+        elif mode == UnitexConstants.MATCH_MODE_ALL:
+            return matches
+
+    def dump(self, pos=None):
+        if pos is None:
+            pos = self.INITIAL_STATE_OFFSET
+            self.__buffer = []
+
+        tnbr = self.__bin[pos] * 256 + self.__bin[pos+1]
+        pos = pos + 2
+
+        if not (tnbr & 32768):
+            index = self.__bin[pos] * 256 * 256 + self.__bin[pos+1] * 256 + self.__bin[pos+2]
+
+            form = "".join(self.__buffer)
+
+            for inf in self.INF_SEPARATOR.split(self.__inf[index]):
+                E = CompressedEntry()
+                E.load(form, inf)
+                yield E
+
+            pos = pos + 3
+
+        else:
+            tnbr = tnbr - 32768
+
+        for j in range(tnbr):
+            self.__buffer.append(chr(self.__bin[pos] * 256 + self.__bin[pos+1]))
+            pos = pos + 2
+
+            offset = self.__bin[pos] * 256 * 256 + self.__bin[pos+1] * 256 + self.__bin[pos+2]
+            pos = pos + 3
+
+            for E in self.dump(offset):
+                yield E
+
+        if self.__buffer:
+            self.__buffer.pop()
+
+    def load(self, bin, inf, encoding=None):
+        if encoding is None:
+            encoding = UnitexConstants.DEFAULT_ENCODING
+        INF = open(inf, "r", encoding=encoding)
+
+        self.__inf = INF.readlines()
+        self.__inf.pop(0) # Remove number information
+
+        INF.close()
+
+        BIN = open(bin, "r+b")
+
+        a = struct.unpack('B', BIN.read(1))[0]
+        b = struct.unpack('B', BIN.read(1))[0]
+        c = struct.unpack('B', BIN.read(1))[0]
+        d = struct.unpack('B', BIN.read(1))[0]
+        size = d + (256*c) + (256*256*b) + (256*256*256*a)
+
+        BIN.close()
+
+        BIN = open(bin, "rb")
+
+        self.__bin = array.array('B')
+
+        byte = BIN.read(1)
+        while byte:
+            tmp = struct.unpack('B', byte)[0]
+
+            self.__bin.append(tmp)
+
+            byte = BIN.read(1)
+
+        BIN.close()
+
+
+
+class GRF(Automaton):
+
+    def __init__(self, name="GRF"):
+        super(GRF, self).__init__(name)
+
+    def load(self, file, encoding=None):
+        if encoding is None:
+            encoding = UnitexConstants.DEFAULT_ENCODING
+        raise NotImplementedError
+
+    def save(self, file, encoding=None):
+        if encoding is None:
+            encoding = UnitexConstants.DEFAULT_ENCODING
+
+        X = 1000
+        Y = 1000
+        GAP = 20
+
+        transitions = []
+        transitions.append({"label": FSAConstants.EPSILON, "targets": set([])})
+        transitions.append({"label": "", "targets": set([])})
+
+        nmap = {}
+        root = []
+
+        for edge, sid, tid in self.iter("dfs"):
+            source = self[sid]
+            target = self[tid]
+
+            index = 0
+
+            key = (str(edge), tid)
+            if key in nmap:
+                index = nmap[key]
+            else:
+                index = len(transitions)
+                nmap[key] = index
+                transitions.append({"label": str(edge), "targets": set([])})
+
+            if sid == self.get_initial():
+                transitions[0]["targets"].add(str(index))
+            if target.is_final() is True:
+                transitions[index]["targets"].add("1")
+
+            for _edge in target:
+                for _target in target[_edge]:
+                    _index = 0
+
+                    _key = (str(_edge), _target.get_id())
+                    if _key in nmap:
+                        _index = nmap[_key]
+                    else:
+                        _index = len(transitions)
+                        nmap[_key] = _index
+                        transitions.append({"label": str(_edge), "targets": set([])})
+
+                    transitions[index]["targets"].add(str(_index))
+
+        with open(file, "w", encoding=encoding) as output:
+            output.write("#Unigraph\r\n")
+            output.write("SIZE %s %s\r\n" % (X+GAP, Y+GAP))
+            output.write("FONT Times New Roman:B 10\r\n")
+            output.write("OFONT Monospaced:B 8\r\n")
+            output.write("BCOLOR 16777215\r\n")
+            output.write("FCOLOR 0\r\n")
+            output.write("ACOLOR 13487565\r\n")
+            output.write("SCOLOR 16711680\r\n")
+            output.write("CCOLOR 255\r\n")
+            output.write("DBOXES y\r\n")
+            output.write("DFRAME y\r\n")
+            output.write("DDATE y\r\n")
+            output.write("DFILE y\r\n")
+            output.write("DDIR n\r\n")
+            output.write("DRIG n\r\n")
+            output.write("DRST n\r\n")
+            output.write("FITS 100\r\n")
+            output.write("PORIENT L\r\n")
+            output.write("#\r\n")
+            output.write("%s\r\n" % len(transitions))
+
+            for transition in transitions:
+                label = transition["label"]
+                size = len(transition["targets"])
+                targets = " ".join(list(transition["targets"]))
+
+                if size == 0:
+                    output.write('"%s" %s %s %s \r\n' % (label, GAP, GAP, size))
+                else:
+                    output.write('"%s" %s %s %s %s \r\n' % (label, GAP, GAP, size, targets))
+
+
+
+class SentenceFST(Automaton):
+
+    def __init__(self, name="SentenceFST"):
+        super(SentenceFST, self).__init__(name)
+
+        self.__sentence = None
+
+        self.__tokens = None
+        self.__labels = None
+
+    def get_sentence(self):
+        return self.__sentence
+
+    def get_tokens(self):
+        return self.__tokens
+
+    def get_token(self, i):
+        return self.__tokens[i]
+
+    def get_label(self, i):
+        return self.__labels[i]
+
+    def load(self, sentence, tokens, states, labels):
+        self.__sentence = sentence
+
+        self.__tokens = []
+        self.__labels = {}
+
+        start = 0
+        for index, length in tokens:
+            end = start + length
+
+            self.__tokens.append(self.__sentence[start:end])
+            start = end
+
+        transitions = []
+
+        for i in range(len(states)):
+            initial = False
+            if i == 0:
+                initial = True
+
+            final = False
+            if states[i] == "t":
+                final = True
+
+            sid = self.add_node(initial=initial, final=final)
+            if final is True:
+                break
+
+            for lid, tid in states[i]:
+                entry = labels[lid][0]
+
+                p1 = labels[lid][1][0][0]
+                p2 = labels[lid][1][1][0]
+
+                if p1 not in self.__labels:
+                    self.__labels[p1] = []
+                self.__labels[p1].append((entry, p2))
+
+                transitions.append((sid, lid, tid))
+
+        for sid, lid, tid in transitions:
+            self.add_edge(lid, sid, tid)
+
+
+
+class TextFST:
+
+    def __init__(self):
+        self.__file = None
+        self.__size = 0
+
+    def __len__(self):
+        return self.__size
+
+    def next(self):
+        line = self.__file.readline()
+
+        while line:
+            line = line.rstrip()
+
+            if line[0] != "$":
+                raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+            # The sentence number (format '$n')
+            number = int(line[1:])
+
+            line = self.__file.readline()
+            line = line.rstrip()
+
+            # The text of the sentence
+            text = line
+
+            line = self.__file.readline()
+            line = line.rstrip()
+
+            # The tokens of the text
+            #   -> [(x1, y), (x2, y2), ..., (xi, yi)]
+            #      where,
+            #        - x: token index in file 'tokens.txt'
+            #        - y: length of the token (in characters)
+            tokens = [tuple(int(t) for t in token.split("/")) for token in line.split(" ")]
+
+            line = self.__file.readline()
+            line = line.rstrip()
+
+            # The offset of the sentence (from the begining of the text)
+            #   -> X_Y
+            #      where,
+            #        - X: the offset in tokens
+            #        - Y: the offset in characters
+            offset = tuple(int(o) for o in line.split("_"))
+
+            line = self.__file.readline()
+            line = line.rstrip()
+
+            states = []
+            while line != "t":
+                if line[0] != ":":
+                    raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+                line = line[1:].strip()
+                line = line.split()
+
+                state = []
+                for i in range(0, len(line), 2):
+                    state.append((int(line[i]), int(line[i+1])))
+                states.append(state)
+
+                line = self.__file.readline()
+                line = line.rstrip()
+
+                if not line:
+                    raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+            states.append(line)
+
+            line = self.__file.readline()
+            line = line.rstrip()
+
+            if line[0] != "f":
+                raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+            line = self.__file.readline()
+            line = line.rstrip()
+
+            tags = []
+            while line != "f":
+                if line == "@<E>":
+                    tags.append(("<E>", None))
+
+                elif line == "@STD":
+                    line = self.__file.readline()
+                    line = line.rstrip()
+
+                    content = line[1:]
+
+                    entry = Entry()
+
+                    if ENTRY.match(content):
+                        entry.load(content)
+                    else:
+                        entry.set_form(content)
+
+                    line = self.__file.readline()
+                    line = line.rstrip()
+
+                    if line[0] != "@":
+                        raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+                    position = [tuple(int(i) for i in p.split(".")) for p in line[1:].split("-")]
+
+                    tags.append((entry, position))
+
+                else:
+                    raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+                line = self.__file.readline()
+                line = line.rstrip()
+
+                if line[0] != ".":
+                    raise UnitexException("File '%s' is corrupted ..." % self.__file.name)
+
+                line = self.__file.readline()
+                line = line.rstrip()
+
+            _LOGGER.debug("SENTENCE[%s]\n" % number)
+            _LOGGER.debug(" - offset: %s\n" % offset)
+            _LOGGER.debug(" - text: %s\n" % text)
+            _LOGGER.debug(" - tokens: %s\n" % tokens)
+            _LOGGER.debug(" - states:\n")
+            for state in states:
+                _LOGGER.debug("   - s: %s\n" % state)
+            _LOGGER.debug(" - tags:\n")
+            for tag in tags:
+                _LOGGER.debug("   - t: %s\n" % tag)
+
+            S = SentenceFST("SENTENCE[%d]" % number)
+            S.load(text, tokens, states, tags)
+
+            return S
+
+    def __iter__(self):
+        sentence = self.next()
+        while sentence:
+            yield sentence
+
+            sentence = self.next()
+
+    def open(self, file, encoding=None):
+        if encoding is None:
+            encoding = UnitexConstants.DEFAULT_ENCODING
+
+        self.__file = open(file, "r", encoding=encoding)
+
+        line = self.__file.readline()
+        line = line.rstrip()
+
+        # The number of sentence in the text fst (format: '000000000N')
+        self.__size = int(line)
+
+    def close(self):
+        self.__file.close()
+        self.__size = 0
diff --git a/unitex/utils/fsa.py b/unitex/utils/fsa.py
new file mode 100644
index 0000000000000000000000000000000000000000..de6250e4150ab03b4b21f9833d151c43b663392d
--- /dev/null
+++ b/unitex/utils/fsa.py
@@ -0,0 +1,574 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from io import open
+
+from unitex import *
+
+_LOGGER = logging.getLogger(__name__)
+
+
+
class FSAConstants:
    """Symbolic constants shared by the fsa module."""

    # Label used for the epsilon (empty-word) transition.
    EPSILON = "<E>"

    # Traversal strategies accepted by Automaton.iter().
    DEPTH_FIRST_SEARCH = "dfs"
    BREADTH_FIRST_SEARCH = "bfs"
+
+
+
class Edge:
    """A labelled transition from a source node to one or more targets.

    A parallel set of target ids (__tids) provides O(1) membership tests
    and deduplication; it must stay in sync with the target list.
    """

    def __init__(self, label, targets=None, source=None):
        self.__label = label

        self.__source = source

        self.__targets = targets
        # Keep the id set defined even when no target list is given, so
        # that add_target()/__contains__ work on a fresh edge.
        if targets is None:
            self.__tids = set()
        else:
            self.__tids = set([target.get_id() for target in targets])

    def __len__(self):
        return len(self.__targets)

    def __str__(self):
        return self.get_label()

    def __hash__(self):
        return hash(self.get_label())

    def __eq__(self, e):
        # BUG FIX: the original __cmp__ compared the label with itself
        # (every edge compared equal) and relied on Python 2's cmp();
        # edges now compare by label, consistently with __hash__.
        if not isinstance(e, Edge):
            return NotImplemented
        return self.get_label() == e.get_label()

    def __ne__(self, e):
        result = self.__eq__(e)
        if result is NotImplemented:
            return result
        return not result

    def __iter__(self):
        for target in self.__targets:
            yield target

    def __contains__(self, target):
        return target.get_id() in self.__tids

    def __getitem__(self, i):
        return self.__targets[i]

    def get_label(self):
        return self.__label

    def get_source(self):
        return self.__source

    def set_source(self, source):
        self.__source = source

    def get_targets(self):
        return self.__targets

    def set_targets(self, targets):
        self.__targets = targets
        self.__tids = set([target.get_id() for target in targets])

    def add_target(self, target):
        """Append 'target' unless a node with the same id is present."""
        if target.get_id() in self.__tids:
            return
        if self.__targets is None:
            self.__targets = []
        # BUG FIX: the id set was not updated on insertion, so duplicates
        # could slip in and membership tests missed newly added targets.
        self.__tids.add(target.get_id())
        self.__targets.append(target)

    def del_target(self, target):
        """Remove the target with the same id as 'target' (no-op if absent)."""
        if target.get_id() not in self.__tids:
            return

        self.__tids.remove(target.get_id())

        for i in range(len(self.__targets)):
            if self.__targets[i].get_id() == target.get_id():
                del self.__targets[i]
                break
+
+
+
class Node:
    """A single automaton state with labelled outgoing edges."""

    def __init__(self, _id, final=False):
        self.__id = _id

        self.__final = final
        self.__edges = {}       # label -> Edge

        self.__depth = 0        # distance from the initial state

        self.__visited = False  # traversal bookkeeping flag

    def __len__(self):
        return len(self.__edges)

    def __contains__(self, label):
        return label in self.__edges

    def __getitem__(self, label):
        return self.__edges.get(label, None)

    def __iter__(self):
        return iter(self.__edges)

    def __str__(self):
        lines = ["NODE[%s]" % str(self.get_id())]

        if self.is_final():
            lines[0] += " -- FINAL"

        for label in self:
            ids = " | ".join([str(target.get_id()) for target in self[label]])
            lines.append("\t%s -> (%s)" % (label, ids))

        return "\n".join(lines)

    def get_id(self):
        return self.__id

    def set_id(self, i):
        self.__id = i

    def is_deterministic(self):
        """True when there is no epsilon edge and at most one target per
        label."""
        if FSAConstants.EPSILON in self.__edges:
            return False
        return all(len(self[label]) <= 1 for label in self.__edges)

    def exists(self, label, node=None):
        """True when an edge labelled 'label' exists (and, if 'node' is
        given, leads to it)."""
        edge = self[label]
        if edge is None:
            return False
        if node is not None and node not in edge:
            return False
        return True

    def add(self, label, target):
        """Connect this node to 'target' through 'label' (idempotent)."""
        if self.exists(label, target):
            return
        edge = self[label]
        if edge is None:
            self.__edges[label] = Edge(label, [target], self)
        else:
            edge.add_target(target)

    def delete(self, label, node=None):
        """Remove the edge 'label' (or only its target 'node').

        Raises:
            UnitexException -- when the edge (or target) does not exist.
        """
        if not self.exists(label, node):
            raise UnitexException("Edge not found: %s" % label)

        if node is None:
            del self.__edges[label]
        else:
            self[label].del_target(node)

    def set_depth(self, depth):
        self.__depth = depth

    def get_depth(self):
        return self.__depth

    def is_visited(self):
        return self.__visited

    def set_visited(self, visited=True):
        self.__visited = visited

    def is_final(self):
        return self.__final

    def set_final(self, final=True):
        self.__final = final
+
+
+
class NodeSets:
    """Partition of node ids into disjoint sets (used by minimization).

    Each node id maps to the canonical (sorted) tuple of its set, so two
    ids belong to the same class iff they map to equal tuples.
    """

    def __init__(self):
        self.__sets = {}  # node id -> canonical sorted tuple

    def __getitem__(self, _id):
        return self.__sets[_id]

    def __contains__(self, s):
        return s in self.all()

    def __iter__(self):
        return iter(self.all())

    def all(self):
        """Return the distinct sets of the partition, as tuples."""
        return {tuple(ids) for ids in self.__sets.values()}

    def add(self, s):
        """Register the ids in 's' as one set (duplicates ignored)."""
        canonical = tuple(sorted(set(s)))
        for _id in s:
            self.__sets[_id] = canonical
+
+
+
class Automaton:
    """A finite-state automaton over arbitrary string labels.

    Nodes are stored in a list and addressed by their integer id; node 0
    is always the initial state.  The class supports epsilon-closure
    based determinization (subset construction), minimization by
    partition refinement, DFS/BFS traversal and Graphviz 'dot' export.
    """

    def __init__(self, name="Automaton"):
        self.__name = name

        self.__nodes = []

        self.__initial = 0
        self.__finals = []

        # Node 0 -- the initial state -- always exists.
        self.__nodes.append(Node(self.__initial, False))

    def __len__(self):
        return len(self.__nodes)

    def __getitem__(self, _id):
        """Return the node with id '_id', or None when out of range."""
        try:
            return self.__nodes[_id]
        except IndexError:
            return None

    def __iter__(self):
        for node in self.__nodes:
            yield node

    def __str__(self):
        title = "# FSA -- %s #" % self.get_name()

        s = "%s\n%s\n%s\n\n" % ("#" * len(title), title, "#" * len(title))

        for node in self:
            s += "%s\n\n" % node

        return s

    def get_name(self):
        return self.__name

    def set_name(self, name):
        self.__name = name

    def get_depth(self):
        """Return the depth of the deepest final node."""
        depth = 0
        for nid in self.__finals:
            final = self.__nodes[nid]

            if final.get_depth() > depth:
                depth = final.get_depth()

        return depth

    def get_initial(self):
        return self.__initial

    def set_initial(self, initial):
        self.__initial = initial

    def get_finals(self):
        return self.__finals

    def set_finals(self, finals):
        self.__finals = finals

    def get_nodes(self):
        return self.__nodes

    def set_nodes(self, nodes):
        self.__nodes = nodes

    def add_edge(self, label, sid, tid):
        """Add a transition labelled 'label' from node 'sid' to 'tid'.

        The target's depth is (re)set to source depth + 1.
        """
        source = self[sid]
        target = self[tid]

        target.set_depth(source.get_depth() + 1)

        source.add(label, target)

    def add_node(self, initial=False, final=False):
        """Return the id of a node, creating it if necessary.

        Node 0 (the initial state) always exists, so initial=True returns
        it instead of creating a new node.  BUG FIX: when both flags are
        True, node 0 is now also marked final -- previously the finality
        of an accepting initial state was silently dropped (e.g. for
        automata accepting the empty word).
        """
        if initial is True:
            if final is True and self.__initial not in self.__finals:
                self[self.__initial].set_final(True)
                self.__finals.append(self.__initial)
            return self.__initial

        nid = len(self.__nodes)
        self.__nodes.append(Node(nid, final))

        if final is True:
            self.__finals.append(nid)

        return nid

    def add_path(self, path):
        """Add a linear path of labels from the initial state to a fresh
        final state.

        Raises:
            UnitexException -- when 'path' is empty.
        """
        if len(path) == 0:
            raise UnitexException("Empty path!")

        sid = self.add_node(initial=True, final=False)

        for label in path[:-1]:
            tid = self.add_node(initial=False, final=False)
            self.add_edge(label, sid, tid)

            sid = tid

        # The last label always leads to a new final node.
        self.add_edge(path[-1], sid, self.add_node(initial=False, final=True))

    def get_alphabet(self):
        """Return the tuple of all labels used in the automaton."""
        alphabet = set()

        for node in self:
            for label in node:
                alphabet.add(label)

        return tuple(alphabet)

    def is_deterministic(self):
        """True when no node has an epsilon edge or several targets for
        one label."""
        for node in self:
            if not node.is_deterministic():
                return False
        return True

    def __closure(self, nid):
        """Return the epsilon-closure of node 'nid' as a sorted id tuple.

        BUG FIX: the original pushed Edge objects (instead of the target
        node ids) onto the work stack, which crashed on any automaton
        actually containing an epsilon transition.  The result is sorted
        so that equal closures are equal tuples (usable as dict keys).
        """
        stack = [nid]
        result = set(stack)

        while len(stack) > 0:
            current = stack.pop()

            edge = self[current][FSAConstants.EPSILON]
            if edge is None:
                continue

            for target in edge:
                tid = target.get_id()
                if tid not in result:
                    result.add(tid)
                    stack.append(tid)

        return tuple(sorted(result))

    def determinize(self):
        """Determinize in place using the subset construction."""
        dfa = Automaton("DETERMINIZED(%s)" % self.get_name())

        alphabet = self.get_alphabet()

        initials = self.__closure(self.get_initial())

        # The start subset is final as soon as it contains a final state.
        final = True in [self[_id].is_final() for _id in initials]
        hid = dfa.add_node(initial=True, final=final)

        visited = {}
        visited[initials] = hid

        stack = [initials]
        while len(stack) > 0:
            current = stack.pop()

            for label in alphabet:
                new = set()
                for node in current:
                    if not label in self[node]:
                        continue
                    for target in self[node][label]:
                        new.update(self.__closure(target.get_id()))

                # Sorted so that equal subsets map to the same dict key.
                new = tuple(sorted(new))

                if len(new) == 0:
                    continue

                if new not in visited:
                    stack.append(new)

                    final = True in [self[_id].is_final() for _id in new]
                    visited[new] = dfa.add_node(final=final)

                dfa.add_edge(label, visited[current], visited[new])

        self.set_name(dfa.get_name())

        self.set_initial(dfa.get_initial())
        self.set_finals(dfa.get_finals())

        self.set_nodes(dfa.get_nodes())

    def minimize(self):
        """Minimize in place by iterative partition refinement.

        Assumes the automaton is already deterministic (only the first
        target of each edge is considered when comparing states).
        NOTE(review): already-processed sets are not revisited after a
        split; verify the refinement reaches a fixpoint on your grammars.
        """
        # 'minimized' instead of the original 'min' (shadowed the builtin).
        minimized = Automaton("MINIMIZED(%s)" % self.get_name())

        alphabet = self.get_alphabet()

        nodetoset = {}
        settonode = {}

        sets = NodeSets()

        # Initial partition: non-final states vs. final states.
        rest, final = [], []
        for node in self:
            if node.is_final():
                final.append(node.get_id())
            else:
                rest.append(node.get_id())

        sets.add(rest)
        sets.add(final)

        stack = [s for s in sets if len(s) > 1]

        def target_set(_id, label):
            # Set containing the (unique) target of '_id' for 'label'.
            edge = self[_id][label]

            if edge is None:
                return None
            else:
                return sets[edge[0].get_id()]

        while len(stack) > 0:
            current = stack.pop()

            for label in alphabet:
                target = target_set(current[0], label)

                # Split 'current' between the states that agree with its
                # first element on 'label' and those that do not.
                one, two = [current[0]], []
                for _id in current[1:]:
                    if target_set(_id, label) == target:
                        one.append(_id)
                    else:
                        two.append(_id)

                if len(two) > 0:
                    sets.add(one)
                    sets.add(two)

                    if len(one) > 1:
                        stack.append(one)
                    if len(two) > 1:
                        stack.append(two)

                    break

        # One node per equivalence class (finality of the initial class
        # is preserved by the fixed add_node()).
        for s in sets:
            initial = self.get_initial() in s
            final = True in [self[_id].is_final() for _id in s]

            _id = minimized.add_node(initial=initial, final=final)

            nodetoset[_id] = s
            settonode[s] = _id

        # Rebuild the transitions between class representatives.
        for node in minimized:
            done = set()

            s = nodetoset[node.get_id()]

            source = self[s[0]]
            for label in source:
                edge = source[label]

                if label in done:
                    continue
                done.add(label)

                for target in edge:
                    t = sets[target.get_id()]
                    minimized.add_edge(label, node.get_id(), settonode[t])

        self.set_name(minimized.get_name())

        self.set_initial(minimized.get_initial())
        self.set_finals(minimized.get_finals())

        self.set_nodes(minimized.get_nodes())

    def reset(self):
        """Clear the 'visited' flag on every node (traversal bookkeeping)."""
        for node in self:
            node.set_visited(False)

    def __expand(self, source):
        """Mark 'source' visited and return its outgoing transitions as
        (label, source id, target id) triples."""
        transitions = []

        source.set_visited(True)
        for label in source:
            edge = source[label]
            for target in edge:
                transitions.append((edge.get_label(), source.get_id(), target.get_id()))

        return transitions

    def iter(self, iter_type=None):
        """Iterate over transitions in DFS or BFS order (BFS by default).

        Yields (label, source id, target id) triples; each node is
        expanded only once per traversal.

        Raises:
            UnitexException -- when the automaton has no transition or
                the iteration type is unknown.
        """
        if iter_type is None:
            iter_type = FSAConstants.BREADTH_FIRST_SEARCH

        if len(self[self.get_initial()]) == 0:
            raise UnitexException("Empty FSA")

        # DFS pops from the end of the worklist, BFS from the front.
        if iter_type == FSAConstants.DEPTH_FIRST_SEARCH:
            i = -1
        elif iter_type == FSAConstants.BREADTH_FIRST_SEARCH:
            i = 0
        else:
            raise UnitexException("Unknown iter type: %s" % iter_type)

        root = self[self.get_initial()]
        if root.is_visited():
            self.reset()

        L = self.__expand(root)
        while L:
            edge, sid, tid = L.pop(i)
            yield (edge, sid, tid)

            if not self[tid].is_visited():
                L += self.__expand(self[tid])

    def __declare(self, output, node, name):
        """Write the dot declaration for 'node' (shape depends on role)."""
        if node.get_id() == self.get_initial():
            output.write("\t%s[shape = circle, label = \"\"];\n" % name)
        elif node.is_final():
            output.write("\t%s[shape = doublecircle, label = \"\"];\n" % name)
        else:
            output.write("\t%s[shape = point, label = \"\"];\n" % name)

    def todot(self, file, encoding=None):
        """Dump the automaton as a Graphviz 'dot' file.

        Arguments:
            file [str] -- output path.
            encoding [str] -- output encoding; defaults to
                UnitexConstants.DEFAULT_ENCODING when None.
        """
        if encoding is None:
            encoding = UnitexConstants.DEFAULT_ENCODING

        with open(file, "w", encoding=encoding) as output:
            output.write("digraph Automaton {\n\n")
            output.write("\tcenter = 1;\n")
            output.write("\tcharset = \"%s\";\n" % encoding)
            output.write("\trankdir = LR;\n")
            output.write("\tranksep = 1;\n")
            output.write("\tedge [arrowhead = vee];\n\n")

            nodes = set()
            edges = set()

            for node in self:
                sid = node.get_id()
                n1 = "node%s" % sid

                if not sid in nodes:
                    nodes.add(sid)
                    self.__declare(output, node, n1)

                for label in node:
                    for target in node[label]:
                        if (node.get_id(), label, target.get_id()) in edges:
                            continue
                        edges.add((node.get_id(), label, target.get_id()))

                        tid = target.get_id()
                        n2 = "node%s" % tid

                        if not tid in nodes:
                            nodes.add(tid)
                            self.__declare(output, target, n2)

                        output.write("\t%s -> %s [label = \"%s\"];\n" % (n1, n2, label))

                output.write("\n")

            output.write("}\n")
diff --git a/unitex/utils/types.py b/unitex/utils/types.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce33eafd9b03c83097184298220964b92d420858
--- /dev/null
+++ b/unitex/utils/types.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import logging
+import re
+
+from unitex import UnitexException
+
+_LOGGER = logging.getLogger(__name__)
+
+
+
class Tag(object):
    """A grammatical tag: POS + '+'-separated features + ':'-separated
    flexional codes.

    Example: "N+Hum:ms:fs" -> pos "N", features ["Hum"],
    flexions ["ms", "fs"].
    """

    def __init__(self, tag=None):
        self.__pos = ""

        self.__features = []
        self.__flexions = []

        if tag is not None:
            self.load(tag)

    def __str__(self):
        return self.get()

    def load(self, tag):
        """Parse 'POS(+feature)*(:flexion)*' into this object."""
        self.__pos = ""

        self.__features = []
        self.__flexions = []

        i = 0

        # The POS runs up to the first '+' (feature) or ':' (flexion).
        pos = ""
        while i < len(tag) and tag[i] != "+" and tag[i] != ":":
            pos += tag[i]
            i += 1

        self.set_pos(pos)

        # Each '+' introduces one feature.
        while i < len(tag) and tag[i] == "+":
            i += 1

            feature = ""
            while i < len(tag) and tag[i] != "+" and tag[i] != ":":
                feature += tag[i]
                i += 1

            if feature:
                self.add_feature(feature)

        # Each ':' introduces one flexional code.
        while i < len(tag) and tag[i] == ":":
            i += 1

            flexion = ""
            while i < len(tag) and tag[i] != ":":
                flexion += tag[i]
                i += 1

            if flexion:
                self.add_flexion(flexion)

    def get(self):
        """Serialize back to 'POS+feat1+feat2:flex1:flex2'.

        BUG FIX: flexions were joined with "" so "N:ms:fs" round-tripped
        to "N:msfs"; they are now joined with ":" and load(get()) is
        stable.
        """
        tag = self.get_pos()

        features = "+".join(self.get_features())
        if features:
            tag += "+%s" % features

        flexions = ":".join(self.get_flexions())
        if flexions:
            tag += ":%s" % flexions

        return tag

    def set_pos(self, pos):
        self.__pos = pos

    def get_pos(self):
        return self.__pos

    def set_features(self, features):
        self.__features = features

    def get_features(self):
        return self.__features

    def add_feature(self, feature):
        self.__features.append(feature)

    def set_flexions(self, flexions):
        self.__flexions = flexions

    def get_flexions(self):
        return self.__flexions

    def add_flexion(self, flexion):
        self.__flexions.append(flexion)
+
+
+
class Entry(Tag):
    """A DELAF-style dictionary entry: 'form,lemma.TAG', where '\\'
    escapes the next character inside the form and the lemma.
    """

    def __init__(self, entry=None):
        # BUG FIX: the original called super(Tag, self).__init__(), which
        # skipped Tag.__init__ entirely and left the tag fields
        # uninitialized when no entry string was given.
        super(Entry, self).__init__()

        self.__form = ""
        self.__lemma = ""

        if entry is not None:
            self.load(entry)

    def __str__(self):
        return self.get()

    def __read(self, entry, i, stop):
        """Consume characters from 'entry[i:]' up to an unescaped 'stop'.

        Returns (text, index just after the delimiter); raises IndexError
        when the delimiter is missing.
        """
        chunk = ""
        escaped = False

        while True:
            c = entry[i]

            if c == stop and escaped is False:
                return chunk, i + 1

            if c == "\\" and escaped is False:
                escaped = True
            else:
                chunk += c
                escaped = False

            i += 1

    def load(self, entry):
        """Parse 'form,lemma.tag' into this object.

        Raises:
            UnitexException -- when the ',' or '.' delimiter is missing.
        """
        try:
            form, i = self.__read(entry, 0, ",")
        except IndexError:
            raise UnitexException("Invalid entry format '%s'. No comma found." % entry)

        self.set_form(form)

        try:
            lemma, i = self.__read(entry, i, ".")
        except IndexError:
            raise UnitexException("Invalid entry format '%s'. No dot found." % entry)

        self.set_lemma(lemma)

        Tag.load(self, entry[i:])

    @staticmethod
    def __escape(s):
        """Backslash-escape the characters that load() treats specially.

        BUG FIX: only ',' was escaped before (and with the invalid "\,"
        escape sequence); '\\' and '.' are unescaped by load() and must
        be protected too for load(get()) to round-trip.
        """
        return s.replace("\\", "\\\\").replace(",", "\\,").replace(".", "\\.")

    def get(self):
        """Serialize back to 'form,lemma.TAG' (delimiters escaped)."""
        form = self.get_form(escape=True)
        lemma = self.get_lemma(escape=True)
        if not lemma:
            lemma = ""

        tag = Tag.get(self)

        return "%s,%s.%s" % (form, lemma, tag)

    def set_form(self, form):
        self.__form = form

    def get_form(self, escape=False):
        if escape is False:
            return self.__form
        return self.__escape(self.__form)

    def set_lemma(self, lemma):
        self.__lemma = lemma

    def get_lemma(self, escape=False):
        if escape is False:
            return self.__lemma
        return self.__escape(self.__lemma)