From 52cb4793a2220b11ef17217560c6db7818371053 Mon Sep 17 00:00:00 2001
From: pat <pat@lucy.home>
Date: Mon, 26 Sep 2016 15:09:30 +0200
Subject: [PATCH] integration of the tind file (for iteration) in TextFST
 object

---
 unitex/utils/formats.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/unitex/utils/formats.py b/unitex/utils/formats.py
index 7a6abb7..9e0a6bc 100644
--- a/unitex/utils/formats.py
+++ b/unitex/utils/formats.py
@@ -458,8 +458,6 @@ class TextFST:
         self.__tfst = None
         self.__tind = None
 
-        self.__size = 0
-
     def __del__(self):
         self.__tfst.close()
 
@@ -467,6 +465,8 @@ class TextFST:
         return len(self.__tind)
 
     def __getitem__(self, i):
+        if i >= len(self):
+            raise UnitexException("TextFST index out of range (size: %s)." % len(self))
         position = self.__tind[i]
 
         self.__tfst.seek(position)
@@ -606,13 +606,6 @@ class TextFST:
             encoding = UnitexConstants.DEFAULT_ENCODING
 
         self.__tfst = open(fst, "r", encoding=encoding)
-
-        line = self.__tfst.readline()
-        line = line.rstrip()
-
-        # The number of sentence in the text fst (format: '000000000N')
-        self.__size = int(line)
-
         self.__tind = []
 
         with open(index, "rb") as fin:
-- 
GitLab