From 52cb4793a2220b11ef17217560c6db7818371053 Mon Sep 17 00:00:00 2001 From: pat <pat@lucy.home> Date: Mon, 26 Sep 2016 15:09:30 +0200 Subject: [PATCH] integration of the tind file (for iteration) in TextFST object --- unitex/utils/formats.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/unitex/utils/formats.py b/unitex/utils/formats.py index 7a6abb7..9e0a6bc 100644 --- a/unitex/utils/formats.py +++ b/unitex/utils/formats.py @@ -458,8 +458,6 @@ class TextFST: self.__tfst = None self.__tind = None - self.__size = 0 - def __del__(self): self.__tfst.close() @@ -467,6 +465,8 @@ class TextFST: return len(self.__tind) def __getitem__(self, i): + if i >= len(self): + raise UnitexException("TextFST index out of range (size: %s)." % len(self)) position = self.__tind[i] self.__tfst.seek(position) @@ -606,13 +606,6 @@ class TextFST: encoding = UnitexConstants.DEFAULT_ENCODING self.__tfst = open(fst, "r", encoding=encoding) - - line = self.__tfst.readline() - line = line.rstrip() - - # The number of sentence in the text fst (format: '000000000N') - self.__size = int(line) - self.__tind = [] with open(index, "rb") as fin: -- GitLab