Skip to content
Extraits de code Groupes Projets
Valider 0a622ab9 rédigé par Patrick Watrin's avatar Patrick Watrin
Parcourir les fichiers

Persistence functions for dictionaries, fst2 and alphabet files

parent 0de89792
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
global:
debug: 1
verbose: 1
persistence: 1
virtualization: 1
resources:
language: fr
alphabet: /home/resources/media/fr/unitex/preprocessing/Alphabet.txt
alphabet-sorted: /home/resources/media/fr/unitex/preprocessing/Alphabet_sort.txt
sentence: /home/resources/media/fr/unitex/preprocessing/Sentence.fst2
replace: /home/resources/media/fr/unitex/preprocessing/Replace.fst2
dictionaries:
- /home/resources/media/fr/unitex/dictionary/delaf-short.bin
- /home/resources/media/fr/unitex/dictionary/delacf-light.bin
- /home/resources/media/fr/unitex/dictionary/toponyms.bin
# The 'options' section can contain any of the arguments used by the unitex
# tools functions.
options:
match-mode: longest
...@@ -24,7 +24,9 @@ UNITEX_INC = os.path.abspath(UNITEX_INC) ...@@ -24,7 +24,9 @@ UNITEX_INC = os.path.abspath(UNITEX_INC)
class CustomBuild(build): class CustomBuild(build):
def run(self): def run(self):
command = "cd %s && make 64BITS=yes LIBRARY=yes" % os.path.join(UNITEX_INC, "build") build.run(self)
command = "cd %s && make 64BITS=yes LIBRARY=yes TRE_DIRECT_COMPILE=yes DEBUG=yes" % os.path.join(UNITEX_INC, "build")
try: try:
process = subprocess.Popen(command, stderr=subprocess.PIPE, shell=True) process = subprocess.Popen(command, stderr=subprocess.PIPE, shell=True)
...@@ -36,13 +38,14 @@ class CustomBuild(build): ...@@ -36,13 +38,14 @@ class CustomBuild(build):
if process.returncode != 0: if process.returncode != 0:
raise OSError(process.stderr.read()) raise OSError(process.stderr.read())
build.run(self)
class CustomClean(clean): class CustomClean(clean):
def run(self): def run(self):
clean.run(self)
command = "cd %s && make clean" % os.path.join(UNITEX_INC, "build") command = "cd %s && make clean" % os.path.join(UNITEX_INC, "build")
try: try:
...@@ -55,13 +58,14 @@ class CustomClean(clean): ...@@ -55,13 +58,14 @@ class CustomClean(clean):
if process.returncode != 0: if process.returncode != 0:
raise OSError(process.stderr.read()) raise OSError(process.stderr.read())
clean.run(self)
class CustomInstall(install): class CustomInstall(install):
def run(self): def run(self):
install.run(self)
library = None library = None
if sys.platform == "darwin": if sys.platform == "darwin":
...@@ -84,7 +88,6 @@ class CustomInstall(install): ...@@ -84,7 +88,6 @@ class CustomInstall(install):
if process.returncode != 0: if process.returncode != 0:
raise OSError(process.stderr.read()) raise OSError(process.stderr.read())
install.run(self)
...@@ -118,9 +121,9 @@ setup( ...@@ -118,9 +121,9 @@ setup(
data_files = [ data_files = [
], ],
cmdclass = { # cmdclass = {
"build": CustomBuild, # "build": CustomBuild,
"clean": CustomClean, # "clean": CustomClean,
"install": CustomInstall # "install": CustomInstall
} # }
) )
Fichier ajouté
Le fichier a été supprimé par une entrée .gitattributes, ou son encodage n'est pas pris en charge.
Le fichier a été supprimé par une entrée .gitattributes, ou son encodage n'est pas pris en charge.
Le fichier a été supprimé par une entrée .gitattributes, ou son encodage n'est pas pris en charge.
#Unigraph
SIZE 1188 840
FONT Times New Roman: 10
OFONT Arial Unicode MS:B 12
BCOLOR 16777215
FCOLOR 0
ACOLOR 13487565
SCOLOR 16711680
CCOLOR 255
DBOXES y
DFRAME n
DDATE n
DFILE n
DDIR n
DRIG n
DRST n
FITS 100
PORIENT L
#
6
"<E>/<ITEM>" 70 200 1 2
"" 450 200 0
"<être>" 162 200 1 3
"en" 277 200 1 4
"<N>" 353 200 1 5
"<E>/</ITEM>" 411 200 1 1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import unittest
from unitex.resources import *
class Values:
    """Write-once key/value store shared between the resource tests.

    The store is pre-populated with the paths of the test resources
    ("dictionary", "fst2" and "alphabet"). Reading an unknown key or
    assigning an already existing key raises a KeyError.
    """

    def __init__(self, language=None):
        # 'language' is accepted but not used by this store.
        self.__values = {
            "dictionary": "data/persistent-dictionary.bin",
            "fst2": "data/persistent-fst2.fst2",
            "alphabet": "data/Alphabet.txt",
        }

    def __getitem__(self, key):
        """Return the value stored under 'key' (KeyError if absent)."""
        if key in self.__values:
            return self.__values[key]
        raise KeyError("Value key '%s' not found..." % key)

    def __setitem__(self, key, value):
        """Store 'value' under 'key' (KeyError if 'key' already exists)."""
        if key not in self.__values:
            self.__values[key] = value
            return
        raise KeyError("Value key '%s' already exists" % key)
class TestUnitexResources(unittest.TestCase):
    """Load/unload checks for persistent dictionaries, fst2 and alphabets.

    The tests are numbered because each 'unload' test reads the persistent
    path stored in the shared Values object by the 'load' test before it.
    """

    @classmethod
    def setUpClass(cls):
        # One Values store for the whole class so tests can pass paths along.
        cls._values = Values()

    def test_01_load_dictionary(self):
        persistent_path = load_persistent_dictionary(self._values["dictionary"])
        self._values["persistent-dictionary"] = persistent_path

        self.assertTrue(is_persistent_dictionary(persistent_path),
                        "Dictionary loading failed!")

    def test_02_unload_dictionary(self):
        persistent_path = self._values["persistent-dictionary"]
        free_persistent_dictionary(persistent_path)

        self.assertTrue(not is_persistent_dictionary(persistent_path),
                        "Dictionary freeing failed!")

    def test_03_load_fst2(self):
        persistent_path = load_persistent_fst2(self._values["fst2"])
        self._values["persistent-fst2"] = persistent_path

        self.assertTrue(is_persistent_fst2(persistent_path),
                        "Fst2 loading failed!")

    def test_04_unload_fst2(self):
        persistent_path = self._values["persistent-fst2"]
        free_persistent_fst2(persistent_path)

        self.assertTrue(not is_persistent_fst2(persistent_path),
                        "Fst2 freeing failed!")

    def test_05_load_alphabet(self):
        persistent_path = load_persistent_alphabet(self._values["alphabet"])
        self._values["persistent-alphabet"] = persistent_path

        self.assertTrue(is_persistent_alphabet(persistent_path),
                        "Alphabet loading failed!")

    def test_06_unload_alphabet(self):
        persistent_path = self._values["persistent-alphabet"]
        free_persistent_alphabet(persistent_path)

        self.assertTrue(not is_persistent_alphabet(persistent_path),
                        "Alphabet freeing failed!")


if __name__ == '__main__':
    unittest.main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import yaml
from unitex import UnitexException, LOGGER
class UnitexSettings:
def __init__(self):
raise NotImplementedError
def get(self, key, default=None):
raise NotImplementedError
def set(self, key, value):
raise NotImplementedError
def load(self, f):
raise NotImplementedError
...@@ -7,7 +7,22 @@ from unitex import UnitexException, LOGGER, LIBUNITEX ...@@ -7,7 +7,22 @@ from unitex import UnitexException, LOGGER, LIBUNITEX
class UnitexFile: def enable_stdout():
"""This function enable Unitex standard output. This should be used
for debug purposes only.
"""
pass
def disable_stdout():
    """Disable the Unitex standard output.

    Intended to keep the output consistent in multithreaded use (i.e. to
    avoid output mixing between threads) and to improve performance.
    This is currently a placeholder that does nothing.
    """
    return None
class UnitexFile(object):
def __init__(self): def __init__(self):
raise NotImplementedError raise NotImplementedError
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import ctypes from unitex import UnitexException, LOGGER
from unitex import UnitexException, LOGGER, LIBUNITEX
class UnitexSettings:
def __init__(self):
raise NotImplementedError
def get(self, key, default=None):
raise NotImplementedError
def set(self, key, value):
raise NotImplementedError
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import ctypes
from unitex import UnitexException, LOGGER, LIBUNITEX
def load_persistent_dictionary(path):
    """Load a dictionary into the persistent space.

    Argument:
        path [str] -- path of an existing file in the filespace (hard disk
                      or virtual file system)

    Return [str]:
        The persistent file path written by the library into the output
        buffer, decoded from UTF-8. It is derived from 'path' but is not
        necessarily identical to it (implementation dependent). This path
        must be used by the unitex tools and by the
        'free_persistent_dictionary' function.

    Raise:
        UnitexException -- if the library call returns 0
    """
    encoded_path = str(path).encode("utf-8")
    filename = ctypes.c_char_p(encoded_path)

    # Size the output buffer from the UTF-8 byte length (a character count
    # is too small for non-ASCII paths) plus 0x200 bytes of headroom. The
    # size announced to the library must never exceed the real allocation,
    # so the same value is used for both.
    size = len(encoded_path) + 0x200 + 1
    persistent_filename_buffer = ctypes.create_string_buffer(size)
    buffer_size = ctypes.c_int(size)

    LOGGER.info("Load persistent dictionary '%s'..." % path)
    ret = LIBUNITEX.persistence_public_load_dictionary(
        filename, persistent_filename_buffer, buffer_size
    )
    if ret == 0:
        LOGGER.debug("Loading dictionary '%s' failed..." % path)
        raise UnitexException("Unable to load persistent dictionary '%s'..." % path)

    # '.value' stops at the first NUL byte of the buffer.
    return persistent_filename_buffer.value.decode("utf-8")
def is_persistent_dictionary(path):
    """Tell whether a dictionary path points to the persistent space.

    Argument:
        path [str] -- the file path to check

    Return [bool]:
        False when the library call returns 0, True otherwise
    """
    encoded_path = str(path).encode("utf-8")
    status = LIBUNITEX.persistence_public_is_persisted_dictionary_filename(
        ctypes.c_char_p(encoded_path)
    )
    return status != 0
def free_persistent_dictionary(path):
    """Unload a dictionary from the persistent space.

    Argument:
        path [str] -- the persistent file path returned by the
                      'load_persistent_dictionary' function
    """
    encoded_path = str(path).encode("utf-8")
    LIBUNITEX.persistence_public_unload_dictionary(ctypes.c_char_p(encoded_path))
def load_persistent_fst2(path):
    """Load a fst2 into the persistent space.

    Argument:
        path [str] -- path of an existing file in the filespace (hard disk
                      or virtual file system)

    Return [str]:
        The persistent file path written by the library into the output
        buffer, decoded from UTF-8. It is derived from 'path' but is not
        necessarily identical to it (implementation dependent). This path
        must be used by the unitex tools and by the 'free_persistent_fst2'
        function.

    Raise:
        UnitexException -- if the library call returns 0
    """
    encoded_path = str(path).encode("utf-8")
    filename = ctypes.c_char_p(encoded_path)

    # Size the output buffer from the UTF-8 byte length (a character count
    # is too small for non-ASCII paths) plus 0x200 bytes of headroom. The
    # size announced to the library must never exceed the real allocation,
    # so the same value is used for both.
    size = len(encoded_path) + 0x200 + 1
    persistent_filename_buffer = ctypes.create_string_buffer(size)
    buffer_size = ctypes.c_int(size)

    LOGGER.info("Load persistent fst2 '%s'..." % path)
    ret = LIBUNITEX.persistence_public_load_fst2(
        filename, persistent_filename_buffer, buffer_size
    )
    if ret == 0:
        LOGGER.debug("Loading fst2 '%s' failed..." % path)
        raise UnitexException("Unable to load persistent fst2 '%s'..." % path)

    # '.value' stops at the first NUL byte of the buffer.
    return persistent_filename_buffer.value.decode("utf-8")
def is_persistent_fst2(path):
    """Tell whether a fst2 path points to the persistent space.

    Argument:
        path [str] -- the file path to check

    Return [bool]:
        False when the library call returns 0, True otherwise
    """
    encoded_path = str(path).encode("utf-8")
    status = LIBUNITEX.persistence_public_is_persisted_fst2_filename(
        ctypes.c_char_p(encoded_path)
    )
    return status != 0
def free_persistent_fst2(path):
    """Unload a fst2 from the persistent space.

    Argument:
        path [str] -- the persistent file path returned by the
                      'load_persistent_fst2' function
    """
    encoded_path = str(path).encode("utf-8")
    LIBUNITEX.persistence_public_unload_fst2(ctypes.c_char_p(encoded_path))
def load_persistent_alphabet(path):
    """Load an alphabet into the persistent space.

    Argument:
        path [str] -- path of an existing file in the filespace (hard disk
                      or virtual file system)

    Return [str]:
        The persistent file path written by the library into the output
        buffer, decoded from UTF-8. It is derived from 'path' but is not
        necessarily identical to it (implementation dependent). This path
        must be used by the unitex tools and by the
        'free_persistent_alphabet' function.

    Raise:
        UnitexException -- if the library call returns 0
    """
    encoded_path = str(path).encode("utf-8")
    filename = ctypes.c_char_p(encoded_path)

    # Size the output buffer from the UTF-8 byte length (a character count
    # is too small for non-ASCII paths) plus 0x200 bytes of headroom. The
    # size announced to the library must never exceed the real allocation,
    # so the same value is used for both.
    size = len(encoded_path) + 0x200 + 1
    persistent_filename_buffer = ctypes.create_string_buffer(size)
    buffer_size = ctypes.c_int(size)

    LOGGER.info("Load persistent alphabet '%s'..." % path)
    ret = LIBUNITEX.persistence_public_load_alphabet(
        filename, persistent_filename_buffer, buffer_size
    )
    if ret == 0:
        LOGGER.debug("Loading alphabet '%s' failed..." % path)
        raise UnitexException("Unable to load persistent alphabet '%s'..." % path)

    # '.value' stops at the first NUL byte of the buffer.
    return persistent_filename_buffer.value.decode("utf-8")
def is_persistent_alphabet(path):
    """Tell whether an alphabet path points to the persistent space.

    Argument:
        path [str] -- the file path to check

    Return [bool]:
        False when the library call returns 0, True otherwise
    """
    encoded_path = str(path).encode("utf-8")
    status = LIBUNITEX.persistence_public_is_persisted_alphabet_filename(
        ctypes.c_char_p(encoded_path)
    )
    return status != 0
def free_persistent_alphabet(path):
    """Unload an alphabet from the persistent space.

    Argument:
        path [str] -- the persistent file path returned by the
                      'load_persistent_alphabet' function
    """
    encoded_path = str(path).encode("utf-8")
    LIBUNITEX.persistence_public_unload_alphabet(ctypes.c_char_p(encoded_path))
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter