From ee061282224fcbb9a0bd486fbc6ddff53ffbb421 Mon Sep 17 00:00:00 2001 From: Patrick Watrin <pat@lucy.local> Date: Thu, 25 Feb 2016 12:56:41 +0100 Subject: [PATCH] PEP 0008 cosmetics --- unitex/__init__.py | 16 ++++++----- unitex/io.py | 47 ++++++++++++++++++-------------- unitex/resources.py | 60 +++++++++++++++++++++++------------------ unitex/tools.py | 65 +++++++++++++++++++++++++++------------------ 4 files changed, 110 insertions(+), 78 deletions(-) diff --git a/unitex/__init__.py b/unitex/__init__.py index 771fd87..ed95843 100644 --- a/unitex/__init__.py +++ b/unitex/__init__.py @@ -111,11 +111,12 @@ _LOGGER = logging.getLogger(__name__) def enable_stdout(): """ - This function enables Unitex standard output. This is the default but - should be used for debug purposes only. + This function enables Unitex standard output. This is the default + but should be used for debug purposes only. Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Enabling standard output...") ret = unitex_enable_stdout() @@ -131,7 +132,8 @@ def disable_stdout(): improve performances. Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Disabling standard output...") ret = unitex_disable_stdout() @@ -146,7 +148,8 @@ def enable_stderr(): should be used for debug purposes only. Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Enabling error output...") ret = unitex_enable_stderr() @@ -162,7 +165,8 @@ def disable_stderr(): improve performances. Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. 
""" _LOGGER.info("Disabling error output...") ret = unitex_disable_stderr() diff --git a/unitex/io.py b/unitex/io.py index f633974..0244d50 100644 --- a/unitex/io.py +++ b/unitex/io.py @@ -22,16 +22,17 @@ _LOGGER = logging.getLogger(__name__) def cp(source_path, target_path): """ - This function copies a file. Both pathes can be on the virtual filesystem - or the disk filesystem. Therefor, this function can be used to virtualize a - file or to dump a virtual file. + This function copies a file. Both paths can be on the virtual + filesystem or the disk filesystem. Therefore, this function can be + used to virtualize a file or to dump a virtual file. Arguments: source_path [str] -- source file path target_path [str] -- target file path Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Copying file '%s' to '%s'..." % (source_path, target_path)) ret = unitex_cp(source_path, target_path) @@ -42,14 +43,15 @@ def cp(source_path, target_path): def rm(path): """ - This function removes a file. The path can be on the virtual filesystem - or the disk filesystem. + This function removes a file. The path can be on the virtual + filesystem or the disk filesystem. Argument: path [str] -- file path Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Removing file '%s'..." % path) ret = unitex_rm(path) @@ -60,15 +62,16 @@ def rm(path): def mv(old_path, new_path): """ - This function moves/renames a file. Both pathes can be on the virtual - filesystem or the disk filesystem. + This function moves/renames a file. Both paths can be on the + virtual filesystem or the disk filesystem. Arguments: old_path [str] -- old file path new_path [str] -- new file path Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. 
+ The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Moving file '%s' to '%s'..." % (old_path, new_path)) ret = unitex_mv(old_path, new_path) @@ -85,7 +88,8 @@ def mkdir(path): path [str] -- directory path Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Creating directory '%s'..." % path) ret = unitex_mkdir(path) @@ -102,7 +106,8 @@ def rmdir(path): path [str] -- directory path Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ _LOGGER.info("Removing directory '%s'..." % path) ret = unitex_rmdir(path) @@ -119,21 +124,23 @@ def ls(path): path [str] -- directory path Return [list(str)]: - The function returns a list of files (not directories) if the directory - is not empty and an empty list otherwise. + The function returns a list of files (not directories) if the + directory is not empty and an empty list otherwise. """ _LOGGER.info("Listing directory '%s'..." % path) return unitex_ls(path) def exists(path): """ - This function verify if a file exists (on disk or virtual filesystem). + This function verifies if a file exists (on disk or virtual + filesystem). Argument: path [str] -- directory path Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + The function returns 'True' if it succeeds and 'False' + otherwise. """ if path.startswith(UnitexConstants.VFS_PREFIX) is False: return os.path.exists(path) @@ -144,11 +151,11 @@ def exists(path): class UnitexFile(object): """ The UnitexFile class provides the minimum functionality necessary to - manipulate files on the disk and the virtual filesystems. It's mainly - useful to read files from virtual filesystem whithout having to copy them - to the disk. + manipulate files on the disk and the virtual filesystems. 
It's + mainly useful to read files from virtual filesystem without having + to copy them to the disk. - **WARNING: the encoding must be UTF-8 and the data Unicode strings.** + *WARNING: the encoding must be UTF-8 and the data Unicode strings.* """ def __init__(self): diff --git a/unitex/resources.py b/unitex/resources.py index b57be97..5301d69 100644 --- a/unitex/resources.py +++ b/unitex/resources.py @@ -24,25 +24,28 @@ def load_persistent_dictionary(path): This function loads a dictionary in persistent space. Argument: - path [str] -- the exisent file path in filespace (hard disk or virtual file system) + path [str] -- the existing file path in filespace (hard disk or + virtual file system). Return [str]: - The persistent file path [str] (derived from filename but not strictly identical, - depending of implementation). This path must be used by the unitex tools and the - 'free_persistent_dictionary' function. + The persistent file path [str] (derived from filename but not + strictly identical, depending on implementation). This path must + be used by the unitex tools and the 'free_persistent_dictionary' + function. """ _LOGGER.info("Load persistent dictionary '%s'..." % path) return unitex_load_persistent_dictionary(path) def is_persistent_dictionary(path): """ - This function checks if a dictionary path points to the persistent space. + This function checks if a dictionary path points to the persistent + space. Argument: - path [str] -- the file path to check + path [str] -- the file path to check. Return [bool]: - True if the dictionary is persitent otherwise False + True if the dictionary is persistent otherwise False. """ return unitex_is_persistent_dictionary(path) @@ -51,8 +54,8 @@ def free_persistent_dictionary(path): This function unloads a dictionary from persistent space. 
Argument: - path [str] -- the persistent file path returned by the 'load_persistent_dictionary' - function + path [str] -- the persistent file path returned by the + 'load_persistent_dictionary' function. """ _LOGGER.info("Free persistent dictionary '%s'..." % path) unitex_free_persistent_dictionary(path) @@ -64,12 +67,14 @@ def load_persistent_fst2(path): This function loads a fst2 in persistent space. Argument: - path [str] -- the exisent file path in filespace (hard disk or virtual file system) + path [str] -- the existing file path in filespace (hard disk or + virtual file system). Return [str]: - The persistent file path [str] (derived from filename but not strictly identical, - depending of implementation). This path must be used by the unitex tools and the - 'free_persistent_fst2' function. + The persistent file path [str] (derived from filename but not + strictly identical, depending on implementation). This path must + be used by the unitex tools and the 'free_persistent_fst2' + function. """ _LOGGER.info("Load persistent fst2 '%s'..." % path) return unitex_load_persistent_fst2(path) @@ -79,10 +84,10 @@ def is_persistent_fst2(path): This function checks if a fst2 path points to the persistent space. Argument: - path [str] -- the file path to check + path [str] -- the file path to check. Return [bool]: - True if the fst2 is persitent otherwise False + True if the fst2 is persistent otherwise False. """ return unitex_is_persistent_fst2(path) @@ -91,8 +96,8 @@ def free_persistent_fst2(path): This function unloads a fst2 from persistent space. Argument: - path [str] -- the persistent file path returned by the 'load_persistent_fst2' - function + path [str] -- the persistent file path returned by the + 'load_persistent_fst2' function. """ _LOGGER.info("Free persistent fst2 '%s'..." % path) unitex_free_persistent_fst2(path) @@ -104,25 +109,28 @@ def load_persistent_alphabet(path): This function loads a alphabet in persistent space. 
Argument: - path [str] -- the exisent file path in filespace (hard disk or virtual file system) + path [str] -- the existing file path in filespace (hard disk or + virtual file system). Return [str]: - The persistent file path [str] (derived from filename but not strictly identical, - depending of implementation). This path must be used by the unitex tools and the - 'free_persistent_alphabet' function. + The persistent file path [str] (derived from filename but not + strictly identical, depending on implementation). This path must + be used by the unitex tools and the 'free_persistent_alphabet' + function. """ _LOGGER.info("Load persistent alphabet '%s'..." % path) return unitex_load_persistent_alphabet(path) def is_persistent_alphabet(path): """ - This function checks if a alphabet path points to the persistent space. + This function checks if an alphabet path points to the persistent + space. Argument: - path [str] -- the file path to check + path [str] -- the file path to check. Return [bool]: - True if the alphabet is persitent otherwise False + True if the alphabet is persistent otherwise False. """ return unitex_is_persistent_alphabet(path) @@ -131,8 +139,8 @@ def free_persistent_alphabet(path): This function unloads a alphabet from persistent space. Argument: - path [str] -- the persistent file path returned by the 'load_persistent_alphabet' - function + path [str] -- the persistent file path returned by the + 'load_persistent_alphabet' function. """ _LOGGER.info("Free persistent alphabet '%s'..." % path) unitex_free_persistent_alphabet(path) diff --git a/unitex/tools.py b/unitex/tools.py index 74cbedd..82c5df7 100644 --- a/unitex/tools.py +++ b/unitex/tools.py @@ -31,17 +31,23 @@ def check_dic(dictionary, dtype, alphabet, **kwargs): stored in the <dela> directory. 
Arguments: - dictionary [str] -- the dictionary file path - dtype [str] -- the dictionary type: UnitexConstants.DELAF (inflected) - UnitexConstants.DELAS (non inflected) - alphabet [str] -- the alphabet file path + dictionary [str] -- the dictionary file path. + + dtype [str] -- the dictionary type: + - UnitexConstants.DELAF (inflected); + - UnitexConstants.DELAS (non inflected). + + alphabet [str] -- the alphabet file path. Keyword arguments: - strict [bool] -- strict syntax checking against unprotected dot and comma (default: False) - no_space_warning [bool] -- tolerates spaces in grammatical/semantic/inflectional codes (default: True) + strict [bool] -- strict syntax checking against unprotected dot + and comma (default: False). + + no_space_warning [bool] -- tolerates spaces in grammatical, + semantic and inflectional codes (default: True). Return [bool]: - The function returns 'True' if it succeeds and 'False' otherwise. + True if it succeeds and False otherwise. """ options = CheckDicOptions() options.load(kwargs) @@ -78,30 +84,37 @@ def check_dic(dictionary, dtype, alphabet, **kwargs): def compress(dictionary, **kwargs): """ - This function takes a DELAF dictionary as a parameter and compresses it. The - compression of a dictionary dico.dic produces two files: + This function takes a DELAF dictionary as a parameter and compresses + it. The compression of a dictionary dico.dic produces two files: + + - dico.bin: a binary file containing the minimum automaton of + the inflected forms of the dictionary; - - dico.bin: a binary file containing the minimum automaton of the inflected - forms of the dictionary; - - dico.inf: a text file containing the compressed forms required for the reconstruction - of the dictionary lines from the inflected forms contained in the - automaton. + - dico.inf: a text file containing the compressed forms required + for the reconstruction of the dictionary lines from the + inflected forms contained in the automaton. 
Arguments: - dictionary [str] -- the dictionary file path + dictionary [str] -- the dictionary file path. Keyword arguments: - output [str] -- sets the output file. By default, a file xxx.dic will - produce a file xxx.bin - flip [bool] -- indicates that the inflected and canonical forms should be swapped in the - compressed dictionary. This option is used to construct an inverse dictionary - which is necessary for the program 'Reconstrucao' (default: False) - semitic [bool] -- indicates that the semitic compression algorithm should be used. Setting this - option with semitic languages like Arabic significantly reduces the size of - the output dictionary (default: False) - version [str] -- 'v1': produces an old style .bin file - 'v2': produces a new style .bin file, with no file size limitation to 16 Mb - and a smaller size (default) + output [str] -- sets the output file. By default, a file xxx.dic + will produce a file xxx.bin. + + flip [bool] -- indicates that the inflected and canonical forms + should be swapped in the compressed dictionary. This option + is used to construct an inverse dictionary which is + necessary for the program 'Reconstrucao' (default: False). + + semitic [bool] -- indicates that the semitic compression + algorithm should be used. Setting this option with semitic + languages like Arabic significantly reduces the size of the + output dictionary (default: False). + + version [str] -- 'v1': produces an old style .bin file; + 'v2': produces a new style .bin file, with no + file size limitation to 16 Mb and a + smaller size (default). Return [bool]: The function return 'True' if it succeeds and 'False' otherwise. -- GitLab