Skip to content
Extraits de code Groupes Projets
Valider 65e319c3 rédigé par Patrick Watrin's avatar Patrick Watrin
Parcourir les fichiers

Some preliminary documentation

parent a729d93f
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
Unitex binding for python 3
# Unitex binding for python
## Features
* Unitex as a library
* Persistence
* Virtualization
## Installation
The library has been tested on MacOSX and Linux. The installation requires
## Getting started
There are three ways to use the Unitex Python library:
1. The `_unitex` C++ extension.
2. The Unitex basic commands and features.
3. The `Processor` high-level class.
The following sections give some sample code to illustrate each of them.
### The `_unitex` C++ extension.
### The Unitex basic commands and features.
### The `Processor` high-level class.
## Useful links
* **The Unitex/GramLab corpus processor:** [homepage](http://www-igm.univ-mlv.fr/~unitex/) and [documentation](http://igm.univ-mlv.fr/~unitex/UnitexManual3.1.pdf)
......@@ -91,7 +91,7 @@ setup(
name = "unitex",
version = "1.0",
description = "Python 3 binding for the Unitex library",
long_description = open('README').read(),
long_description = open('README.md').read(),
author = "Patrick Watrin",
author_email = "patrick.watrin@gmail.com",
......@@ -125,8 +125,8 @@ setup(
sources = ["extensions/_unitex.cpp"])
],
# cmdclass = {
# "build": CustomBuild,
# "clean": CustomClean
# }
cmdclass = {
"build": CustomBuild,
"clean": CustomClean
}
)
......@@ -17,6 +17,8 @@ class UnitexException(Exception):
DEFAULT_ENCODING="utf-8"
# VERBOSE = 0: ERROR logging level
# VERBOSE = 1: WARNING logging level
# VERBOSE = 2: INFO logging level
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import yaml
from unitex import UnitexException, LOGGER
class UnitexSettings:
    """Work-in-progress holder for settings read from a YAML file.

    Only ``load`` performs any work so far; ``get`` and ``set`` are stubs
    that raise ``NotImplementedError``.
    """

    def __init__(self):
        # Parsed YAML document; stays None until load() has read a file.
        self.__settings = None

    def get(self, key, default=None):
        """Look up ``key`` (not implemented yet).

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def set(self, key, value):
        """Store ``value`` under ``key`` (not implemented yet).

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def load(self, f):
        """Parse the YAML file at path ``f`` into the settings attribute.

        Raises:
            NotImplementedError: always, after the file has been parsed;
                the method is still a stub.
        """
        with open(f, 'r') as ymlfile:
            # Store into the attribute declared in __init__ (the previous
            # code wrote to an unrelated ``__config`` attribute).
            # NOTE(review): switched from yaml.load to yaml.safe_load;
            # yaml.load without an explicit Loader can construct arbitrary
            # objects and is rejected by recent PyYAML releases. Confirm
            # that no settings file relies on Python-specific YAML tags.
            self.__settings = yaml.safe_load(ymlfile)
        raise NotImplementedError
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from unitex import UnitexException, LOGGER
import os
import yaml
from unitex import UnitexException, LOGGER, DEFAULT_ENCODING
class UnitexProcessor:
class UnitexSettings(object):
def __init__(self):
raise NotImplementedError
self.__settings = None
def open(self, path, mode=None, encoding=None):
raise NotImplementedError
def __contains__(self, key):
return key in self.__settings
def __getitem__(self, key):
if key not in self.__settings:
raise UnitexException("Key '%s' not found!" % key)
return self.__settings[key]
def set(self, key, value):
self.__settings[key] = value
def load(self, f):
with open(f, 'r') as config:
self.__settings = yaml.load(config)
def check(self):
resources = self.__settings.get("resources", None)
if resources is None:
raise UnitexException("You must provide the 'resources' config element.")
language = resources.get("language", None)
if language is None:
raise UnitexException("The 'resources' section must contain the 'language' element.")
alphabet = resources.get("alphabet", None)
if alphabet is None:
LOGGER.warning("No alphabet file provided.")
else:
class UnitexProcessor(object):
def __init__(self, config=None):
self.__settings = None
if config is not None:
self.reset(config)
def reset(self, config):
self.__settings = UnitexSettings()
self.__settings.load(config)
def open(self, path, mode="srtlf", encoding=None, tagged=False, virtualize=False):
if encoding is None:
encoding = DEFAULT_ENCODING
def close(self):
raise NotImplementedError
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter