diff --git a/config/unitex.yaml b/config/unitex.yaml index 209a36e3e01a3b31193b7c71dcc0bf0c8d508f69..bf5848689bc9f388f1b37bd465d3ed8b9985961a 100644 --- a/config/unitex.yaml +++ b/config/unitex.yaml @@ -3,30 +3,26 @@ global: verbose: 2 log: null - tempdir: "/tmp" - persistence: True virtualization: True resources: language: "fr" - alphabet: "/home/resources/media/fr/unitex/preprocessing/Alphabet.txt" - alphabet-sorted: "/home/resources/media/fr/unitex/preprocessing/Alphabet_sort.txt" - sentence: "/home/resources/media/fr/unitex/preprocessing/sentence/Sentence.fst2" - replace: "/home/resources/media/fr/unitex/preprocessing/replace/Replace.fst2" + alphabet: "/full/path/to/Alphabet.txt" + alphabet-sorted: "/full/path/to/Alphabet_sort.txt" + sentence: "/full/path/to/Sentence.fst2" + replace: "/full/path/to/Replace.fst2" dictionaries: - - "/home/resources/media/fr/unitex/dictionary/delaf-short.bin" - - "/home/resources/media/fr/unitex/dictionary/delacf-light.bin" - - "/home/resources/media/fr/unitex/dictionary/toponyms.bin" + - "/full/path/to/dictionary.bin" -# The 'options' section can contain any of the argument used by the unitex tools +# The 'tools' section can contain any of the argument used by the unitex tools # functions. Note that, if you use the 'Processor' high-level class some argument # could be overriden to fit the 'tag', 'extract' and 'search' functions # behaviour. For intance, there is no point to define a font or a context for # 'concord'. 
-options: +tools: check_dic: strict: False no_space_warning: False diff --git a/tests/01_test_tools.py b/tests/01_test_tools.py index 0dc2371ef8bd7b6fbf77ed6bc245f0affdb747a9..29e6115a8ab726299b0480b13298e5320cd8aa64 100644 --- a/tests/01_test_tools.py +++ b/tests/01_test_tools.py @@ -22,7 +22,7 @@ class Arguments: self.__arguments["inf"] = "data/dictionary.inf" self.__arguments["alphabet"] = "data/Alphabet.txt" - self.__arguments["alphabet_sort"] = "data/Alphabet_sort.txt" + self.__arguments["alphabet-sorted"] = "data/Alphabet_sort.txt" self.__arguments["sentence"] = "data/Sentence.fst2" @@ -215,7 +215,7 @@ class TestUnitexTools(unittest.TestCase): kwargs = {} kwargs["duplicates"] = False kwargs["reverse"] = False - kwargs["sort_order"] = self._arguments["alphabet_sort"] + kwargs["sort_order"] = self._arguments["alphabet-sorted"] kwargs["line_info"] = self._arguments["stat_dic.n"] kwargs["thai"] = False kwargs["factorize_inflectional_codes"] = False @@ -285,7 +285,7 @@ class TestUnitexTools(unittest.TestCase): def test_10_concord(self): index = self._arguments["ind"] - alphabet = self._arguments["alphabet"] + alphabet = self._arguments["alphabet-sorted"] kwargs = {} kwargs["font"] = None diff --git a/tests/02_test_resources.py b/tests/02_test_resources.py index e2eea95b35c5109fcfe3b9e2468007a0f3d5199e..48b7451e3ef371320f91ad03676d0b117e1a258c 100644 --- a/tests/02_test_resources.py +++ b/tests/02_test_resources.py @@ -3,6 +3,7 @@ import os, unittest +from unitex import UnitexConstants from unitex.resources import * from unitex.tools import compress @@ -48,14 +49,13 @@ class TestUnitexResources(unittest.TestCase): os.remove(self._arguments["inf"]) def test_01_load_dictionary(self): - args = [self._arguments["dic"]] - kwargs = {} + kwargs["output"] = None kwargs["flip"] = False kwargs["semitic"] = False - kwargs["version"] = "v2" + kwargs["version"] = UnitexConstants.DICTIONARY_VERSION_1 - ret = compress(*args, **kwargs) + ret = compress(self._arguments["dic"], 
**kwargs) path = self._arguments["bin"] diff --git a/tests/04_test_processor.py b/tests/04_test_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..a66a2c1e6974e2cca82dd7c4fcdb333aee2aa202 --- /dev/null +++ b/tests/04_test_processor.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os, shutil, unittest + +from unitex import UnitexConstants +from unitex.tools import compress, grf2fst2 +from unitex.processor import UnitexProcessor + + + +class Arguments: + + def __init__(self, language=None): + self.__arguments = {} + + self.__arguments["config"] = "data/unitex.yaml" + + self.__arguments["alphabet"] = "data/Alphabet.txt" + + self.__arguments["dic"] = "data/dictionary.dic" + self.__arguments["bin"] = "data/dictionary.bin" + self.__arguments["inf"] = "data/dictionary.inf" + + self.__arguments["grf"] = "data/grammar.grf" + self.__arguments["fst2"] = "data/grammar.fst2" + + self.__arguments["txt"] = "data/corpus.txt" + self.__arguments["tag"] = "data/corpus.tag" + self.__arguments["xml"] = "data/corpus.xml" + + def __getitem__(self, key): + if key not in self.__arguments: + raise KeyError("Argument '%s' not found ..." 
% key) + return self.__arguments[key] + + + +class TestUnitexIO(unittest.TestCase): + + @classmethod + def setUpClass(self): + self._arguments = Arguments() + + dictionary = self._arguments["dic"] + + kwargs = {} + kwargs["output"] = None + kwargs["flip"] = False + kwargs["semitic"] = False + kwargs["version"] = UnitexConstants.DICTIONARY_VERSION_1 + + ret = compress(dictionary, **kwargs) + + grammar = self._arguments["grf"] + alphabet = self._arguments["alphabet"] + + kwargs = {} + kwargs["loop_check"] = False + kwargs["char_by_char"] = False + kwargs["pkgdir"] = None + kwargs["no_empty_graph_warning"] = False + kwargs["tfst_check"] = False + kwargs["silent_grf_name"] = False + kwargs["named_repositories"] = None + kwargs["debug"] = False + kwargs["check_variables"] = False + + ret = grf2fst2(grammar, alphabet, **kwargs) + + @classmethod + def tearDownClass(self): + if os.path.exists(self._arguments["bin"]): + os.remove(self._arguments["bin"]) + + if os.path.exists(self._arguments["inf"]): + os.remove(self._arguments["inf"]) + + if os.path.exists(self._arguments["fst2"]): + os.remove(self._arguments["fst2"]) + + if os.path.exists(self._arguments["tag"]): + os.remove(self._arguments["tag"]) + + if os.path.exists(self._arguments["xml"]): + os.remove(self._arguments["xml"]) + + def test_01_processor_txt(self): + processor = UnitexProcessor(self._arguments["config"]) + processor.open(self._arguments["txt"], mode="srtlf", tagged=False) + + kwargs = {} + kwargs["xml"] = False + + ret = processor.tag(self._arguments["fst2"], self._arguments["tag"], **kwargs) + + processor.close(clean=True, free=True) + self.assertTrue(ret, "Tagging process failed (txt format)!") + + def test_02_processor_xml(self): + processor = UnitexProcessor(self._arguments["config"]) + processor.open(self._arguments["txt"], mode="srtlf", tagged=False) + + kwargs = {} + kwargs["xml"] = True + + ret = processor.tag(self._arguments["fst2"], self._arguments["tag"], **kwargs) + + 
processor.close(clean=True, free=True) + self.assertTrue(ret, "Tagging process failed (xml format)!") + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/data/Replace.fst2 b/tests/data/Replace.fst2 new file mode 100644 index 0000000000000000000000000000000000000000..af5e70b0bd1bd67bee35481ceaf7aacb8ea2c214 Binary files /dev/null and b/tests/data/Replace.fst2 differ diff --git a/tests/data/Sentence.fst2 b/tests/data/Sentence.fst2 index 6e955340e44645ff673f76071baaf4a3cf697f70..e3641eba7c57da4d4b01e991ab603c1617536210 100644 Binary files a/tests/data/Sentence.fst2 and b/tests/data/Sentence.fst2 differ diff --git a/tests/data/unitex.yaml b/tests/data/unitex.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dd9e7ae7e0aa407b5df7911d6ba4bfccc3eeeb40 --- /dev/null +++ b/tests/data/unitex.yaml @@ -0,0 +1,119 @@ +global: + debug: 1 + verbose: 2 + log: null + + persistence: True + virtualization: True + +resources: + language: "fr" + + alphabet: "data/Alphabet.txt" + alphabet-sorted: "data/Alphabet_sort.txt" + sentence: "data/Sentence.fst2" + replace: "data/Replace.fst2" + + dictionaries: + - "data/dictionary.bin" + +# The 'tools' section can contain any of the argument used by the unitex tools +# functions. Note that, if you use the 'Processor' high-level class some argument +# could be overridden to fit the 'tag', 'extract' and 'search' functions +# behaviour. For instance, there is no point to define a font or a context for +# 'concord'. 
+tools: + check_dic: + strict: False + no_space_warning: False + + compress: + output: null + flip: False + semitic: False + version: "v2" + + concord: + font: null + fontsize: null + only_ambiguous: False + only_matches: False + left: "0" + right: "0" + sort: "TO" + format: "text" + script: null + offsets: null + unxmlize: null + directory: null + thai: False + + dico: + morpho: null + korean: False + semitic: False + arabic_rules: null + raw: null + + extract: + non_matching_sentences: False + + fst2txt: + start_on_space: False + word_by_word: False + merge: True + + grf2fst2: + loop_check: False + char_by_char: False + pkgdir: null + no_empty_graph_warning: False + tfst_check: False + silent_grf_name: True + named_repository: null + debug: False + check_variables: True + + locate: + start_on_space: False + char_by_char: False + morpho: null + korean: False + arabic_rules: null + sntdir: null + negation_operator: "tilde" + number_of_matches: null + stop_token_count: null + match_mode: "longest" + output_mode: "merge" + protect_dic_chars: True + variable: null + ambiguous_outputs: True + variable_error: "ignore" + + normalize: + no_carriage_return: False + input_offsets: null + output_offsets: null + no_separator_normalization: False + replacement_rules: null + + sort_txt: + duplicates: False + revers: False + sort_order: null + line_info: null + thai: False + factorize_inflectional_codes: False + + tokenize: + char_by_char: False + tokens: null + input_offsets: null + output_offsets: null + + txt2fst: + clean: False + normalization_grammar: null + tagset: null + korean: False diff --git a/unitex/config.py b/unitex/config.py index d6bdcc895e0ea4987e1d5fe3e11f381689e729dc..d4112ec4e8846be95820f66554c1911c72fbaf0f 100644 --- a/unitex/config.py +++ b/unitex/config.py @@ -12,25 +12,12 @@ _LOGGER = logging.getLogger(__name__) -class Options(object): +class Options(dict): def __init__(self, options=None): - self.__options = {} - if options is not None: 
self.load(options) - def __contains__(self, key): - return key in self.__options - - def __getitem__(self, key): - if key not in self.__options: - raise UnitexException("Key '%s' not found!" % key) - return self.__options[key] - - def __setitem__(self, key, value): - self.__options[key] = value - def load(self, options): raise NotImplementedError @@ -359,7 +346,7 @@ class LocateOptions(Options): raise UnitexException("[LOCATE] Wrong value for the 'korean' option. Boolean required.") self["korean"] = korean - arabic_rules = options.get("arabic_rules", False) + arabic_rules = options.get("arabic_rules", None) if arabic_rules is not None: if isinstance(arabic_rules, str) is False: raise UnitexException("[LOCATE] Wrong value for the 'arabic_rules' option. String required.") @@ -405,8 +392,8 @@ class LocateOptions(Options): output_mode = options.get("output_mode", UnitexConstants.OUTPUT_MODE_IGNORE) if output_mode not in (UnitexConstants.OUTPUT_MODE_IGNORE, - UnitexConstants.OUTPUT_MODE_MERGE, - UnitexConstants.OUTPUT_MODE_RELACE): + UnitexConstants.OUTPUT_MODE_MERGE, + UnitexConstants.OUTPUT_MODE_RELACE): raise UnitexException("[LOCATE] Wrong value for the 'output_mode' option. UnitexConstants.OUTPUT_MODE_X required.") self["output_mode"] = output_mode @@ -688,11 +675,6 @@ class UnitexConfig(Options): raise UnitexException("Wrong value for the 'log' global option. String required.") self["log"] = log - tempdir = options.get("tempdir", tempfile.gettempdir()) - if not exists(tempdir): - raise UnitexException("Temporary directory '%s' doesn't exist." % tempdir) - self["tempdir"] = tempdir - persistence = options.get("persistence", False) if isinstance(persistence, bool) is False: raise UnitexException("Wrong value for the 'persistence' global option. 
Boolean required.") @@ -705,17 +687,18 @@ class UnitexConfig(Options): self["resources"] = ResourcesOptions(settings.get("resources", {})) - options = settings.get("options", {}) - - self["check_dic"] = CheckDicOptions(options.get("check_dic", {})) - self["compress"] = CheckDicOptions(options.get("compress", {})) - self["concord"] = ConcordOptions(options.get("concord", {})) - self["dico"] = DicoOptions(options.get("dico", {})) - self["extract"] = ExtractOptions(options.get("extract", {})) - self["fst2txt"] = Fst2TxtOptions(options.get("fst2txt", {})) - self["grf2fst2"] = Grf2Fst2Options(options.get("grf2fst2", {})) - self["locate"] = LocateOptions(options.get("locate", {})) - self["normalize"] = NormalizeOptions(options.get("normalize", {})) - self["sort_txt"] = SortTxtOptions(options.get("sort_txt", {})) - self["tokenize"] = TokenizeOptions(options.get("tokenize", {})) - self["txt2tfst"] = Txt2TFstOptions(options.get("txt2tfst", {})) + tools = settings.get("tools", {}) + + self["tools"] = {} + self["tools"]["check_dic"] = CheckDicOptions(tools.get("check_dic", {})) + self["tools"]["compress"] = CheckDicOptions(tools.get("compress", {})) + self["tools"]["concord"] = ConcordOptions(tools.get("concord", {})) + self["tools"]["dico"] = DicoOptions(tools.get("dico", {})) + self["tools"]["extract"] = ExtractOptions(tools.get("extract", {})) + self["tools"]["fst2txt"] = Fst2TxtOptions(tools.get("fst2txt", {})) + self["tools"]["grf2fst2"] = Grf2Fst2Options(tools.get("grf2fst2", {})) + self["tools"]["locate"] = LocateOptions(tools.get("locate", {})) + self["tools"]["normalize"] = NormalizeOptions(tools.get("normalize", {})) + self["tools"]["sort_txt"] = SortTxtOptions(tools.get("sort_txt", {})) + self["tools"]["tokenize"] = TokenizeOptions(tools.get("tokenize", {})) + self["tools"]["txt2tfst"] = Txt2TFstOptions(tools.get("txt2tfst", {})) diff --git a/unitex/processor.py b/unitex/processor.py index 
1f0a61217ed1af4e6ab06ec885ea6617312e3bb6..8344d85283102c2e6c6c31180ec30145d4f2e932 100644 --- a/unitex/processor.py +++ b/unitex/processor.py @@ -2,8 +2,15 @@ # -*- coding: utf-8 -*- import logging +import os +import re import yaml +# Compatibility Python 2/3 +from io import open + +from xml.sax.saxutils import escape + from unitex import * from unitex.config import UnitexConfig from unitex.io import * @@ -14,13 +21,26 @@ _LOGGER = logging.getLogger(__name__) +RULES = [] +RULES.append((re.compile(r"&"), "&")) + +def escape(sequence): + for pattern, substitute in RULES: + sequence = pattern.sub(substitute, sequence) + return sequence + + + class UnitexProcessor(object): def __init__(self, config): self.__options = None self.__persisted_objects = None - self.__working_directory = None + + self.__txt = None + self.__snt = None + self.__dir = None self.init(config) @@ -98,12 +118,211 @@ class UnitexProcessor(object): free_persistent_alphabet(_object) def clean(self): - if self.__working_directory is None: + if self.__txt is None: + _LOGGER.error("Unable to clean processor. No file opened!") return - rmdir(self.__working_directory) + + if self.__options["virtualization"] is True: + if self.__dir is not None: + for vf in ls("%s%s" % (UnitexConstants.VFS_PREFIX, self.__dir)): + rm(vf) + rm(self.__snt) + rm(self.__txt) + else: + rmdir(self.__dir) + rm(self.__snt) + + def _normalize(self): + kwargs = self.__options["tools"]["normalize"] + + ret = normalize(self.__txt, **kwargs) + if ret is False: + raise UnitexException("Text normalization failed!") + + def _segment(self): + grammar = self.__options["resources"]["sentence"] + if grammar is None: + raise UnitexException("Unable to segment text. No sentence grammar provided.") + + alphabet = self.__options["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to segment text. 
No alphabet file provided.") + + kwargs = {} + kwargs["start_on_space"] = self.__options["tools"]["fst2txt"]["start_on_space"] + kwargs["char_by_char"] = self.__options["tools"]["fst2txt"]["char_by_char"] + kwargs["merge"] = True + + ret = fst2txt(grammar, self.__snt, alphabet, **kwargs) + if ret is False: + raise UnitexException("Text segmentation failed!") + + def _replace(self): + grammar = self.__options["resources"]["replace"] + if grammar is None: + raise UnitexException("Unable to normalize text. No replace grammar provided.") + + alphabet = self.__options["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to normalize text. No alphabet file provided.") + + kwargs = {} + kwargs["start_on_space"] = self.__options["tools"]["fst2txt"]["start_on_space"] + kwargs["char_by_char"] = self.__options["tools"]["fst2txt"]["char_by_char"] + kwargs["merge"] = False + + ret = fst2txt(grammar, self.__snt, alphabet, **kwargs) + if ret is False: + raise UnitexException("Text normalization failed!") + + def _tokenize(self): + alphabet = self.__options["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to tokenize text. No alphabet file provided.") + + kwargs = self.__options["tools"]["tokenize"] + + ret = tokenize(self.__snt, alphabet, **kwargs) + + def _lexicalize(self): + dictionaries = self.__options["resources"]["dictionaries"] + if not dictionaries: + raise UnitexException("Unable to lexicalize text. No dictionaries provided.") + + alphabet = self.__options["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to tokenize text. 
No alphabet file provided.") + + kwargs = self.__options["tools"]["dico"] + + ret = dico(dictionaries, self.__snt, alphabet, **kwargs) + if ret is False: + raise UnitexException("Text lexicalization failed!") + + def _locate(self, grammar, match_mode, output_mode): + alphabet = self.__options["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to locate pattern. No alphabet file provided.") + + kwargs = {} + kwargs["morpho"] = self.__options["tools"]["locate"]["morpho"] + kwargs["start_on_space"] = self.__options["tools"]["locate"]["start_on_space"] + kwargs["char_by_char"] = self.__options["tools"]["locate"]["char_by_char"] + kwargs["korean"] = self.__options["tools"]["locate"]["korean"] + kwargs["arabic_rules"] = self.__options["tools"]["locate"]["arabic_rules"] + kwargs["negation_operator"] = self.__options["tools"]["locate"]["negation_operator"] + kwargs["stop_token_count"] = self.__options["tools"]["locate"]["stop_token_count"] + kwargs["protect_dic_chars"] = self.__options["tools"]["locate"]["protect_dic_chars"] + kwargs["variable"] = self.__options["tools"]["locate"]["variable"] + kwargs["variable_error"] = self.__options["tools"]["locate"]["variable_error"] + + kwargs["sntdir"] = None + kwargs["number_of_matches"] = None + kwargs["ambiguous_outputs"] = False + + if match_mode not in (UnitexConstants.MATCH_MODE_LONGEST, + UnitexConstants.MATCH_MODE_SHORTEST): + raise UnitexException("Wrong value for the 'match_mode' option. UnitexConstants.MATCH_MODE_X required.") + kwargs["match_mode"] = match_mode + + if output_mode not in (UnitexConstants.OUTPUT_MODE_IGNORE, + UnitexConstants.OUTPUT_MODE_MERGE, + UnitexConstants.OUTPUT_MODE_RELACE): + raise UnitexException("Wrong value for the 'output_mode' option. 
UnitexConstants.OUTPUT_MODE_X required.") + kwargs["output_mode"] = output_mode + + ret = locate(grammar, self.__snt, alphabet, **kwargs) + if ret is False: + raise UnitexException("Locate failed!") + + index = os.path.join(self.__dir, "concord.ind") + if self.__options["virtualization"] is True: + index = "%s%s" % (UnitexConstants.VFS_PREFIX, index) + + if exists(index) is False: + raise UnitexException("Locate failed! No index produced.") + return index + + def _concord(self, index, merge=False, output=None): + alphabet = self.__options["resources"]["alphabet"] + if alphabet is None: + raise UnitexException("Unable to build concordance. No alphabet file provided.") + + kwargs = {} + kwargs["font"] = None + kwargs["fontsize"] = None + kwargs["only_ambiguous"] = False + kwargs["left"] = "0" + kwargs["right"] = "0" + kwargs["sort"] = UnitexConstants.SORT_TEXT_ORDER + kwargs["script"] = None + kwargs["offsets"] = None + kwargs["unxmlize"] = None + kwargs["directory"] = None + kwargs["thai"] = self.__options["tools"]["concord"]["thai"] + + result = None + + if merge is True: + kwargs["format"] = UnitexConstants.FORMAT_MERGE + if output is None: + raise UnitexException("You must provide the output file path to use the merge option.") + kwargs["output"] = output + kwargs["only_matches"] = False + + result = output + + else: + kwargs["format"] = UnitexConstants.FORMAT_TEXT + kwargs["output"] = None + kwargs["only_matches"] = False + + result = os.path.join(self.__dir, "concord.txt") + if self.__options["virtualization"] is True: + index = "%s%s" % (UnitexConstants.VFS_PREFIX, result) + + ret = concord(index, alphabet, **kwargs) + if ret is False: + raise UnitexException("Concord failed!") + + if exists(result) is False: + raise UnitexException("Concord failed! 
No concordances produced.") + return result def open(self, path, mode="srtlf", tagged=False): - pass + directory, filename = os.path.split(path) + name, extension = os.path.splitext(filename) + + self.__txt = path + self.__snt = os.path.join(directory, "%s.snt" % name) + self.__dir = os.path.join(directory, "%s_snt" % name) + + if self.__options["virtualization"] is True: + txt = "%s%s" % (UnitexConstants.VFS_PREFIX, self.__txt) + cp(self.__txt, txt) + + self.__txt = txt + self.__snt = "%s%s" % (UnitexConstants.VFS_PREFIX, self.__snt) + + else: + if os.path.exists(self.__dir) is False: + mkdir(self.__dir) + elif "f" in mode: + rmdir(self.__dir) + mkdir(self.__dir) + + self._normalize() + + if tagged is False: + if "s" in mode: + self._segment() + if "r" in mode: + self._replace() + + if "t" in mode: + self._tokenize() + if "l" in mode: + self._lexicalize() def close(self, clean=True, free=False): if clean is True: @@ -112,8 +331,48 @@ class UnitexProcessor(object): if free is True: self.free() + self.__txt = None + self.__snt = None + self.__dir = None + def tag(self, grammar, output, **kwargs): - raise NotImplementedError + xml = kwargs.get("xml", False) + match_mode = kwargs.get("match_mode", UnitexConstants.MATCH_MODE_LONGEST) + output_mode = UnitexConstants.OUTPUT_MODE_MERGE + + index = self._locate(grammar, match_mode, output_mode) + + if xml is False: + self._concord(index, merge=True, output=output) + if exists(output) is False: + raise UnitexException("No tagged file produced!") + return True + + _output = os.path.join(self.__dir, "concord-merge-temp.txt") + if self.__options["virtualization"] is True: + _output = "%s%s" % (UnitexConstants.VFS_PREFIX, _output) + + self._concord(index, merge=True, output=_output) + if exists(_output) is False: + raise UnitexException("No (temporary) tagged file produced!") + + tagged = open(output, "w", encoding="utf-8") + tagged.write("<?xml version='1.0' encoding='UTF-8'?>\n") + tagged.write("<TAGFILE query='%s'>\n" % 
grammar) + + merged = UnitexFile() + merged.open(_output, "r") + content = merged.read() + merged.close() + + content = escape(content) + tagged.write(content) + + tagged.write("</TAGFILE>\n") + tagged.close() + rm(_output) + + return True def search(self, grammar, output, **kwargs): raise NotImplementedError