# Source code for solthiruthi.dictionary

## -*- coding: utf-8 -*-
## (C) 2015 Muthiah Annamalai,
## 
from __future__ import print_function
import abc
import sys
import codecs
from pprint import pprint

from . import resources
from . import datastore

# True when the running interpreter's major version is 3.
PYTHON3 = sys.version_info[0] == 3
    
# specify dictionary interface without specifying storage
class Dictionary(abc.ABCMeta('_DictionaryBase', (object,), {})):
    """Abstract word-dictionary interface that does not fix a storage backend.

    The base class is created by calling ``abc.ABCMeta`` directly so that the
    ``@abc.abstractmethod`` markers are enforced on both Python 2 and
    Python 3.  (A ``__metaclass__`` class attribute is ignored by Python 3,
    which would allow incomplete subclasses to be instantiated.)

    Concrete subclasses must implement every abstract method below; the
    non-abstract helpers are written purely in terms of those methods.
    """

    @abc.abstractmethod
    def add(self, word):
        """Add ``word`` to the dictionary (implemented by subclasses)."""
        return

    @abc.abstractmethod
    def getWordsEndingWith(self, sfx):
        """Return the words ending with the suffix ``sfx`` (implemented by subclasses)."""
        return

    @abc.abstractmethod
    def hasWordsStartingWith(self, pfx):
        """Tell whether any word starts with the prefix ``pfx`` (implemented by subclasses)."""
        return

    @abc.abstractmethod
    def getWordsStartingWith(self, pfx):
        """Return the words starting with the prefix ``pfx`` (implemented by subclasses)."""
        return

    @abc.abstractmethod
    def isWord(self, word):
        """Tell whether ``word`` is in the dictionary (implemented by subclasses)."""
        return

    @abc.abstractmethod
    def getAllWords(self):
        """Return an iterable of every stored word (implemented by subclasses)."""
        return

    @abc.abstractmethod
    def getDictionaryPath(self):
        """Return the path of the word file read by ``loadWordFile``."""
        return

    def getSize(self):
        """Return the number of items produced by ``getAllWordsIterable()``."""
        return sum(1 for _ in self.getAllWordsIterable())

    def getAllWordsIterable(self):
        """Yield, one at a time, each word returned by ``getAllWords()``."""
        for word in self.getAllWords():
            yield word

    def loadWordFile(self, pre_processor=None):
        """Read the UTF-8 word file and ``add`` one entry per line.

        The file is the one named by ``getDictionaryPath()``.  Each line is
        stripped of surrounding whitespace; when ``pre_processor`` is truthy
        it is called on the stripped text and its result is added instead.

        :param pre_processor: optional callable applied to each stripped line.
        :return: None
        """
        filename = self.getDictionaryPath()
        # codecs.open gives the same unicode-decoding behaviour on Python 2
        # and 3.  Iterating the reader streams the file line by line rather
        # than materialising every line up front as readlines() does.
        with codecs.open(filename, 'r', 'utf-8') as fp:
            for line in fp:
                word = line.strip()
                if pre_processor:
                    word = pre_processor(word)
                self.add(word)
class Agarathi(Dictionary):
    """Dictionary implementation that delegates storage to a ``datastore`` trie.

    The backing store is ``datastore.RTrie()`` when ``reverse`` is true and
    ``datastore.DTrie()`` otherwise.  Once ``finalize()`` has been called,
    ``add`` refuses further words.
    """

    def __init__(self, dictionary_path, reverse=False):
        """Create an empty dictionary.

        :param dictionary_path: path returned later by ``getDictionaryPath()``.
        :param reverse: selects ``datastore.RTrie`` instead of
            ``datastore.DTrie`` as the backing store.
        """
        self.dictionary_path = dictionary_path
        self.Finalized = False
        self.reverse = reverse
        if reverse:
            self.store = datastore.RTrie()
        else:
            self.store = datastore.DTrie()

    def _store_method(self, store_name, public_name):
        """Return ``self.store.<store_name>`` or raise a descriptive error.

        ``getattr`` is given a default so that a store lacking the method
        reaches the explanatory message below instead of a bare lookup
        failure.  ``AttributeError`` is raised because that is what a missing
        method already produced, and it is also an ``Exception`` subclass, so
        both kinds of ``except`` clause keep working.
        """
        method = getattr(self.store, store_name, None)
        if not method:
            raise AttributeError("%s is not an accessible method" % public_name)
        return method

    # delegate to store
    def getWordsEndingWith(self, sfx):
        """Return ``self.store.getWordsEndingWith(sfx)``.

        :raises AttributeError: when the store does not provide that method.
        """
        lookup = self._store_method('getWordsEndingWith', 'getWordsEndingWith')
        return lookup(sfx)

    # delegate to store
    def getWordsStartingWith(self, pfx, limit=float("inf")):
        """Return the store's words for prefix ``pfx``, capped at ``limit``.

        With the default (infinite) limit, or ``None``, the value returned by
        ``self.store.getAllWordsPrefix(pfx)`` is passed through unchanged.
        With a finite limit, a list holding at most ``limit`` of its leading
        items is returned.

        :raises AttributeError: when the store does not provide
            ``getAllWordsPrefix``.
        """
        lookup = self._store_method('getAllWordsPrefix', 'getWordsStartingWith')
        words = lookup(pfx)
        if limit is None or limit == float("inf"):
            return words
        limited = []
        for word in words:
            if len(limited) >= limit:
                break
            limited.append(word)
        return limited

    def hasWordsStartingWith(self, pfx):
        """Return ``self.store.hasWordPrefix(pfx)``.

        :raises AttributeError: when the store does not provide that method.
        """
        lookup = self._store_method('hasWordPrefix', 'hasWordsStartingWith')
        return lookup(pfx)

    def add(self, word):
        """Add ``word`` to the store.

        :raises Exception: if ``finalize()`` has already been called.
        """
        if self.Finalized:
            raise Exception("dictionary is finalized. cannot add more")
        self.store.add(word)

    def isWord(self, word):
        """Return ``self.store.isWord(word)``."""
        return self.store.isWord(word)

    def finalize(self):
        """Mark the dictionary read-only; later ``add`` calls raise."""
        self.Finalized = True

    def getDictionaryPath(self):
        """Return the path given to the constructor."""
        return self.dictionary_path

    def getAllWords(self):
        """Return ``self.store.getAllWords()``."""
        return self.store.getAllWords()

    def getAllWordsIterable(self):
        """Yield each word from ``self.store.getAllWordsIterable()``."""
        for word in self.store.getAllWordsIterable():
            yield word
def _reverse_dict(DictT):
    """Return a zero-argument factory for a reversed variant of ``DictT``.

    Each call of the returned factory instantiates ``DictT()``, replaces its
    ``store`` with a newly constructed ``datastore.RTrie()`` and sets its
    ``reverse`` flag to True before handing the instance back.
    """
    def build_reversed():
        reversed_dictionary = DictT()
        reversed_dictionary.store = datastore.RTrie()
        reversed_dictionary.reverse = True
        return reversed_dictionary

    return build_reversed
class EmptyAgarathi(Agarathi):
    """An ``Agarathi`` with no word file: its dictionary path is ``None``."""

    def __init__(self):
        super(EmptyAgarathi, self).__init__(dictionary_path=None)
class TamilVU(Agarathi):
    """``Agarathi`` whose word file is the ``'tamilvu'`` entry of
    ``resources.DICTIONARY_DATA_FILES``."""

    def __init__(self):
        word_file = resources.DICTIONARY_DATA_FILES['tamilvu']
        super(TamilVU, self).__init__(word_file)
class EnglishLinux(Agarathi):
    """``Agarathi`` whose word file is the ``'english'`` entry of
    ``resources.DICTIONARY_DATA_FILES``.

    Words are lower-cased both when they are added and when they are looked
    up with ``isWord``.
    """

    def __init__(self):
        word_file = resources.DICTIONARY_DATA_FILES['english']
        super(EnglishLinux, self).__init__(word_file)

    def isWord(self, word):
        """Look up the lower-cased form of ``word``."""
        lowered = word.lower()
        return super(EnglishLinux, self).isWord(lowered)

    def add(self, word):
        """Store the lower-cased form of ``word``."""
        lowered = word.lower()
        return super(EnglishLinux, self).add(lowered)
def reverse_TamilVU():
    """Build a ``TamilVU`` instance through the ``_reverse_dict`` factory."""
    make_reversed = _reverse_dict(TamilVU)
    return make_reversed()
class Madurai(Agarathi):
    """``Agarathi`` whose word file is the ``'projmad'`` entry of
    ``resources.DICTIONARY_DATA_FILES``."""

    def __init__(self):
        word_file = resources.DICTIONARY_DATA_FILES['projmad']
        super(Madurai, self).__init__(word_file)
def reverse_Madurai():
    """Build a ``Madurai`` instance through the ``_reverse_dict`` factory."""
    make_reversed = _reverse_dict(Madurai)
    return make_reversed()
class Wikipedia(Agarathi):
    """``Agarathi`` whose word file is the ``'wikipedia'`` entry of
    ``resources.DICTIONARY_DATA_FILES``."""

    def __init__(self):
        word_file = resources.DICTIONARY_DATA_FILES['wikipedia']
        super(Wikipedia, self).__init__(word_file)
def reverse_Wikipedia():
    """Build a ``Wikipedia`` instance through the ``_reverse_dict`` factory."""
    make_reversed = _reverse_dict(Wikipedia)
    return make_reversed()
# Methods for loading TamilVU, Wikipedia and Project Madurai cleaned up data
class DictionaryBuilder:
    """Static helpers that construct a dictionary and report its size."""

    @staticmethod
    def create(DType):
        """Instantiate ``DType``, load its word file and measure it.

        :param DType: a class reference or factory function; it is called
            with no arguments.
        :return: a two-item list ``[dictionary, size]``.
        :raises Exception: if ``DType`` is not callable.
        """
        if not callable(DType):
            raise Exception(u"input @DType should be a class reference, or a factory function")
        dictionary = DType()
        dictionary.loadWordFile()
        size = dictionary.getSize()
        return [dictionary, size]

    @staticmethod
    def createUsingWordList(wlist):
        """Fill a new ``EmptyAgarathi`` with the words of ``wlist``.

        :param wlist: iterable of words, each passed to ``add``.
        :return: a two-item tuple ``(dictionary, size)``.
        """
        dictionary = EmptyAgarathi()
        for entry in wlist:
            dictionary.add(entry)
        size = dictionary.getSize()
        return dictionary, size