# Source code for solthiruthi.dictionary
## -*- coding: utf-8 -*-
## (C) 2015 Muthiah Annamalai,
##
from __future__ import print_function
import abc
import sys
import codecs
from pprint import pprint
from . import resources
from . import datastore
PYTHON3 = (sys.version[0] == '3')
# specify dictionary interface without specifying storage
class Dictionary:
    """Dictionary interface that leaves the storage backend to subclasses.

    Subclasses provide the word-level operations declared abstract below.
    ``getSize``, ``getAllWordsIterable`` and ``loadWordFile`` are implemented
    here purely in terms of those operations.
    """

    # NOTE(review): ``__metaclass__`` is only honoured by Python 2. On
    # Python 3 it is an ordinary class attribute, so the ``abstractmethod``
    # markers below are not enforced there. Left unchanged so existing
    # subclasses keep instantiating exactly as before.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def add(self, word):
        """Add ``word`` to the dictionary (provided by subclasses)."""
        return

    @abc.abstractmethod
    def getWordsEndingWith(self, sfx):
        """Return the words ending with ``sfx`` (provided by subclasses)."""
        return

    @abc.abstractmethod
    def hasWordsStartingWith(self, pfx):
        """Tell whether any word starts with ``pfx`` (provided by subclasses)."""
        return

    @abc.abstractmethod
    def getWordsStartingWith(self, pfx):
        """Return the words starting with ``pfx`` (provided by subclasses)."""
        return

    @abc.abstractmethod
    def isWord(self, word):
        """Tell whether ``word`` is in the dictionary (provided by subclasses)."""
        return

    @abc.abstractmethod
    def getAllWords(self):
        """Return every word in the dictionary (provided by subclasses)."""
        return

    @abc.abstractmethod
    def getDictionaryPath(self):
        """Return the path of the word file read by ``loadWordFile``."""
        return

    def getSize(self):
        """Return the number of words, counted by walking ``getAllWordsIterable``."""
        return sum(1 for _ in self.getAllWordsIterable())

    def getAllWordsIterable(self):
        """Yield the words returned by ``getAllWords`` one at a time."""
        for word in self.getAllWords():
            yield word

    def loadWordFile(self, pre_processor=None):
        """Read the UTF-8 word file at ``getDictionaryPath()`` and ``add`` each line.

        Every line is stripped of surrounding whitespace before it is added.

        :param pre_processor: optional callable applied to each stripped line;
            its return value is what gets added.  Skipped when falsy.
        """
        filename = self.getDictionaryPath()
        # codecs.open keeps this working on both Python 2 and Python 3.
        with codecs.open(filename, 'r', 'utf-8') as fp:
            # Iterate the reader directly instead of readlines() so the whole
            # word file is never held in memory at once.
            for line in fp:
                word = line.strip()
                if pre_processor:
                    word = pre_processor(word)
                self.add(word)
        return
class Agarathi(Dictionary):
    """Dictionary that delegates storage and lookups to a ``datastore`` trie.

    The backing ``store`` is a ``datastore.RTrie`` when ``reverse`` is true and
    a ``datastore.DTrie`` otherwise.  Once ``finalize`` has been called,
    further ``add`` calls raise.
    """

    def __init__(self, dictionary_path, reverse=False):
        """Record the word-file path and create the backing trie.

        :param dictionary_path: path later returned by ``getDictionaryPath``.
        :param reverse: when true, back the dictionary with ``datastore.RTrie``.
        """
        self.dictionary_path = dictionary_path
        self.Finalized = False
        self.reverse = reverse
        if reverse:
            self.store = datastore.RTrie()
        else:
            self.store = datastore.DTrie()

    def _store_method(self, store_name, public_name):
        """Return ``self.store.<store_name>`` or raise if the store lacks it.

        ``getattr`` is given a default so that a missing method reaches the
        check below rather than raising before the intended message can be
        produced.  ``AttributeError`` is raised because it is what callers saw
        for a missing method before, and it is still an ``Exception``.
        """
        method = getattr(self.store, store_name, None)
        if not method:
            raise AttributeError(public_name + " is not an accessible method")
        return method

    # delegate to store
    def getWordsEndingWith(self, sfx):
        """Return ``self.store.getWordsEndingWith(sfx)``.

        :raises AttributeError: if the store has no ``getWordsEndingWith``.
        """
        return self._store_method('getWordsEndingWith', 'getWordsEndingWith')(sfx)

    # delegate to store
    def getWordsStartingWith(self, pfx, limit=float("inf")):
        """Return ``self.store.getAllWordsPrefix(pfx)``.

        NOTE(review): ``limit`` is accepted but not used; the full result of
        the store is returned regardless of its value.

        :raises AttributeError: if the store has no ``getAllWordsPrefix``.
        """
        return self._store_method('getAllWordsPrefix', 'getWordsStartingWith')(pfx)

    def hasWordsStartingWith(self, pfx):
        """Return ``self.store.hasWordPrefix(pfx)``.

        :raises AttributeError: if the store has no ``hasWordPrefix``.
        """
        return self._store_method('hasWordPrefix', 'hasWordsStartingWith')(pfx)

    def add(self, word):
        """Add ``word`` to the store.

        :raises Exception: if ``finalize`` has already been called.
        """
        if self.Finalized:
            raise Exception("dictionary is finalized. cannot add more")
        self.store.add(word)
        return

    def isWord(self, word):
        """Return ``self.store.isWord(word)``."""
        return self.store.isWord(word)

    def finalize(self):
        """Mark the dictionary as finalized so later ``add`` calls raise."""
        self.Finalized = True

    def getDictionaryPath(self):
        """Return the path given at construction time."""
        return self.dictionary_path

    def getAllWords(self):
        """Return ``self.store.getAllWords()``."""
        return self.store.getAllWords()

    def getAllWordsIterable(self):
        """Yield each word produced by ``self.store.getAllWordsIterable()``."""
        for word in self.store.getAllWordsIterable():
            yield word
def _reverse_dict(DictT):
    """Build a zero-argument factory for a reversed ``DictT`` instance.

    The returned factory instantiates ``DictT`` with no arguments, sets its
    ``reverse`` attribute to ``True`` and replaces its ``store`` with a fresh
    ``datastore.RTrie``.
    """
    def function_reverse_dict_type():
        reversed_dictionary = DictT()
        reversed_dictionary.reverse = True
        # Swap out whatever store the constructor created.
        reversed_dictionary.store = datastore.RTrie()
        return reversed_dictionary

    return function_reverse_dict_type
class EmptyAgarathi(Agarathi):
    """``Agarathi`` created with no dictionary path (``None``)."""

    def __init__(self):
        # No word file is associated with this dictionary.
        Agarathi.__init__(self, None)
class TamilVU(Agarathi):
    """``Agarathi`` bound to the ``'tamilvu'`` entry of ``resources.DICTIONARY_DATA_FILES``."""

    def __init__(self):
        tamilvu_path = resources.DICTIONARY_DATA_FILES['tamilvu']
        Agarathi.__init__(self, tamilvu_path)
class EnglishLinux(Agarathi):
    """``Agarathi`` bound to the ``'english'`` data file; words are lower-cased.

    Both ``add`` and ``isWord`` lower-case their argument before delegating to
    ``Agarathi``.
    """

    def __init__(self):
        english_path = resources.DICTIONARY_DATA_FILES['english']
        Agarathi.__init__(self, english_path)

    def isWord(self, word):
        """Look up the lower-cased form of ``word``."""
        lowered = word.lower()
        return Agarathi.isWord(self, lowered)

    def add(self, word):
        """Add the lower-cased form of ``word``."""
        lowered = word.lower()
        return Agarathi.add(self, lowered)
def reverse_TamilVU():
    """Return a ``TamilVU`` instance produced by the ``_reverse_dict`` factory."""
    make_reversed = _reverse_dict(TamilVU)
    return make_reversed()
class Madurai(Agarathi):
    """``Agarathi`` bound to the ``'projmad'`` entry of ``resources.DICTIONARY_DATA_FILES``."""

    def __init__(self):
        projmad_path = resources.DICTIONARY_DATA_FILES['projmad']
        Agarathi.__init__(self, projmad_path)
def reverse_Madurai():
    """Return a ``Madurai`` instance produced by the ``_reverse_dict`` factory."""
    make_reversed = _reverse_dict(Madurai)
    return make_reversed()
class Wikipedia(Agarathi):
    """``Agarathi`` bound to the ``'wikipedia'`` entry of ``resources.DICTIONARY_DATA_FILES``."""

    def __init__(self):
        wikipedia_path = resources.DICTIONARY_DATA_FILES['wikipedia']
        Agarathi.__init__(self, wikipedia_path)
def reverse_Wikipedia():
    """Return a ``Wikipedia`` instance produced by the ``_reverse_dict`` factory."""
    make_reversed = _reverse_dict(Wikipedia)
    return make_reversed()
# Methods for loading TamilVU, Wikipedia and Project Madurai cleaned up data
class DictionaryBuilder:
    """Static helpers that build a dictionary object and report its size."""

    @staticmethod
    def create(DType):
        """Instantiate ``DType``, load its word file and return ``[obj, size]``.

        :param DType: a class or factory function, called with no arguments.
        :returns: a two-item list of the loaded object and ``obj.getSize()``.
        :raises Exception: if ``DType`` is not callable.
        """
        if callable(DType):
            dictionary = DType()
            dictionary.loadWordFile()
            return [dictionary, dictionary.getSize()]
        raise Exception(u"input @DType should be a class reference, or a factory function")

    @staticmethod
    def createUsingWordList(wlist):
        """Fill an ``EmptyAgarathi`` from ``wlist`` and return ``(obj, size)``.

        :param wlist: iterable of words, each passed to ``add``.
        :returns: a tuple of the dictionary and ``obj.getSize()``.
        """
        dictionary = EmptyAgarathi()
        for entry in wlist:
            dictionary.add(entry)
        word_count = dictionary.getSize()
        return dictionary, word_count