Source code for solthiruthi.Ezhimai

## -*- coding: utf-8 -*-
## (C) 2015 Muthiah Annamalai
## 
from __future__ import print_function

import codecs
import os
import sys
from tamil import utf8
from pprint import pprint
from . import WordSpeller
from . import resources

# True when running on Python 3 or newer.  Compare the numeric major version
# rather than the version *string*: a lexicographic test such as
# "10.0" > "3" evaluates to False.
PYTHON3 = sys.version_info[0] >= 3

class PattiyalThiruthi(WordSpeller.ISpeller):
    """Whitelist-based spell checker.

    A word is reported as an error when it is not a member of the word
    collection loaded from the dictionary data file.
    """

    def __init__(self, option):
        """Spell checker based on a whitelist agarathi (dictionary).

        :param option: dictionary selector; only 'std' is accepted, which
            loads the file at resources.DICTIONARY_DATA_FILES[u'tamilvu'].
        :raises ValueError: if ``option`` is anything other than 'std'.
            ValueError derives from Exception, so callers that catch
            Exception keep working.
        """
        if option != 'std':
            # Wrap the value in a 1-tuple: a tuple-valued option would
            # otherwise be unpacked as format arguments and raise TypeError
            # instead of the intended error.
            raise ValueError(u'unknown dictionary specified %s' % (option,))
        self.agarathi = PattiyalThiruthi.loadWordFile(
            resources.DICTIONARY_DATA_FILES[u'tamilvu'])

    def process_word(self, word):
        """Check ``word`` against the whitelist.

        :param word: the word to look up in ``self.agarathi``.
        :returns: the object produced by ``self.get_return_obj(word)``, with
            its 'is_error' entry set to True when the word is not in the
            whitelist.  Per the original author's note the object looks like
            {'word': word, 'is_error': False, 'alternatives': None}
            (get_return_obj is not defined in this class -- presumably
            inherited from WordSpeller.ISpeller; confirm there).
        """
        rval = self.get_return_obj(word)
        if word not in self.agarathi:
            rval['is_error'] = True
        # This checker does not provide alternatives, so
        # rval['alternatives'] is left untouched.
        return rval

    @staticmethod
    def loadWordFile(filename):
        """Load a UTF-8 word list, one word per line.

        Each line is stripped of surrounding whitespace before being stored.

        :param filename: path of the UTF-8 encoded word file.
        :returns: a frozenset of the words on Python 3, a set on Python 2.
        """
        with codecs.open(filename, 'r', 'utf-8') as fp:
            # Build the list while the file is still open.
            words = [line.strip() for line in fp.readlines()]
        if PYTHON3:
            return frozenset(words)
        return set(words)
if __name__ == "__main__":
    # Demo: run the standard whitelist checker over a sample Tamil sentence
    # and pretty-print the per-word results.
    obj = PattiyalThiruthi('std')
    in_words = u"டைட்டானிக் படத்தில் வரும் ஜேக் மற்றும் ரோஸ் போன்று தன் காதலை வெளிப்படுத்தும் இரு தவளைகள்".split()
    # Materialize the results as a list: on Python 3 map() is lazy, so
    # pprint(map(...)) would only print the map object's repr.
    pprint([obj.process_word(word) for word in in_words])