solthiruthi package

Submodules

solthiruthi.Ezhimai module

class solthiruthi.Ezhimai.PattiyalThiruthi(option)

Bases: solthiruthi.WordSpeller.ISpeller

static loadWordFile(filename)
process_word(word)

solthiruthi.WordSpeller module

class solthiruthi.WordSpeller.ISpeller

Bases: object

get_return_obj(word)
process_word(word)

solthiruthi.data_parser module

class solthiruthi.data_parser.DataParser(files)

Bases: object

analysis()
parse_data(filename)
process()
static run(args)
class solthiruthi.data_parser.WordList(cat)

Bases: object

add(word)

solthiruthi.datastore module

class solthiruthi.datastore.DTrie

Bases: solthiruthi.datastore.Trie

Trie in which the number of alphabets at each node grows with time; the implementation uses a dictionary, and it contains a count attribute for the frequency of a letter.

add(word)
static buildEnglishTrie(nalpha=26)
getAllWords()
getAllWordsAndCount()
getAllWordsHelper(ref_trie, prefix, all_words)
getAllWordsIterable()
getAllWordsIterableHelper(ref_trie, prefix)
getAllWordsPrefix(prefix)
getWordCount(word)
get_letters(word)
hasWordPrefix(wrd_prefix)
isWord(word, ret_ref_trie=False)

return a boolean as first output, and second output will be the reference trie

isWordAndTrie(word, prefix=False)
class solthiruthi.datastore.Node

Bases: object

class solthiruthi.datastore.Queue

Bases: list

ExceptionMsg = 'Queue does not support list method %s'
append(object) → None -- append object to end
insert(obj)

L.insert(index, object) -- insert object before index

isempty()
peek()

look at next imminent item

remove(value) → None -- remove first occurrence of value.

Raises ValueError if the value is not present.

reverse()

L.reverse() -- reverse IN PLACE

sort(key=None, reverse=False) → None -- stable sort *IN PLACE*
class solthiruthi.datastore.RTrie(is_tamil=False)

Bases: solthiruthi.datastore.DTrie

add(word)
getAllWordsIterable()
getAllWordsPrefix(pfx)
getWordsEndingWith(sfx)
reverse(word)
class solthiruthi.datastore.TamilTrie(get_idx=<function getidx>, invert_idx=<function tamil>, alphabet_len=323)

Bases: solthiruthi.datastore.Trie

Store a list of words into the Trie data structure

add(word)
static buildEnglishTrie(nalpha=26)
getAllWords()
getAllWordsHelper(ref_trie, ref_word_limits, prefix, all_words)
getAllWordsIterable()
getAllWordsPrefix(prefix)
get_letters(word)
hasWordPrefix(prefix)
isWord(word, ret_ref_trie=False)

return a boolean as first output, and second output will be the reference trie

class solthiruthi.datastore.Trie

Bases: object

add(word)
static deserializeFromFile(filename)
getAllWords()
getAllWordsIterable()
getAllWordsPrefix(prefix)
hasWordPrefix(prefix)
isWord(word, ret_ref_trie=False)

return a boolean as first output, and second output will be the reference trie

loadWordFile(filename)
static mk_empty_trie(alpha_len)
static serializeToFile(obj, filename)
solthiruthi.datastore.do_load()

4 GB program - very inefficient

solthiruthi.datastore.do_stuff()

solthiruthi.dictionary module

class solthiruthi.dictionary.Agarathi(dictionary_path, reverse=False)

Bases: solthiruthi.dictionary.Dictionary

add(word)
finalize()
getAllWords()
getAllWordsIterable()
getDictionaryPath()
getWordsEndingWith(sfx)
getWordsStartingWith(pfx, limit=inf)
hasWordsStartingWith(pfx)
isWord(word)
class solthiruthi.dictionary.Dictionary

Bases: object

add(word)
getAllWords()
getAllWordsIterable()
getDictionaryPath()
getSize()
getWordsEndingWith(sfx)
getWordsStartingWith(pfx)
hasWordsStartingWith(pfx)
isWord(word)
loadWordFile(pre_processor=None)
class solthiruthi.dictionary.DictionaryBuilder

Bases: object

static create(DType)
static createUsingWordList(wlist)
class solthiruthi.dictionary.EmptyAgarathi

Bases: solthiruthi.dictionary.Agarathi

class solthiruthi.dictionary.EnglishLinux

Bases: solthiruthi.dictionary.Agarathi

add(word)
isWord(word)
class solthiruthi.dictionary.Madurai

Bases: solthiruthi.dictionary.Agarathi

class solthiruthi.dictionary.ParallelDictionary

Bases: solthiruthi.dictionary.Agarathi

getWordTranslation(word)
loadWordFile()
class solthiruthi.dictionary.SimpleDictionary(filename)

Bases: solthiruthi.dictionary.Dictionary

add(w)
getAllWords()
getDictionaryPath()
getWordsEndingWith(pfx)
getWordsStartingWith(pfx)
hasWordsStartingWith(pfx)
isWord(w)
loadWordFile()
class solthiruthi.dictionary.TamilVU

Bases: solthiruthi.dictionary.Agarathi

class solthiruthi.dictionary.Wikipedia

Bases: solthiruthi.dictionary.Agarathi

solthiruthi.dictionary.reverse_Madurai()
solthiruthi.dictionary.reverse_TamilVU()
solthiruthi.dictionary.reverse_Wikipedia()

solthiruthi.dom module

class solthiruthi.dom.Document(filename)

Bases: solthiruthi.datastore.Queue

open contents of a file on load

tokenize()
class solthiruthi.dom.Entity(word, flagged=False, **kwargs)

Bases: solthiruthi.dom.Position

getLetters()
isFlagged()
isWord()
class solthiruthi.dom.NonEntity(word, **kwargs)

Bases: solthiruthi.dom.Entity, solthiruthi.dom.Position

isWord()
class solthiruthi.dom.Position(row, col)

Bases: object

class solthiruthi.dom.WordEntity(word, **kwargs)

Bases: solthiruthi.dom.Entity

isWord()

solthiruthi.heuristics module

class solthiruthi.heuristics.AdjacentConsonants(freq=2)

Bases: solthiruthi.heuristics.Rule

Do not allow adjacent consonants in the word. This may not be as useful as the AdjacentVowels rule.

agaram_letters = {'க', 'ங', 'ச', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ன', 'ப', 'ம', 'ய', 'ர', 'ற', 'ல', 'ள', 'ழ', 'வ'}
apply(word, ctx=None)

ignore ctx information right now

mei_letters = {'க்', 'ங்', 'ச்', 'ஞ்', 'ட்', 'ண்', 'த்', 'ந்', 'ன்', 'ப்', 'ம்', 'ய்', 'ர்', 'ற்', 'ல்', 'ள்', 'ழ்', 'வ்'}
reason = 'ஒன்றைத்தொடர்ந்துஒன்று மெய் எழுத்துக்கள் வரக்கூடாது. இது பெரும்பாலும் பிழையாக இருக்கும்.'
class solthiruthi.heuristics.AdjacentVowels

Bases: solthiruthi.heuristics.Rule

Do not allow adjacent vowels in the word. ஆஅக்காள் (originally -> அக்காள்) will be flagged.

apply(word, ctx=None)

ignore ctx information right now

reason = 'ஒன்றைத்தொடர்ந்துஒன்று உயிரெழுத்துக்கள் வரக்கூடாது. இது பெரும்பாலும் பிழையாக இருக்கும்.'
uyir_letters = {'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'ஔ'}
class solthiruthi.heuristics.BadIME

Bases: solthiruthi.heuristics.Rule

Do not allow vowels with kombu, thunaikaal, etc. in the word. ஆாள் (originally intended as -> ஆள்) will be flagged.

apply(word, ctx=None)

ignore ctx information right now

reason = 'சொல்லில் பிழை காரணம், இல்லாத தமிழ் எழுத்து..'
uyir_letters = {'அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'ஔ'}
class solthiruthi.heuristics.RepeatedLetters

Bases: solthiruthi.heuristics.Rule

Do not allow more than one repetition of a letter in a word.

apply(word, ctx=None)

ignore ctx information right now

reason = 'ஒரே எழுத்து பல முரை (>= 2) தொடர்ச்சியாக வந்தால் அது பிழையான சொல் ஆகும்'
class solthiruthi.heuristics.Rule

Bases: object

apply(word, ctx)

@word is the word itself. @ctx is a dict holding the number of previous words, the number of next words, and lists of the surrounding words as items, e.g. ctx = {'NPrev' : 4, 'Prev' : [w1,w2,w3,w4],'NNext':2,'Next':[w1,w2]}. The return value should be a boolean (False if an error is found), with an optional reason as the second return value.

class solthiruthi.heuristics.Sequential

Bases: object

static in_sequence(word, ref_set, ref_reason, freq_threshold=2)

Ignores ctx information for now. If the repetition/match length is >= @freq_threshold, the word is flagged.

solthiruthi.heuristics.get_letters(word)

solthiruthi.morphology module

class solthiruthi.morphology.CaseFilter(*filter_obj_list)

Bases: object

apply(word_in)
class solthiruthi.morphology.RemoveCaseSuffix

Bases: solthiruthi.morphology.RemoveSuffix

apply(word)
setSuffixes()
class solthiruthi.morphology.RemoveHyphenatesNumberDate

Bases: solthiruthi.morphology.RemoveCaseSuffix

Done correctly (மேல்) 65536-மேல், ivan paritchayil இரண்டாவது, 2-வது

class solthiruthi.morphology.RemoveNegationSuffix

Bases: solthiruthi.morphology.RemoveCaseSuffix

setSuffixes()
class solthiruthi.morphology.RemovePluralSuffix

Bases: solthiruthi.morphology.RemoveSuffix

apply(word)
setSuffixes()
class solthiruthi.morphology.RemovePrefix

Bases: solthiruthi.morphology.RemoveSuffix

apply(word)
removePrefix(word)
setSuffixes()
class solthiruthi.morphology.RemoveSuffix

Bases: object

apply(word)
prepareSuffixes()
removeSuffix(word)
setSuffixes()
class solthiruthi.morphology.RemoveVerbSuffixTense

Bases: solthiruthi.morphology.RemoveCaseSuffix

setSuffixes()
solthiruthi.morphology.xkcd()

solthiruthi.resources module

solthiruthi.resources.get_data_categories()
solthiruthi.resources.get_data_dictionaries()
solthiruthi.resources.get_data_dir()
solthiruthi.resources.mk_path(srcfile)

solthiruthi.scoring module

class solthiruthi.scoring.NGStats

Bases: object

bigram_score(letters)
load()
unigram_score(letters)
solthiruthi.scoring.bigram_scores(letters)
solthiruthi.scoring.unigram_score(letters)

solthiruthi.solthiruthi module

class solthiruthi.solthiruthi.Solthiruthi

Bases: object

static get_CLI_options(do_parse=True, DEBUG=False)

solthiruthi.suggestions module

solthiruthi.suggestions.kombu_suggestor()
solthiruthi.suggestions.mayangoli_suggestor()
Rules:

ண, ன - mayakkam; ல, ழ, ள - mayakkam; ர, ற - mayakkam.

ivattrilum ithan uyirmei varisayilum mayakkangalai kaanalaam.
solthiruthi.suggestions.norvig_suggestor(word, alphabets=None, nedits=1, limit=inf)

solthiruthi.vinaisorkal module

class solthiruthi.vinaisorkal.VerbClass(classify, words)

Bases: object

class solthiruthi.vinaisorkal.VinaiSorkal

Bases: object

Doublets = <solthiruthi.vinaisorkal.VerbClass object>
IrregularVerbs = <solthiruthi.vinaisorkal.VerbClass object>
class solthiruthi.vinaisorkal.struct

Bases: object

static build(**kwargs)

Module contents