tamil package

Submodules

tamil.date module

class tamil.date.BasicTamilTimeFormat[source]
static format(month, month_day, week_day, hour, minute, second)[source]
class tamil.date.DateUtils[source]
DAY = u'\u0ba8\u0bbe\u0bb3\u0bcd'
DAY_SUFFIX = u'\u0b95\u0bbf\u0bb4\u0bae\u0bc8'
HOUR = u'\u0bae\u0ba3\u0bbf'
MINUTE = u'\u0ba8\u0bbf\u0bae\u0bbf\u0b9f\u0bae\u0bcd'
MONTH = u'\u0bae\u0bbe\u0ba4\u0bae\u0bcd'
MONTHS = {u'April': u'\u0b8f\u0baa\u0bcd\u0bb0\u0bb2\u0bcd', u'August': u'\u0b86\u0b95\u0bb8\u0bcd\u0b9f\u0bcd', u'December': u'\u0b9f\u0bbf\u0b9a\u0bae\u0bcd\u0baa\u0bb0\u0bcd', u'February': u'\u0baa\u0bbf\u0baa\u0bcd\u0bb0\u0bb5\u0bb0\u0bbf', u'January': u'\u0b9c\u0ba9\u0bb5\u0bb0\u0bbf', u'July': u'\u0b9c\u0bc2\u0bb2\u0bc8', u'June': u'\u0b9c\u0bc2\u0ba9\u0bcd', u'March': u'\u0bae\u0bbe\u0bb0\u0bcd\u0b9a\u0bcd', u'May': u'\u0bae\u0bc7', u'November': u'\u0ba8\u0bb5\u0bae\u0bcd\u0baa\u0bb0\u0bcd', u'October': u'\u0b85\u0b95\u0bcd\u0b9f\u0bc7\u0bbe\u0baa\u0bb0\u0bcd', u'September': u'\u0b9a\u0bc6\u0baa\u0bcd\u0b9f\u0bae\u0bcd\u0baa\u0bb0\u0bcd'}
MONTHS_INDEX = [None, u'January', u'February', u'March', u'April', u'May', u'June', u'July', u'August', u'September', u'October', u'November', u'December']
TIME = u'\u0ba8\u0bc7\u0bb0\u0bae\u0bcd'
WEEK = u'\u0bb5\u0bbe\u0bb0\u0bae\u0bcd'
WEEKDAYS = {u'friday': u'\u0bb5\u0bc6\u0bb3\u0bcd\u0bb3\u0bbf', u'monday': u'\u0ba4\u0bbf\u0b99\u0bcd\u0b95\u0bb3\u0bcd', u'saturday': u'\u0b9a\u0ba9\u0bbf\u0b95\u0bcd\u0b95\u0bbf\u0bb4\u0bae\u0bc8', u'sunday': u'\u0b9e\u0bbe\u0baf\u0bbf\u0bb1\u0bc1', u'thursday': u'\u0bb5\u0bbf\u0baf\u0bbe\u0bb4\u0ba9\u0bcd', u'tuesday': u'\u0b9a\u0bc6\u0bb5\u0bcd\u0bb5\u0bbe\u0baf\u0bcd', u'wednesday': u'\u0baa\u0bc1\u0ba4\u0ba9\u0bcd'}
WEEKDAYS_INDEX = [u'monday', u'tuesday', u'wednesday', u'thursday', u'friday', u'saturday', u'sunday']
YEAR = u'\u0b86\u0ba3\u0bcd\u0b9f\u0bc1'
static get_hour_prefix()[source]
static get_time(fmt=None)[source]
static tamil_month()[source]
static tamil_weekday()[source]

tamil.iscii module

tamil.iscii.convert_to_unicode(tscii_input)[source]

Convert a byte-ASCII encoded string into the equivalent Unicode string in UTF-8 notation.

tamil.iscii.print_table()[source]

tamil.numeral module

tamil.numeral.num2tamilstr(*args)[source]

Works up to one lakh crore, i.e. 1e5 * 1e7 = 1e12. Turns a number into a numeral, Indian style. Fractions up to 1e-30.

tamil.numeral.num2tamilstr_american(*args)[source]

tamil.regexp module

tamil.regexp.expand_sequence(start, end, seq)[source]
tamil.regexp.expand_tamil(start, end)[source]

expand a uyir or mei-letter range etc., e.g. அ-ஔ gets converted to அ,ஆ,இ,ஈ,உ,ஊ,எ,ஏ,ஐ,ஒ,ஓ,ஔ.

tamil.regexp.is_containing_seq(start, end, seq)[source]
tamil.regexp.make_pattern(patt, flags=0)[source]

returns a compiled regular expression object

tamil.regexp.match(patt, inputstr)[source]
tamil.regexp.search(patt, inputstr)[source]

tamil.tscii module

tamil.tscii.convert_to_unicode(tscii_input)[source]

Convert a byte-ASCII encoded string into the equivalent Unicode string in UTF-8 notation.

tamil.tscii.print_table()[source]

tamil.tscii2utf8 module

tamil.tscii2utf8.usage()[source]

tamil.tweetparser module

class tamil.tweetparser.TamilTweetParser(timeline_owner, tweet)[source]

Bases: tamil.tweetparser.TweetParser

static cleanupPunct()[source]

NonEnglishOrTamilOr

static getTamilWords()[source]

each word needs to be entirely in the same language (Tamil)

static isTamilPredicate()[source]

is Tamil word : boolean True/False

class tamil.tweetparser.TweetParser(timeline_owner, tweet)[source]
static getAttributeMT()[source]

see if tweet is a MT

static getAttributeRT()[source]

see if tweet is a RT

static getHashtags()[source]

return all hashtags

static getURLs()[source]

URL : [http://]?[w.?/]+

static getUserHandles()[source]

given a tweet we try and extract all user handles in order of occurrence

tamil.utf8 module

tamil.utf8.accent_len()[source]
tamil.utf8.agaram(idx)[source]
tamil.utf8.agaram_len()[source]
tamil.utf8.all_tamil(word_in)[source]

predicate checks if all letters of the input word are Tamil letters

tamil.utf8.ayudha_len()[source]
tamil.utf8.classify_letter(letter)[source]
tamil.utf8.compare_words_lexicographic(word_a, word_b)[source]

compare words in Tamil lexicographic order

tamil.utf8.get_letters(word)[source]

splits the word into a character-list of tamil/english characters present in the stream

tamil.utf8.get_letters_elementary(word)[source]
tamil.utf8.get_letters_elementary_iterable(word)[source]
tamil.utf8.get_letters_iterable(word)[source]

splits the word into a character-list of tamil/english characters present in the stream

tamil.utf8.get_tamil_words(letters)[source]

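return the Tamil words found in the given list of letters (NOTE: the original description here, "reverse a Tamil word according to letters, not unicode-points", is identical to the one for reverse_word and appears to be a copy-paste error; confirm against the implementation)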

tamil.utf8.get_words(letters, tamil_only=False)[source]
tamil.utf8.get_words_iterable(letters, tamil_only=False)[source]

given a list of UTF-8 letters, section them into words, grouping them at spaces

tamil.utf8.getidx(letter)[source]
tamil.utf8.has_tamil(word)[source]

check if the word has any occurrence of any tamil letter

tamil.utf8.is_normalized(text)[source]
tamil.utf8.is_tamil_unicode(sequence)[source]
tamil.utf8.is_tamil_unicode_predicate(x)
tamil.utf8.istamil(tchar)[source]

check if the letter tchar is a prefix of any tamil-letter. It suggests we have a tamil identifier

tamil.utf8.istamil_alnum(tchar)[source]

check if the character is alphanumeric, or tamil. This saves time from running through istamil() check.

tamil.utf8.istamil_prefix(word)[source]

check if the given word has a tamil prefix. Returns either a True/False flag

tamil.utf8.joinMeiUyir(mei_char, uyir_char)[source]

This function joins a mei character and a uyir character, and returns the compound uyirmei unicode character.

Inputs:
mei_char : It must be unicode tamil mei char. uyir_char : It must be unicode tamil uyir char.

Written By : Arulalan.T Date : 22.09.2014

tamil.utf8.letters_to_py(_letters)[source]

return list of letters e.g. uyir_letters as a Python list

tamil.utf8.mei(idx)[source]
tamil.utf8.mei_len()[source]
tamil.utf8.mei_to_agaram(in_syllable)[source]
tamil.utf8.print_tamil_words(tatext, use_frequencies=False)[source]
tamil.utf8.reverse_word(word)[source]

reverse a Tamil word according to letters not unicode-points

tamil.utf8.splitMeiUyir(uyirmei_char)[source]

This function splits a uyirmei compound character into mei + uyir characters and returns them as a tuple.

Input : It must be unicode tamil char.

Written By : Arulalan.T Date : 22.09.2014

tamil.utf8.tamil(idx)[source]

retrieve Tamil letter at canonical index from array utf8.tamil_letters

tamil.utf8.tamil_len()[source]
tamil.utf8.tamil_sorted(list_data)[source]
tamil.utf8.to_unicode_repr(_letter)[source]

helpful in situations where a browser/app may recognize Unicode encoding in the \u0b8e type syntax but not the actual unicode glyph/code-point

tamil.utf8.unicode_normalize(cplxchar)[source]
tamil.utf8.uyir(idx)[source]
tamil.utf8.uyir_len()[source]
tamil.utf8.uyirmei(idx)[source]
tamil.utf8.uyirmei_constructed(mei_idx, uyir_idx)[source]

construct the uyirmei letter given the mei index and uyir index

tamil.utf8.uyirmei_len()[source]
tamil.utf8.word_intersection(word_a, word_b)[source]

return a list of tuples where word_a, word_b intersect

tamil.wordutils module

class tamil.wordutils.DictionaryFixedWordList(wlist)[source]

Bases: object

hasWordsStartingWith(pfx)[source]
isWord(word)[source]
class tamil.wordutils.DictionaryWithPredicate(isWord)

Bases: tuple

isWord

Alias for field number 0

tamil.wordutils.all_plaindromes(dictionary)[source]
tamil.wordutils.anagrams(word, dictionary, permutations=<function tamil_permutations>)[source]
tamil.wordutils.anagrams_in_dictionary(dictionary)[source]
tamil.wordutils.combinagrams(word, dictionary, limit=inf)[source]
tamil.wordutils.combinations(symbols_in)[source]
tamil.wordutils.default_true(*args)[source]
tamil.wordutils.greedy_split(inword, dictionary)[source]
tamil.wordutils.is_anagram(wordA, wordB)[source]
tamil.wordutils.is_palindrome(*args)[source]
tamil.wordutils.palindrome(symbols_in)[source]
tamil.wordutils.permutagrams(word, dictionary)[source]
tamil.wordutils.permutations(symbols, predicate=<function default_true>, prefix=u'')[source]
tamil.wordutils.rhymes_with(inword, reverse_dictionary)[source]
tamil.wordutils.tamil_permutations(inword)[source]
tamil.wordutils.word_split(inword, dictionary)[source]

Module contents