Source code for ngram.WordModels

# -*- coding: utf-8 -*-
# 
# (C) முத்தையா அண்ணாமலை 2015
# 
# N-gram language model for Tamil words

import tamil
[docs]def get_ngram_groups( word, n=1): # makes no sense - but can happen letters = tamil.utf8.get_letters( word ) L = len( letters ) if n >= L: return [word] if n == 1: return letters result = [] for i in range(0,L-n+1): result.append(u"".join(letters[i:min(i+n,L)])) return result