module Data.Text.KGrams where

import qualified Data.Text as T

-- | Blank out a fixed set of punctuation characters.
--
-- Each full stop, question mark, exclamation mark, hyphen, semicolon,
-- single quote and double quote is replaced by one space, so the length
-- of the text is preserved. All other characters pass through unchanged.
textRemovePunc :: T.Text -> T.Text
textRemovePunc = T.map blankOut
  where
    -- The characters that are treated as punctuation.
    punctuation :: String
    punctuation = ".?!-;\'\""

    blankOut :: Char -> Char
    blankOut c
        | c `elem` punctuation = ' '
        | otherwise = c

-- | Split a text into normalized words.
--
-- The input is first passed through 'textRemovePunc', then lower-cased
-- with 'T.toLower', then split with 'T.words'. Any empty pieces are
-- discarded from the result.
textNormalizedWords :: T.Text -> [T.Text]
textNormalizedWords input = [piece | piece <- pieces, not (T.null piece)]
  where
    depunctuated = textRemovePunc input
    lowered = T.toLower depunctuated
    pieces = T.words lowered

-- | Collect the k-grams of every normalized word in a text.
--
-- The text is broken into words by 'textNormalizedWords'; 'kgrams' is
-- applied to each word with the given @size@, and the per-word results
-- are concatenated in word order.
textKGrams :: Int -> T.Text -> [T.Text]
textKGrams size txt =
    [ gram
    | word <- textNormalizedWords txt
    , gram <- kgrams size word
    ]

-- | All contiguous substrings of exactly @size@ characters, in order of
-- their starting position.
--
-- Returns the empty list when the input is shorter than @size@, and also
-- when @size@ is zero or negative. Without the latter check the sliding
-- window would never hit its stop condition and would produce an endless
-- stream of empty texts.
kgrams :: Int -> T.Text -> [T.Text]
kgrams size inp
    | size <= 0 = []
    | otherwise = go inp
  where
    -- 'T.compareLength' stops counting once it has seen @size@ characters,
    -- so each step avoids measuring the whole remaining suffix.
    go txt
        | T.compareLength txt size == LT = []
        | otherwise = T.take size txt : go (T.drop 1 txt)