> {-# OPTIONS_HADDOCK show-extensions #-}
> {-|
> Module    : LTK.Learn.StringExt
> Copyright : (c) 2019 Dakotah Lambert
> License   : MIT
>
> A generic implementation of Jeff Heinz's "String Extension Learning".
> For details, see https://www.aclweb.org/anthology/P10-1092
>
> @since 0.3
> -}
> module LTK.Learn.StringExt
>     ( Grammar(..)
>     , learn
>     , augmentSE
>     , isRecognizedBy
>     ) where

> import Control.DeepSeq (NFData)

> import LTK.Containers
> import LTK.FSA (FSA)

> -- |Build a grammar from a finite initial segment of a positive text,
> -- starting from the empty grammar 'emptyG'.
> -- The function argument \(f\) should be such that
> -- if \(G=f(L)\) and \(f(w)\in G\) then \(w\in L\).
> -- Entries of the text are wrapped in 'Maybe' so that a time point
> -- may carry no data, which is what makes the empty set learnable.
> learn :: (Grammar g, Ord b, Collapsible s) =>
>          (a -> g b) -> s (Maybe a) -> g b
> learn f text = augmentSE f emptyG text

> -- |Extend an existing grammar, such as one produced by @learn@,
> -- with further data.
> -- Every @Just x@ in the text contributes @f x@ and
> -- every @Nothing@ contributes 'emptyG';
> -- each contribution is merged into the running grammar
> -- with 'augmentG' by way of 'collapse'.
> augmentSE :: (Grammar g, Ord b, Collapsible s) =>
>              (a -> g b) -> g b -> s (Maybe a) -> g b
> augmentSE f g text = collapse absorb g text
>     where -- Turn one (possibly absent) datum into a grammar update.
>           absorb datum = augmentG (maybe emptyG f datum)

> -- |True iff the given object is in the set represented by the given grammar.
> -- The object @x@ is first mapped through @f@,
> -- and the answer is then @isSubGOf g (f x)@.
> isRecognizedBy :: (Grammar g, Ord b) => (a -> g b) -> g b -> a -> Bool
> isRecognizedBy f g x = isSubGOf g (f x)

> -- |A grammar is a representation of a mechanism
> -- by which one can determine
> -- whether or not a given object is in a given set.
> class Grammar g where
>     -- |An automaton corresponding to the grammar.
>     genFSA :: (NFData a, Ord a) => g a -> FSA Integer a
>     -- |Merge two grammars into one.
>     -- 'augmentSE' uses this to fold each datum's grammar
>     -- into the grammar accumulated so far.
>     augmentG :: Ord a => g a -> g a -> g a
>     -- |Binary test relating two grammars.
>     -- 'isRecognizedBy' applies it with the learned grammar first
>     -- and the grammar of the queried object second.
>     isSubGOf :: Ord a => g a -> g a -> Bool
>     -- |The grammar that 'learn' starts from,
>     -- also used by 'augmentSE' wherever the text supplies no datum.
>     emptyG :: Ord a => g a