> {-# OPTIONS_HADDOCK show-extensions #-}
> {-|
> Module    : LTK.Learn.StringExt
> Copyright : (c) 2019-2020 Dakotah Lambert
> License   : MIT
> 
> A generic implementation of Jeff Heinz's "String Extension Learning".
> For details, see https://www.aclweb.org/anthology/P10-1092
>
> @since 0.3
> -}
> module LTK.Learn.StringExt ( Grammar(..)
>                            , learn
>                            , augmentSE
>                            , isRecognizedBy
>                            ) where

> import Control.DeepSeq (NFData)

> import LTK.Containers
> import LTK.FSA (FSA)

> -- |Learn a grammar from a finite initial segment of a positive text.
> -- Learning starts from 'emptyG' and hands the whole text to 'augmentSE'.
> -- The function argument \(f\) should be such that
> -- if \(G=f(L)\) and \(f(w)\in G\) then \(w\in L\).
> -- Entries of the text are wrapped in 'Maybe' so that a time point
> -- may carry no data, which is what makes the empty set learnable.
> learn :: (Grammar g, Ord b, Collapsible s)
>       => (a -> g b) -> s (Maybe a) -> g b
> learn f text = augmentSE f emptyG text

> -- |Extend a grammar generated by 'learn' with additional data.
> -- Each datum that is present is turned into a grammar by \(f\);
> -- a time point without data contributes 'emptyG' instead.
> -- The resulting grammar is merged into the running result by 'augmentG'.
> augmentSE :: (Grammar g, Ord b, Collapsible s)
>           => (a -> g b) -> g b -> s (Maybe a) -> g b
> augmentSE f = collapse absorb
>     where absorb datum = augmentG (maybe emptyG f datum)

> -- |Decide whether an object belongs to the set that a grammar represents.
> -- The object is first mapped to a grammar by \(f\),
> -- and the result is compared against the given grammar with 'isSubGOf'.
> isRecognizedBy :: (Grammar g, Ord b) => (a -> g b) -> g b -> a -> Bool
> isRecognizedBy f g x = isSubGOf g (f x)

> -- |A grammar represents a mechanism for deciding
> -- whether or not a given object belongs to a given set.
> -- Instances supply the four primitives from which
> -- 'learn', 'augmentSE', and 'isRecognizedBy' are built.
> class Grammar g where
>     -- |Convert a grammar into an 'FSA'
>     -- (presumably one accepting the set the grammar represents;
>     -- the exact correspondence is up to the instance).
>     genFSA :: (NFData a, Ord a) => g a -> FSA Integer a
>     -- |Combine two grammars into one.
>     -- 'augmentSE' uses this to merge the grammar of each new datum
>     -- into the grammar accumulated so far.
>     augmentG :: Ord a => g a -> g a -> g a
>     -- |Compare two grammars.
>     -- 'isRecognizedBy' passes the learned grammar as the first argument
>     -- and the grammar of the candidate object as the second.
>     isSubGOf :: Ord a => g a -> g a -> Bool
>     -- |The grammar from which 'learn' starts, and the contribution
>     -- of a time point at which no data is provided.
>     emptyG :: Ord a => g a
