> {-# OPTIONS_HADDOCK show-extensions #-}
> {-|
> Module    : LTK.Learn.SL
> Copyright : (c) 2019 Dakotah Lambert
> License   : MIT

> This module implements a string extension learner for the SL class.
>
> @since 0.3
> -}

> module LTK.Learn.SL (SLG(..), fSL) where

> import Data.Set (Set)
> import qualified Data.Set as Set

> import LTK.Factors
> import LTK.FSA
> import LTK.Learn.StringExt

> -- |Return the set of \(k\)-factors under successor in the given word.
> -- Factors are triples, where the first and last components are
> -- Booleans that indicate whether the factor is anchored at
> -- its head or tail, respectively, and the central component is
> -- the factor itself.
> -- If a word is short enough to not contain any \(k\)-factors,
> -- the entire word, appropriately anchored, is included in the set.
> fSL :: Ord a => Int -> [a] -> SLG a
> fSL :: Int -> [a] -> SLG a
fSL = Bool -> Int -> [a] -> SLG a
forall a. Ord a => Bool -> Int -> [a] -> SLG a
fSL' Bool
True

> fSL' :: Ord a => Bool -> Int -> [a] -> SLG a
> fSL' :: Bool -> Int -> [a] -> SLG a
fSL' Bool
h Int
k [a]
w
>     | [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
drop (Int
k' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [a]
w)  =  Int -> (Bool, [a], Bool) -> SLG a
forall a. Ord a => Int -> (Bool, [a], Bool) -> SLG a
mkSLG Int
k (Bool
h, [a]
w, Bool
True)
>     | Bool
otherwise               =  SLG a -> SLG a -> SLG a
forall (g :: * -> *) a. (Grammar g, Ord a) => g a -> g a -> g a
augmentG (Int -> (Bool, [a], Bool) -> SLG a
forall a. Ord a => Int -> (Bool, [a], Bool) -> SLG a
mkSLG Int
k (Bool
h, Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
take Int
k' [a]
w, Bool
False)) (SLG a -> SLG a) -> SLG a -> SLG a
forall a b. (a -> b) -> a -> b
$
>                                  Bool -> Int -> [a] -> SLG a
forall a. Ord a => Bool -> Int -> [a] -> SLG a
fSL' Bool
False Int
k [a]
w'
>     where k' :: Int
k' = if Bool
h then Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 else Int
k
>           w' :: [a]
w' = if Bool
h then [a]
w else Int -> [a] -> [a]
forall a. Int -> [a] -> [a]
drop Int
1 [a]
w

> -- |A representation of an SL grammar.
> data SLG a = SLG { SLG a -> Set a
slgAlpha :: Set a
>                  , SLG a -> Int
slgK :: Int
>                  , SLG a -> Set (Bool, [a], Bool)
slg :: Set (Bool, [a], Bool)
>                  }
>              deriving (SLG a -> SLG a -> Bool
(SLG a -> SLG a -> Bool) -> (SLG a -> SLG a -> Bool) -> Eq (SLG a)
forall a. Eq a => SLG a -> SLG a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SLG a -> SLG a -> Bool
$c/= :: forall a. Eq a => SLG a -> SLG a -> Bool
== :: SLG a -> SLG a -> Bool
$c== :: forall a. Eq a => SLG a -> SLG a -> Bool
Eq, Eq (SLG a)
Eq (SLG a)
-> (SLG a -> SLG a -> Ordering)
-> (SLG a -> SLG a -> Bool)
-> (SLG a -> SLG a -> Bool)
-> (SLG a -> SLG a -> Bool)
-> (SLG a -> SLG a -> Bool)
-> (SLG a -> SLG a -> SLG a)
-> (SLG a -> SLG a -> SLG a)
-> Ord (SLG a)
SLG a -> SLG a -> Bool
SLG a -> SLG a -> Ordering
SLG a -> SLG a -> SLG a
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall a. Ord a => Eq (SLG a)
forall a. Ord a => SLG a -> SLG a -> Bool
forall a. Ord a => SLG a -> SLG a -> Ordering
forall a. Ord a => SLG a -> SLG a -> SLG a
min :: SLG a -> SLG a -> SLG a
$cmin :: forall a. Ord a => SLG a -> SLG a -> SLG a
max :: SLG a -> SLG a -> SLG a
$cmax :: forall a. Ord a => SLG a -> SLG a -> SLG a
>= :: SLG a -> SLG a -> Bool
$c>= :: forall a. Ord a => SLG a -> SLG a -> Bool
> :: SLG a -> SLG a -> Bool
$c> :: forall a. Ord a => SLG a -> SLG a -> Bool
<= :: SLG a -> SLG a -> Bool
$c<= :: forall a. Ord a => SLG a -> SLG a -> Bool
< :: SLG a -> SLG a -> Bool
$c< :: forall a. Ord a => SLG a -> SLG a -> Bool
compare :: SLG a -> SLG a -> Ordering
$ccompare :: forall a. Ord a => SLG a -> SLG a -> Ordering
$cp1Ord :: forall a. Ord a => Eq (SLG a)
Ord, ReadPrec [SLG a]
ReadPrec (SLG a)
Int -> ReadS (SLG a)
ReadS [SLG a]
(Int -> ReadS (SLG a))
-> ReadS [SLG a]
-> ReadPrec (SLG a)
-> ReadPrec [SLG a]
-> Read (SLG a)
forall a. (Read a, Ord a) => ReadPrec [SLG a]
forall a. (Read a, Ord a) => ReadPrec (SLG a)
forall a. (Read a, Ord a) => Int -> ReadS (SLG a)
forall a. (Read a, Ord a) => ReadS [SLG a]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [SLG a]
$creadListPrec :: forall a. (Read a, Ord a) => ReadPrec [SLG a]
readPrec :: ReadPrec (SLG a)
$creadPrec :: forall a. (Read a, Ord a) => ReadPrec (SLG a)
readList :: ReadS [SLG a]
$creadList :: forall a. (Read a, Ord a) => ReadS [SLG a]
readsPrec :: Int -> ReadS (SLG a)
$creadsPrec :: forall a. (Read a, Ord a) => Int -> ReadS (SLG a)
Read, Int -> SLG a -> ShowS
[SLG a] -> ShowS
SLG a -> String
(Int -> SLG a -> ShowS)
-> (SLG a -> String) -> ([SLG a] -> ShowS) -> Show (SLG a)
forall a. Show a => Int -> SLG a -> ShowS
forall a. Show a => [SLG a] -> ShowS
forall a. Show a => SLG a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SLG a] -> ShowS
$cshowList :: forall a. Show a => [SLG a] -> ShowS
show :: SLG a -> String
$cshow :: forall a. Show a => SLG a -> String
showsPrec :: Int -> SLG a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> SLG a -> ShowS
Show)

> mkSLG :: Ord a => Int -> (Bool, [a], Bool) -> SLG a
> mkSLG :: Int -> (Bool, [a], Bool) -> SLG a
mkSLG Int
k x :: (Bool, [a], Bool)
x@(Bool
_,[a]
b,Bool
_) = SLG :: forall a. Set a -> Int -> Set (Bool, [a], Bool) -> SLG a
SLG { slgAlpha :: Set a
slgAlpha  =  [a] -> Set a
forall a. Ord a => [a] -> Set a
Set.fromList [a]
b
>                         , slgK :: Int
slgK      =  Int
k
>                         , slg :: Set (Bool, [a], Bool)
slg       =  (Bool, [a], Bool) -> Set (Bool, [a], Bool)
forall c a. Container c a => a -> c
singleton (Bool, [a], Bool)
x
>                         }

> instance HasAlphabet SLG
>     where alphabet :: SLG e -> Set e
alphabet = SLG e -> Set e
forall e. SLG e -> Set e
slgAlpha

> instance Grammar SLG
>     where emptyG :: SLG a
emptyG = Set a -> Int -> Set (Bool, [a], Bool) -> SLG a
forall a. Set a -> Int -> Set (Bool, [a], Bool) -> SLG a
SLG Set a
forall c a. Container c a => c
empty Int
0 Set (Bool, [a], Bool)
forall c a. Container c a => c
empty
>           augmentG :: SLG a -> SLG a -> SLG a
augmentG SLG a
g1 SLG a
g2
>               = SLG :: forall a. Set a -> Int -> Set (Bool, [a], Bool) -> SLG a
SLG { slgAlpha :: Set a
slgAlpha = Set a -> Set a -> Set a
forall c a. Container c a => c -> c -> c
union (SLG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g1) (SLG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g2)
>                     , slgK :: Int
slgK = Int -> Int -> Int
forall a. Ord a => a -> a -> a
max (SLG a -> Int
forall a. SLG a -> Int
slgK SLG a
g1) (SLG a -> Int
forall a. SLG a -> Int
slgK SLG a
g2)
>                     , slg :: Set (Bool, [a], Bool)
slg = Set (Bool, [a], Bool)
-> Set (Bool, [a], Bool) -> Set (Bool, [a], Bool)
forall c a. Container c a => c -> c -> c
union (SLG a -> Set (Bool, [a], Bool)
forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g1) (SLG a -> Set (Bool, [a], Bool)
forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g2)
>                     }
>           isSubGOf :: SLG a -> SLG a -> Bool
isSubGOf SLG a
g1 SLG a
g2 = Set (Bool, [a], Bool) -> Set (Bool, [a], Bool) -> Bool
forall c a. (Container c a, Eq a) => c -> c -> Bool
isSubsetOf (SLG a -> Set (Bool, [a], Bool)
forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g1) (SLG a -> Set (Bool, [a], Bool)
forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g2)
>           genFSA :: SLG a -> FSA Integer a
genFSA SLG a
g = FSA Integer a -> FSA Integer a
forall e. Ord e => FSA Integer e -> FSA Integer e
n (FSA Integer a -> FSA Integer a)
-> (Set (Bool, [a], Bool) -> FSA Integer a)
-> Set (Bool, [a], Bool)
-> FSA Integer a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [FSA Integer a] -> FSA Integer a
forall n e.
(Enum n, Ord n, NFData n, Ord e, NFData e) =>
[FSA n e] -> FSA n e
flatIntersection ([FSA Integer a] -> FSA Integer a)
-> (Set (Bool, [a], Bool) -> [FSA Integer a])
-> Set (Bool, [a], Bool)
-> FSA Integer a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FSA Integer a
free FSA Integer a -> [FSA Integer a] -> [FSA Integer a]
forall a. a -> [a] -> [a]
:)
>                      ([FSA Integer a] -> [FSA Integer a])
-> (Set (Bool, [a], Bool) -> [FSA Integer a])
-> Set (Bool, [a], Bool)
-> [FSA Integer a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Bool, [a], Bool) -> FSA Integer a)
-> [(Bool, [a], Bool)] -> [FSA Integer a]
forall a b. (a -> b) -> [a] -> [b]
map (Set a -> Literal a -> FSA Integer a
forall n e. (Enum n, Ord n, Ord e) => Set e -> Literal e -> FSA n e
buildLiteral (SLG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g) (Literal a -> FSA Integer a)
-> ((Bool, [a], Bool) -> Literal a)
-> (Bool, [a], Bool)
-> FSA Integer a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Factor a -> Literal a
forall e. Factor e -> Literal e
forbidden (Factor a -> Literal a)
-> ((Bool, [a], Bool) -> Factor a)
-> (Bool, [a], Bool)
-> Literal a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Bool, [a], Bool) -> Factor a
forall a. Ord a => (Bool, [a], Bool) -> Factor a
f)
>                      ([(Bool, [a], Bool)] -> [FSA Integer a])
-> (Set (Bool, [a], Bool) -> [(Bool, [a], Bool)])
-> Set (Bool, [a], Bool)
-> [FSA Integer a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set (Bool, [a], Bool) -> [(Bool, [a], Bool)]
forall a. Set a -> [a]
Set.toList (Set (Bool, [a], Bool) -> FSA Integer a)
-> Set (Bool, [a], Bool) -> FSA Integer a
forall a b. (a -> b) -> a -> b
$ SLG a -> Set (Bool, [a], Bool)
forall a. Ord a => SLG a -> Set (Bool, [a], Bool)
complG SLG a
g
>               where f :: (Bool, [a], Bool) -> Factor a
f (Bool
h, [a]
b, Bool
t) = [Set a] -> Bool -> Bool -> Factor a
forall e. [Set e] -> Bool -> Bool -> Factor e
Substring ((a -> Set a) -> [a] -> [Set a]
forall a b. (a -> b) -> [a] -> [b]
map a -> Set a
forall c a. Container c a => a -> c
singleton [a]
b) Bool
h Bool
t
>                     n :: FSA Integer e -> FSA Integer e
n FSA Integer e
x = FSA Integer e -> FSA Integer e
forall e n. (Ord e, Ord n) => FSA n e -> FSA Integer e
normalize FSA Integer e
x FSA Integer e -> FSA Integer e -> FSA Integer e
forall a. a -> a -> a
`asTypeOf` FSA Integer e
x
>                     free :: FSA Integer a
free = Set a -> FSA Integer a
forall e n. (Ord e, Enum n, Ord n) => Set e -> FSA n e
totalWithAlphabet (Set a -> FSA Integer a) -> Set a -> FSA Integer a
forall a b. (a -> b) -> a -> b
$ SLG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g

> complG :: Ord a => SLG a -> Set (Bool, [a], Bool)
> complG :: SLG a -> Set (Bool, [a], Bool)
complG SLG a
g = Set (Bool, [a], Bool)
-> Set (Bool, [a], Bool) -> Set (Bool, [a], Bool)
forall c a. (Container c a, Eq a) => c -> c -> c
difference (Int -> Set a -> Set (Bool, [a], Bool)
forall a. Ord a => Int -> Set a -> Set (Bool, [a], Bool)
allFs (SLG a -> Int
forall a. SLG a -> Int
slgK SLG a
g) (SLG a -> Set a
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SLG a
g)) (SLG a -> Set (Bool, [a], Bool)
forall a. SLG a -> Set (Bool, [a], Bool)
slg SLG a
g)

> astrings :: Int -> [a] -> [(Bool, [a], Bool)]
> astrings :: Int -> [a] -> [(Bool, [a], Bool)]
astrings Int
k = ([a] -> [(Bool, [a], Bool)]) -> [[a]] -> [(Bool, [a], Bool)]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap [a] -> [(Bool, [a], Bool)]
forall (t :: * -> *) a. Foldable t => t a -> [(Bool, t a, Bool)]
f ([[a]] -> [(Bool, [a], Bool)])
-> ([a] -> [[a]]) -> [a] -> [(Bool, [a], Bool)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([a] -> Bool) -> [[a]] -> [[a]]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
k) (Int -> Bool) -> ([a] -> Int) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length) ([[a]] -> [[a]]) -> ([a] -> [[a]]) -> [a] -> [[a]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> [[a]]
forall a. [a] -> [[a]]
sequencesOver
>     where f :: t a -> [(Bool, t a, Bool)]
f t a
s = case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
s) (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
>                 of Ordering
LT -> [(Bool
True, t a
s, Bool
True)]
>                    Ordering
EQ -> [(Bool
True, t a
s, Bool
False), (Bool
False, t a
s, Bool
True)]
>                    Ordering
GT -> [(Bool
False, t a
s, Bool
False)]

> -- |All possible factors of width \(k\) under adjacency,
> -- as well as shorter fully-anchored factors.
> allFs :: Ord a => Int -> Set a -> Set (Bool, [a], Bool)
> allFs :: Int -> Set a -> Set (Bool, [a], Bool)
allFs Int
k = [(Bool, [a], Bool)] -> Set (Bool, [a], Bool)
forall a. Ord a => [a] -> Set a
Set.fromList ([(Bool, [a], Bool)] -> Set (Bool, [a], Bool))
-> (Set a -> [(Bool, [a], Bool)]) -> Set a -> Set (Bool, [a], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> [a] -> [(Bool, [a], Bool)]
forall a. Int -> [a] -> [(Bool, [a], Bool)]
astrings Int
k ([a] -> [(Bool, [a], Bool)])
-> (Set a -> [a]) -> Set a -> [(Bool, [a], Bool)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set a -> [a]
forall a. Set a -> [a]
Set.toList