> {-# OPTIONS_HADDOCK show-extensions #-}
> {-|
> Module    : LTK.Learn.SP
> Copyright : (c) 2019-2020,2023 Dakotah Lambert
> License   : MIT

> This module implements a string extension learner for the SP class.
>
> @since 0.3
> -}

> module LTK.Learn.SP (SPG, fSP) where

> import Data.Set (Set)
> import qualified Data.Set as Set

> import LTK.Factors
> import LTK.FSA
> import LTK.Learn.StringExt

When gathering subsequences of words to build a positive grammar,
we should keep in mind that if a given subsequence is considered
acceptable, the definition of SP guarantees that in turn all of
its subsequences are also acceptable.  Therefore unlike for SL, it
makes sense to also gather the factors of width less than \(k\)
when generating a grammar from positive data.

> -- |Return the set of factors under precedence of length \(k\) or less
> -- in the given word.
> fSP :: Ord a => Int -> [a] -> SPG a
> fSP :: forall a. Ord a => Int -> [a] -> SPG a
fSP Int
k = forall {a}. (Set a, Set [a]) -> SPG a
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Ord a => Bool -> Int -> [a] -> (Set a, Set [a])
fSP' Bool
True Int
k
>     where f :: (Set a, Set [a]) -> SPG a
f (Set a
s, Set [a]
g) = SPG { spgAlpha :: Set a
spgAlpha  =  Set a
s
>                          , spgK :: Int
spgK      =  Int
k
>                          , spg :: Set [a]
spg       =  Set [a]
g
>                          }

> -- |Auxiliary function to gather subsequences.
> -- If the first argument is True,
> -- gather those of length less than or equal to \(k\).
> -- Otherwise, only gather those of length exactly \(k\).
> fSP' :: Ord a => Bool -> Int -> [a] -> (Set a, Set [a])
> fSP' :: forall a. Ord a => Bool -> Int -> [a] -> (Set a, Set [a])
fSP' Bool
lt Int
k = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr forall {a}. Ord a => [a] -> (Set a, Set [a]) -> (Set a, Set [a])
g (forall c a. Container c a => c
empty, forall c a. Container c a => c
empty) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [[a]]
ssqs
>     where f :: [a] -> Bool
f = if Bool
lt then forall (t :: * -> *) a. Foldable t => t a -> Bool
null forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Int -> [a] -> [a]
drop Int
k else forall a. Eq a => a -> a -> Bool
(==) Int
k forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Int
length
>           g :: [a] -> (Set a, Set [a]) -> (Set a, Set [a])
g [a]
x (Set a
xs, Set [a]
ys)
>             = ( case [a]
x
>                 of [a
s]  -> forall a. Ord a => a -> Set a -> Set a
Set.insert a
s Set a
xs
>                    [a]
_    -> Set a
xs
>               , (if forall {a}. [a] -> Bool
f [a]
x then forall a. Ord a => a -> Set a -> Set a
Set.insert [a]
x else forall a. a -> a
id) Set [a]
ys
>               )

> -- |A representation of an SP grammar.
> data SPG a = SPG { forall a. SPG a -> Set a
spgAlpha :: Set a
>                  , forall a. SPG a -> Int
spgK :: Int
>                  , forall a. SPG a -> Set [a]
spg :: Set [a]
>                  }
>              deriving (SPG a -> SPG a -> Bool
forall a. Eq a => SPG a -> SPG a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SPG a -> SPG a -> Bool
$c/= :: forall a. Eq a => SPG a -> SPG a -> Bool
== :: SPG a -> SPG a -> Bool
$c== :: forall a. Eq a => SPG a -> SPG a -> Bool
Eq, SPG a -> SPG a -> Bool
SPG a -> SPG a -> Ordering
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall {a}. Ord a => Eq (SPG a)
forall a. Ord a => SPG a -> SPG a -> Bool
forall a. Ord a => SPG a -> SPG a -> Ordering
forall a. Ord a => SPG a -> SPG a -> SPG a
min :: SPG a -> SPG a -> SPG a
$cmin :: forall a. Ord a => SPG a -> SPG a -> SPG a
max :: SPG a -> SPG a -> SPG a
$cmax :: forall a. Ord a => SPG a -> SPG a -> SPG a
>= :: SPG a -> SPG a -> Bool
$c>= :: forall a. Ord a => SPG a -> SPG a -> Bool
> :: SPG a -> SPG a -> Bool
$c> :: forall a. Ord a => SPG a -> SPG a -> Bool
<= :: SPG a -> SPG a -> Bool
$c<= :: forall a. Ord a => SPG a -> SPG a -> Bool
< :: SPG a -> SPG a -> Bool
$c< :: forall a. Ord a => SPG a -> SPG a -> Bool
compare :: SPG a -> SPG a -> Ordering
$ccompare :: forall a. Ord a => SPG a -> SPG a -> Ordering
Ord, ReadPrec [SPG a]
ReadPrec (SPG a)
ReadS [SPG a]
forall a. (Read a, Ord a) => ReadPrec [SPG a]
forall a. (Read a, Ord a) => ReadPrec (SPG a)
forall a. (Read a, Ord a) => Int -> ReadS (SPG a)
forall a. (Read a, Ord a) => ReadS [SPG a]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [SPG a]
$creadListPrec :: forall a. (Read a, Ord a) => ReadPrec [SPG a]
readPrec :: ReadPrec (SPG a)
$creadPrec :: forall a. (Read a, Ord a) => ReadPrec (SPG a)
readList :: ReadS [SPG a]
$creadList :: forall a. (Read a, Ord a) => ReadS [SPG a]
readsPrec :: Int -> ReadS (SPG a)
$creadsPrec :: forall a. (Read a, Ord a) => Int -> ReadS (SPG a)
Read, Int -> SPG a -> ShowS
forall a. Show a => Int -> SPG a -> ShowS
forall a. Show a => [SPG a] -> ShowS
forall a. Show a => SPG a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SPG a] -> ShowS
$cshowList :: forall a. Show a => [SPG a] -> ShowS
show :: SPG a -> String
$cshow :: forall a. Show a => SPG a -> String
showsPrec :: Int -> SPG a -> ShowS
$cshowsPrec :: forall a. Show a => Int -> SPG a -> ShowS
Show)

> instance HasAlphabet SPG
>     where alphabet :: forall a. SPG a -> Set a
alphabet = forall a. SPG a -> Set a
spgAlpha

> instance Grammar SPG
>     where emptyG :: forall a. Ord a => SPG a
emptyG = forall a. Set a -> Int -> Set [a] -> SPG a
SPG forall c a. Container c a => c
empty Int
0 forall c a. Container c a => c
empty
>           augmentG :: forall a. Ord a => SPG a -> SPG a -> SPG a
augmentG SPG a
g1 SPG a
g2
>               = SPG { spgAlpha :: Set a
spgAlpha = forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g1 forall c a. Container c a => c -> c -> c
`union` forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g2
>                     , spgK :: Int
spgK = forall a. Ord a => a -> a -> a
max (forall a. SPG a -> Int
spgK SPG a
g1) (forall a. SPG a -> Int
spgK SPG a
g2)
>                     , spg :: Set [a]
spg = forall a. SPG a -> Set [a]
spg SPG a
g1 forall c a. Container c a => c -> c -> c
`union` forall a. SPG a -> Set [a]
spg SPG a
g2
>                     }
>           isSubGOf :: forall a. Ord a => SPG a -> SPG a -> Bool
isSubGOf SPG a
g1 SPG a
g2 = forall c a. (Container c a, Eq a) => c -> c -> Bool
isSubsetOf (forall a. SPG a -> Set [a]
spg SPG a
g1) (forall a. SPG a -> Set [a]
spg SPG a
g2)
>           genFSA :: forall a. (NFData a, Ord a) => SPG a -> FSA Integer a
genFSA SPG a
g = forall {e}. Ord e => FSA Integer e -> FSA Integer e
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall n e.
(Enum n, Ord n, NFData n, Ord e, NFData e) =>
[FSA n e] -> FSA n e
flatIntersection forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FSA Integer a
free forall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>                      forall a b. (a -> b) -> [a] -> [b]
map (forall n e. (Enum n, Ord n, Ord e) => Set e -> Literal e -> FSA n e
buildLiteral (forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall e. Factor e -> Literal e
forbidden forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Factor a
f) forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>                      forall a. Set a -> [a]
Set.toList forall a b. (a -> b) -> a -> b
$ forall a. Ord a => SPG a -> Set [a]
complG SPG a
g
>               where f :: [a] -> Factor a
f = forall e. [Set e] -> Factor e
Subsequence forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map forall c a. Container c a => a -> c
singleton
>                     n :: FSA Integer e -> FSA Integer e
n FSA Integer e
x = forall e n. (Ord e, Ord n) => FSA n e -> FSA Integer e
normalize FSA Integer e
x forall a. a -> a -> a
`asTypeOf` FSA Integer e
x
>                     free :: FSA Integer a
free = forall e n. (Ord e, Enum n, Ord n) => Set e -> FSA n e
totalWithAlphabet forall a b. (a -> b) -> a -> b
$ forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g

> complG :: Ord a => SPG a -> Set [a]
> complG :: forall a. Ord a => SPG a -> Set [a]
complG SPG a
g = forall c a. (Container c a, Eq a) => c -> c -> c
difference (forall a. Ord a => Int -> Set a -> Set [a]
allFs (forall a. SPG a -> Int
spgK SPG a
g) (forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet SPG a
g)) (forall a. SPG a -> Set [a]
spg SPG a
g)

> allFs :: Ord a => Int -> Set a -> Set [a]
> allFs :: forall a. Ord a => Int -> Set a -> Set [a]
allFs Int
k = forall a. Ord a => [a] -> Set a
Set.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
takeWhile ((forall a. Ord a => a -> a -> Bool
<= Int
k) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t a -> Int
length)
>           forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [[a]]
sequencesOver forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Set a -> [a]
Set.toList


Efficient subsequence finding for omega-words
=============================================

The @ssqs'@ function computes non-empty subsequences with multiplicity.
For example, the sequence "a" appears twice for "aba".
We then add in the empty subsequence for @ssqs@

> ssqs' :: [a] -> [[a]]
> ssqs' :: forall a. [a] -> [[a]]
ssqs' [] = []
> ssqs' (a
x:[a]
xs) = [a
x] forall a. a -> [a] -> [a]
: forall (c :: * -> *) a.
(Linearizable c, Container (c a) a) =>
c a -> c a -> c a
interleave (forall a b. (a -> b) -> [a] -> [b]
map (a
xforall a. a -> [a] -> [a]
:) [[a]]
ys) [[a]]
ys
>     where ys :: [[a]]
ys = forall a. [a] -> [[a]]
ssqs' [a]
xs

> ssqs :: [a] -> [[a]]
> ssqs :: forall a. [a] -> [[a]]
ssqs = ([]forall a. a -> [a] -> [a]
:) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [[a]]
ssqs'