\documentclass[12pt,twoside]{article}

%include polycode.fmt

%format delta = "\delta"
%format delta_prime = delta "^\prime"
%format empty = "\varnothing"
%format Epsilon = "\varepsilon"
%format (FSA a b c d e) = "\Tup{Q," a "," b "," c "," d "}"
%format FSAt = "\textit{FSA}"
%format isSP' = isSP
%format isSSQ = "(\sqsubseteq)"
%format isIn (a) = \ _ -> _ "\in" a
%format isNotIn (a) = "\not\in" a
%format `isSSQ` = "\sqsubseteq"
%format `isNotSSQ` = "\not\sqsubseteq"
%format `isomorphic` = "\cong"
%format keep = filter
%format mergeBy f = merge "_{" f "}"
%format minimizeOver r = minimize "_{" r "}"
%format qf = "F"
%format q_0 = "q_0"
%format (Set (a)) = "\{" a "\}"
%format size (a) = "\left\|" a "\right\|"
%format isize (a) = "\left\|" a "\right\|"
%format sortBy f = sort "_{" f "}"
%format ssigma = "\Sigma"
%format subsequenceClosure' = subsequenceClosure
%format ForbiddenSubsequences e = Set [e]
%format tmap = "\textit{map}"
%format Transition a b c = "\Transition[" a "]{" b "}{" c "}"
%format `union` = "\cup"
%format unionAll = "\bigcup{}"
%format _ = "\chi"

\usepackage[letterpaper,margin=1in]{geometry}
\usepackage[T1]{fontenc}
\usepackage{newtxtext,newtxmath}\let\openbox\relax
\usepackage[english]{babel}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{draftnote}\draftcp
\usepackage{mathtools}
\usepackage{microtype}

\DeclareMathOperator{\SI}{SI}

\newcommand*{\Automaton}[1]{\ensuremath{\mathcal{\MakeUppercase{#1}}}}
\newcommand*{\Language}[2][]{\ensuremath{L#1(#2#1)}}

\newcommand*{\EmptyString}{\varepsilon}
\newcommand*{\holyzero}{\mathord{\mathbf{0}}}
\newcommand*{\Transition}[3][]{#2 \stackrel{#1}{\rightarrow} #3}
\newcommand*{\Tup}[1]{\langle #1 \rangle}
\newcommand*{\vocab}[1]{\emph{#1}}

% Complexity classes
\newcommand*{\ComplexityClass}[1]{\mathrm{#1}}
\newcommand*{\SP}[1][]{\ComplexityClass{SP}_{#1}}

% Environments
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}

\theoremstyle{definition}
\newtheorem{definition}{Definition}

\renewcommand{\qedsymbol}{\rule{0.5\baselineskip}{0.5\baselineskip}}

\author{Dakotah Lambert}
\title{Extracting FSSQs from Regular Stringsets}
\date{}

\begin{document}

\maketitle\thispagestyle{empty}

%if false


> {-# OPTIONS_HADDOCK show-extensions #-}
> {-# Language
>   MultiParamTypeClasses
>   #-}
> {-|
> Module    : LTK.Extract.SP
> Copyright : (c) 2017-2020 Dakotah Lambert
> License   : MIT
> 
> Find forbidden subsequences of an automaton.
> Formerly known as @LTK.ExtractSP@.
>
> @since 0.2
> -}

> module LTK.Extract.SP
>        ( ForbiddenSubsequences(..)
>        , forbiddenSubsequences
>        , fsaFromForbiddenSubsequences
>        , isSP
>        , isSSQ
>        , subsequenceClosure
>        ) where

> import Control.DeepSeq (NFData)
> import Data.List (sortBy)
> import Data.Ord (comparing)
> import Data.Set (Set)
> import qualified Data.Set as Set

> import LTK.Factors
> import LTK.FSA
> import LTK.Traversals

> -- | A convenience-type for declaring collections of forbidden subsequences.
> -- The member types are (lists of) the raw alphabet type (not (Symbol .))
> data ForbiddenSubsequences e
>     = ForbiddenSubsequences
>       { ForbiddenSubsequences e -> Set e
attestedAlphabet  ::  Set e
>       , ForbiddenSubsequences e -> Set [e]
getSubsequences   ::  Set [e]
>       } deriving (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
(ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool)
-> (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool)
-> Eq (ForbiddenSubsequences e)
forall e.
Eq e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
$c/= :: forall e.
Eq e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
== :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
$c== :: forall e.
Eq e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
Eq, Eq (ForbiddenSubsequences e)
Eq (ForbiddenSubsequences e)
-> (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Ordering)
-> (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool)
-> (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool)
-> (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool)
-> (ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool)
-> (ForbiddenSubsequences e
    -> ForbiddenSubsequences e -> ForbiddenSubsequences e)
-> (ForbiddenSubsequences e
    -> ForbiddenSubsequences e -> ForbiddenSubsequences e)
-> Ord (ForbiddenSubsequences e)
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Ordering
ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall e. Ord e => Eq (ForbiddenSubsequences e)
forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Ordering
forall e.
Ord e =>
ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
min :: ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
$cmin :: forall e.
Ord e =>
ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
max :: ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
$cmax :: forall e.
Ord e =>
ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
>= :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
$c>= :: forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
> :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
$c> :: forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
<= :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
$c<= :: forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
< :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
$c< :: forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Bool
compare :: ForbiddenSubsequences e -> ForbiddenSubsequences e -> Ordering
$ccompare :: forall e.
Ord e =>
ForbiddenSubsequences e -> ForbiddenSubsequences e -> Ordering
$cp1Ord :: forall e. Ord e => Eq (ForbiddenSubsequences e)
Ord, ReadPrec [ForbiddenSubsequences e]
ReadPrec (ForbiddenSubsequences e)
Int -> ReadS (ForbiddenSubsequences e)
ReadS [ForbiddenSubsequences e]
(Int -> ReadS (ForbiddenSubsequences e))
-> ReadS [ForbiddenSubsequences e]
-> ReadPrec (ForbiddenSubsequences e)
-> ReadPrec [ForbiddenSubsequences e]
-> Read (ForbiddenSubsequences e)
forall e. (Read e, Ord e) => ReadPrec [ForbiddenSubsequences e]
forall e. (Read e, Ord e) => ReadPrec (ForbiddenSubsequences e)
forall e. (Read e, Ord e) => Int -> ReadS (ForbiddenSubsequences e)
forall e. (Read e, Ord e) => ReadS [ForbiddenSubsequences e]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [ForbiddenSubsequences e]
$creadListPrec :: forall e. (Read e, Ord e) => ReadPrec [ForbiddenSubsequences e]
readPrec :: ReadPrec (ForbiddenSubsequences e)
$creadPrec :: forall e. (Read e, Ord e) => ReadPrec (ForbiddenSubsequences e)
readList :: ReadS [ForbiddenSubsequences e]
$creadList :: forall e. (Read e, Ord e) => ReadS [ForbiddenSubsequences e]
readsPrec :: Int -> ReadS (ForbiddenSubsequences e)
$creadsPrec :: forall e. (Read e, Ord e) => Int -> ReadS (ForbiddenSubsequences e)
Read, Int -> ForbiddenSubsequences e -> ShowS
[ForbiddenSubsequences e] -> ShowS
ForbiddenSubsequences e -> String
(Int -> ForbiddenSubsequences e -> ShowS)
-> (ForbiddenSubsequences e -> String)
-> ([ForbiddenSubsequences e] -> ShowS)
-> Show (ForbiddenSubsequences e)
forall e. Show e => Int -> ForbiddenSubsequences e -> ShowS
forall e. Show e => [ForbiddenSubsequences e] -> ShowS
forall e. Show e => ForbiddenSubsequences e -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ForbiddenSubsequences e] -> ShowS
$cshowList :: forall e. Show e => [ForbiddenSubsequences e] -> ShowS
show :: ForbiddenSubsequences e -> String
$cshow :: forall e. Show e => ForbiddenSubsequences e -> String
showsPrec :: Int -> ForbiddenSubsequences e -> ShowS
$cshowsPrec :: forall e. Show e => Int -> ForbiddenSubsequences e -> ShowS
Show)

> instance Ord e => Container (ForbiddenSubsequences e) [e]
>     where isEmpty :: ForbiddenSubsequences e -> Bool
isEmpty ForbiddenSubsequences e
c     =  Set [e] -> Bool
forall c a. Container c a => c -> Bool
isEmpty (ForbiddenSubsequences e -> Set [e]
forall e. ForbiddenSubsequences e -> Set [e]
getSubsequences ForbiddenSubsequences e
c)
>           isIn :: ForbiddenSubsequences e -> [e] -> Bool
isIn ForbiddenSubsequences e
c        =  Set [e] -> [e] -> Bool
forall c a. (Container c a, Eq a) => c -> a -> Bool
isIn (ForbiddenSubsequences e -> Set [e]
forall e. ForbiddenSubsequences e -> Set [e]
getSubsequences ForbiddenSubsequences e
c)
>           union :: ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
union         =  (Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
forall e.
Ord e =>
(Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
g Set [e] -> Set [e] -> Set [e]
forall c a. Container c a => c -> c -> c
union
>           intersection :: ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
intersection  =  (Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
forall e.
Ord e =>
(Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
g Set [e] -> Set [e] -> Set [e]
forall c a. (Container c a, Eq a) => c -> c -> c
intersection
>           difference :: ForbiddenSubsequences e
-> ForbiddenSubsequences e -> ForbiddenSubsequences e
difference    =  (Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
forall e.
Ord e =>
(Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
g Set [e] -> Set [e] -> Set [e]
forall c a. (Container c a, Eq a) => c -> c -> c
difference
>           empty :: ForbiddenSubsequences e
empty         =  ForbiddenSubsequences :: forall e. Set e -> Set [e] -> ForbiddenSubsequences e
ForbiddenSubsequences
>                            { attestedAlphabet :: Set e
attestedAlphabet = Set e
forall c a. Container c a => c
empty
>                            , getSubsequences :: Set [e]
getSubsequences = Set [e]
forall c a. Container c a => c
empty
>                            }
>           singleton :: [e] -> ForbiddenSubsequences e
singleton [e]
a   =  ForbiddenSubsequences :: forall e. Set e -> Set [e] -> ForbiddenSubsequences e
ForbiddenSubsequences
>                            { attestedAlphabet :: Set e
attestedAlphabet = [e] -> Set e
forall (s :: * -> *) c a.
(Collapsible s, Container c a) =>
s a -> c
fromCollapsible [e]
a
>                            , getSubsequences :: Set [e]
getSubsequences = [e] -> Set [e]
forall c a. Container c a => a -> c
singleton [e]
a
>                            }

> g :: Ord e => (Set [e] -> Set [e] -> Set [e]) ->
>      ForbiddenSubsequences e -> ForbiddenSubsequences e ->
>      ForbiddenSubsequences e
> g :: (Set [e] -> Set [e] -> Set [e])
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
-> ForbiddenSubsequences e
g Set [e] -> Set [e] -> Set [e]
f ForbiddenSubsequences e
c1 ForbiddenSubsequences e
c2 = ForbiddenSubsequences :: forall e. Set e -> Set [e] -> ForbiddenSubsequences e
ForbiddenSubsequences
>             { attestedAlphabet :: Set e
attestedAlphabet = Set e -> Set e -> Set e
forall c a. Container c a => c -> c -> c
union (ForbiddenSubsequences e -> Set e
forall e. ForbiddenSubsequences e -> Set e
attestedAlphabet ForbiddenSubsequences e
c1) (Set e -> Set e) -> Set e -> Set e
forall a b. (a -> b) -> a -> b
$
>                                  ForbiddenSubsequences e -> Set e
forall e. ForbiddenSubsequences e -> Set e
attestedAlphabet ForbiddenSubsequences e
c2
>             , getSubsequences :: Set [e]
getSubsequences = Set [e] -> Set [e] -> Set [e]
f (ForbiddenSubsequences e -> Set [e]
forall e. ForbiddenSubsequences e -> Set [e]
getSubsequences ForbiddenSubsequences e
c1) (Set [e] -> Set [e]) -> Set [e] -> Set [e]
forall a b. (a -> b) -> a -> b
$ ForbiddenSubsequences e -> Set [e]
forall e. ForbiddenSubsequences e -> Set [e]
getSubsequences ForbiddenSubsequences e
c2
>             }

> type FSAt = FSA

%endif

\section{Introduction}

In this module we define and implement a method for extracting
strictly piecewise ($\SP$) constraints from arbitrary regular stringsets.
These constraints are also known as forbidden subsequences,
due to the nature of $\SP$ stringsets.
Although extracting these constraints is an exponential-time operation,
the implementation clarified a linear-time algorithm to
generate an $\SP$ approximation of a stringset.

\begin{definition}
Let $v=\sigma_1\sigma_2\dots\sigma_n\in\Sigma^*$.
The \vocab{shuffle ideal} of $v$, $\SI(v)$, is the set:
\[
  \SI(v) =
  \{
    u_0\sigma_1 u_1\sigma_2 u_2\dots\sigma_n u_n
    : u_i\in\Sigma^*
  \}\mbox{.}
\]
\end{definition}

If $w\in\SI(v)$, we say $v$ is a \emph{subsequence} of $w$,
$v\sqsubseteq w$.
Determining whether $v$ is a subsequence of $w$ is quite a simple algorithm:

%if false
Documentation


> -- |@(isSSQ a b)@ returns true iff @b@ contains the symbols of @a@
> -- in order, but not necessarily adjacently.

%endif


> isSSQ :: (Eq a) => [a] -> [a] -> Bool
> [] isSSQ :: [a] -> [a] -> Bool
`isSSQ` [a]
_   =  Bool
True
> (a
v:[a]
vs) `isSSQ` [a]
ws
>     | [a] -> Bool
forall c a. Container c a => c -> Bool
isEmpty [a]
notV  =  Bool
False
>     | Bool
otherwise     =  [a]
vs [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSSQ` [a] -> [a]
forall a. [a] -> [a]
tail [a]
notV
>     where notV :: [a]
notV = (a -> Bool) -> [a] -> [a]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
v) [a]
ws

%if false
For formatting's sake:


> isNotSSQ :: (Eq a) => [a] -> [a] -> Bool
> [a]
a isNotSSQ :: [a] -> [a] -> Bool
`isNotSSQ` [a]
b = Bool -> Bool
not ([a]
a [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSSQ` [a]
b)

%endif


\section{Augmenting a regular stringset}
A regular stringset can be augmented
by adding transitions to its representative DFA,
or by augmenting the set of accepting states.
By the nature of the construction described herein,
we only need the first method.

Let $\Automaton{M}=\Tup{Q,\Sigma,\delta,q_0,F}$ be an automaton.
Define the \vocab{subsequence-closure} of $\Language{\Automaton{M}}$ as
the smallest superset of $\Language{\Automaton{M}}$
that is closed under deletion.  This can be implemented as a function
that inserts a transition on $\EmptyString$
wherever a transition occurred in $\delta$.



> subsequenceClosure' :: (Ord n, Ord e) => FSAt n e -> FSAt n e
> subsequenceClosure' :: FSAt n e -> FSAt n e
subsequenceClosure' (FSA Set e
ssigma Set (Transition n e)
delta Set (State n)
q_0 Set (State n)
qf Bool
_)
>     =  Set e
-> Set (Transition n e)
-> Set (State n)
-> Set (State n)
-> Bool
-> FSAt n e
forall n e.
Set e
-> Set (Transition n e)
-> Set (State n)
-> Set (State n)
-> Bool
-> FSA n e
FSA Set e
ssigma (Set (Transition n e)
delta Set (Transition n e)
-> Set (Transition n e) -> Set (Transition n e)
forall c a. Container c a => c -> c -> c
`union` Set (Transition n e)
delta_prime) Set (State n)
q_0 Set (State n)
qf Bool
False
>     where  delta_prime :: Set (Transition n e)
delta_prime  =  (Transition n e -> Transition n e)
-> Set (Transition n e) -> Set (Transition n e)
forall (s :: * -> *) b1 b a.
(Collapsible s, Container (s b1) b) =>
(a -> b) -> s a -> s b1
tmap (\(Transition Symbol e
_ State n
a State n
b) -> (Symbol e -> State n -> State n -> Transition n e
forall n e. Symbol e -> State n -> State n -> Transition n e
Transition Symbol e
forall e. Symbol e
Epsilon State n
a State n
b)) Set (Transition n e)
delta

%if false

> -- |Returns an 'FSA' that accepts every string accepted by the
> -- original, as well as every subsequence of these strings.
> subsequenceClosure :: (Ord n, Ord e) => FSA n e -> FSA n e
> subsequenceClosure :: FSA n e -> FSA n e
subsequenceClosure = FSA n e -> FSA n e
forall n e. (Ord n, Ord e) => FSAt n e -> FSAt n e
subsequenceClosure'

%endif

Let $\Automaton{M}^\prime=\operatorname{subsequenceClosure} \Automaton{M}$,
and let $u,v,w\in\Sigma^*$
such that $uvw\in\Language{\Automaton{M}^\prime}$.
Then $uv\in\Language{\Automaton{M}^\prime}$, as every transition taken in
$w$ can be replaced by transitions on $\EmptyString$.
Similary $vw$ and $uw$ are in $\Language{\Automaton{M}^\prime}$.
Thus $\Language{\Automaton{M}^\prime}$ is $\SP$.
Note that since all of the original transitions remain intact,
$\Language{\Automaton{M}}\subseteq\Language{\Automaton{M}^\prime}$.

\section{Extracting forbidden subsequences}

\begin{lemma}
The set of forbidden subsequences of
$\Language{\Automaton{M}^\prime}$ is a subset of
that of $\Language{\Automaton{M}}$.
\end{lemma}
\begin{proof}
Suppose the set of forbidden subsequences of
$\Language{\Automaton{M}^\prime}$ is not a subset of
the set of forbidden subsequences of
$\Language{\Automaton{M}}$.
Then there exists some sequence $u$
that is forbidden in $\Language{\Automaton{M}^\prime}$
but not forbidden in $\Language{\Automaton{M}}$.
It follows that some string $w$ in $\Language{\Automaton{M}}$
contains $u$ as a subsequence.
But since $\Language{\Automaton{M}}\subseteq\Language{\Automaton{M}^\prime}$,
this string $w$ also occurs in $\Language{\Automaton{M}^\prime}$.
This contradicts the assumption that $u$ is a forbidden subsequence of
$\Language{\Automaton{M}^\prime}$.
Thus the set of forbidden subsequences of
$\Language{\Automaton{M}^\prime}$ is a subset of
the set of forbidden subsequences of
$\Language{\Automaton{M}}$.
\end{proof}

\begin{lemma}
The set of forbidden subsequences of
$\Language{\Automaton{M}}$ is a subset of
that of $\Language{\Automaton{M}^\prime}$.
\end{lemma}
\begin{proof}
Suppose the set of forbidden subsequences of
$\Language{\Automaton{M}}$ is not a subset of
the set of forbidden subsequences of
$\Language{\Automaton{M}^\prime}$.
Then there exists some sequence $u$
that is forbidden in $\Language{\Automaton{M}}$
but not forbidden in $\Language{\Automaton{M}^\prime}$.
It follows that some string $v$ in $\Language{\Automaton{M}^\prime}$
contains $u$ as a subsequence.
But since $\Language{\Automaton{M}^\prime}$ was formed
by allowing a computation to skip transitions of $\Automaton{M}$,
there exists a string $w$ in $\Language{\Automaton{M}}$
such that $v\sqsubseteq w$.
By the transitive property of subsequences,
this means $u\sqsubseteq w$.
This contradicts the assumption that $u$ is a forbidden subsequence of
$\Language{\Automaton{M}}$.
Thus the set of forbidden subsequences of
$\Language{\Automaton{M}}$ is a subset of
the set of forbidden subsequences of
$\Language{\Automaton{M}^\prime}$.
\end{proof}

Since each is a subset of the other, it follows that
the set of forbidden subsequences in $\Language{\Automaton{M}^\prime}$
is exactly the same set as those forbidden in $\Language{\Automaton{M}}$.
Because of this, collecting the minimal forbidden subsequences of
$\Language{\Automaton{M}^\prime}$
will give us the strictly piecewise constraints on
$\Language{\Automaton{M}}$.

%if false

> -- |Given an 'FSA' \(A\),
> -- returns the set of subsequences \(v\) such that
> -- for all words \(w\), \(v\sqsubseteq w\) implies
> -- that \(w\) is not accepted by \(A\).
> forbiddenSubsequences :: (Ord n, Ord e) => FSA n e -> ForbiddenSubsequences e
> forbiddenSubsequences :: FSA n e -> ForbiddenSubsequences e
forbiddenSubsequences FSA n e
fsa
>     = ForbiddenSubsequences :: forall e. Set e -> Set [e] -> ForbiddenSubsequences e
ForbiddenSubsequences
>       { attestedAlphabet :: Set e
attestedAlphabet = FSA n e -> Set e
forall (g :: * -> *) e. HasAlphabet g => g e -> Set e
alphabet FSA n e
fsa
>       , getSubsequences :: Set [e]
getSubsequences  = FSA n e -> Set [e]
forall n e. (Ord n, Ord e) => FSAt n e -> Set [e]
collectMinimalFSSQs FSA n e
fsa
>       }

%endif

Since $\Automaton{M}^\prime$ is $\SP$,
it suffices to simply traverse $\Automaton{M}^\prime$,
finding all minimal (in terms of subsequence) paths
from $q_0$ to the fail-state ($\holyzero$).
A cyclic path is necessarily non-minimal,
so it suffices to check only the acyclic paths.
Since every state in an $\SP$ automaton is accepting
save for a possible unique non-accepting sink,
we can merely check paths in the complement.
Since this complement has exactly one accepting state,
and exactly one initial state,
we can also use the acyclic paths of its reversal,
which prevents having to reverse every extracted string.

> collectFSSQs :: (Ord n, Ord e) => FSAt n e -> Set [e]
> collectFSSQs :: FSAt n e -> Set [e]
collectFSSQs FSAt n e
f =  (Path Integer e -> [e]) -> Set (Path Integer e) -> Set [e]
forall (s :: * -> *) b1 b a.
(Collapsible s, Container (s b1) b) =>
(a -> b) -> s a -> s b1
tmap ([Symbol e] -> [e]
forall (s :: * -> *) c e.
(Collapsible s, Container c e, Monoid c) =>
s (Symbol e) -> c
unsymbols ([Symbol e] -> [e])
-> (Path Integer e -> [Symbol e]) -> Path Integer e -> [e]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path Integer e -> [Symbol e]
forall n e. Path n e -> [Symbol e]
labels)
>                   (Set (Path Integer e) -> Set [e])
-> (Set (Path Integer e) -> Set (Path Integer e))
-> Set (Path Integer e)
-> Set [e]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Path Integer e -> Bool)
-> Set (Path Integer e) -> Set (Path Integer e)
forall (s :: * -> *) a.
(Collapsible s, Container (s a) a) =>
(a -> Bool) -> s a -> s a
keep (Bool -> (State Integer -> Bool) -> Maybe (State Integer) -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Set (State Integer) -> State Integer -> Bool
forall c a. (Container c a, Eq a) => c -> a -> Bool
isIn (FSA Integer e -> Set (State Integer)
forall n e. FSA n e -> Set (State n)
finals FSA Integer e
f')) (Maybe (State Integer) -> Bool)
-> (Path Integer e -> Maybe (State Integer))
-> Path Integer e
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Path Integer e -> Maybe (State Integer)
forall n e. Path n e -> Maybe (State n)
endstate)
>                   (Set (Path Integer e) -> Set [e])
-> Set (Path Integer e) -> Set [e]
forall a b. (a -> b) -> a -> b
$ FSA Integer e -> Set (Path Integer e)
forall e n. (Ord e, Ord n) => FSA n e -> Set (Path n e)
acyclicPaths FSA Integer e
f'
>     where f' :: FSA Integer e
f' =  FSA (Set n) e -> FSA Integer e
forall e n. (Ord e, Ord n) => FSA n e -> FSA Integer e
normalize (FSA (Set n) e -> FSA Integer e)
-> (FSAt n e -> FSA (Set n) e) -> FSAt n e -> FSA Integer e
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FSA (Set n) e -> FSA (Set n) e
forall e n. (Ord e, Ord n) => FSA n e -> FSA n e
LTK.FSA.reverse (FSA (Set n) e -> FSA (Set n) e)
-> (FSAt n e -> FSA (Set n) e) -> FSAt n e -> FSA (Set n) e
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FSAt n e -> FSA (Set n) e
forall e n. (Ord e, Ord n) => FSA n e -> FSA (Set n) e
complement
>                 (FSAt n e -> FSA Integer e) -> FSAt n e -> FSA Integer e
forall a b. (a -> b) -> a -> b
$ FSAt n e -> FSAt n e
forall n e. (Ord n, Ord e) => FSAt n e -> FSAt n e
subsequenceClosure FSAt n e
f

Further, we can derive a minimal set of forbidden subsequences by
filtering out elements that are generated by others.  Formally, if
$v\sqsubseteq w$, for $v$ and $w$ forbidden subsequences, $w$ is not a
minimal forbidden subsequence.


> collectMinimalFSSQs :: (Ord n, Ord e) => FSAt n e -> Set [e]
> collectMinimalFSSQs :: FSAt n e -> Set [e]
collectMinimalFSSQs  =  [[e]] -> Set [e]
forall a. Ord a => [a] -> Set a
Set.fromList
>                         ([[e]] -> Set [e]) -> (FSAt n e -> [[e]]) -> FSAt n e -> Set [e]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[e]] -> [[e]]
forall a. Eq a => [[a]] -> [[a]]
absorb
>                         ([[e]] -> [[e]]) -> (FSAt n e -> [[e]]) -> FSAt n e -> [[e]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([e] -> [e] -> Ordering) -> [[e]] -> [[e]]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (([e] -> Integer) -> [e] -> [e] -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing [e] -> Integer
forall (c :: * -> *) b. Collapsible c => c b -> Integer
isize)
>                         ([[e]] -> [[e]]) -> (FSAt n e -> [[e]]) -> FSAt n e -> [[e]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set [e] -> [[e]]
forall a. Set a -> [a]
Set.toList
>                         (Set [e] -> [[e]]) -> (FSAt n e -> Set [e]) -> FSAt n e -> [[e]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FSAt n e -> Set [e]
forall n e. (Ord n, Ord e) => FSAt n e -> Set [e]
collectFSSQs
>     where  absorb :: [[a]] -> [[a]]
absorb ([a]
x:[[a]]
xs)  =  [a]
x [a] -> [[a]] -> [[a]]
forall a. a -> [a] -> [a]
: [[a]] -> [[a]]
absorb (([a] -> Bool) -> [[a]] -> [[a]]
forall (s :: * -> *) a.
(Collapsible s, Container (s a) a) =>
(a -> Bool) -> s a -> s a
keep (\[a]
y -> [a]
x [a] -> [a] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isNotSSQ` [a]
y) [[a]]
xs)
>            absorb [[a]]
_       =  []

This concludes the algorithm for extracting minimal forbidden subsequences,
and thus for extracting strictly-piecewise factors
from an arbirary regular stringset.

\section{Testing whether a stringset is strictly piecewise}

If $\Automaton{M}$ already represents an $\SP$ stringset,
then $\Automaton{M}^\prime$ will recognize
the same stringset as its source.
This makes for quite the simple test to determine
whether a stringset is $\SP$:

> isSP' :: (Ord n, Ord e) => FSAt n e -> Bool
> isSP' :: FSAt n e -> Bool
isSP' FSAt n e
f = FSAt n e
f FSAt n e -> FSAt n e -> Bool
forall a. Eq a => a -> a -> Bool
== FSAt n e -> FSAt n e
forall n e. (Ord n, Ord e) => FSAt n e -> FSAt n e
subsequenceClosure FSAt n e
f

%if false

> -- |Returns @True@ iff the stringset represented by the given 'FSA'
> -- is Strictly Piecewise, that is,
> -- if the 'FSA' accepts all subsequences of every string it accepts.
> isSP :: (Ord n, Ord e) => FSA n e -> Bool
> isSP :: FSA n e -> Bool
isSP = FSA n e -> Bool
forall n e. (Ord n, Ord e) => FSAt n e -> Bool
isSP'

> -- |The stringset represented by the forbiddenSubsequences.
> fsaFromForbiddenSubsequences :: (Ord e, NFData e) =>
>                                 ForbiddenSubsequences e -> FSA Integer e
> fsaFromForbiddenSubsequences :: ForbiddenSubsequences e -> FSA Integer e
fsaFromForbiddenSubsequences ForbiddenSubsequences e
fssqs
>     = Set e -> Set (Conjunction e) -> FSA Integer e
forall n e.
(Enum n, NFData n, Ord n, NFData e, Ord e) =>
Set e -> Set (Conjunction e) -> FSA n e
build (ForbiddenSubsequences e -> Set e
forall e. ForbiddenSubsequences e -> Set e
attestedAlphabet ForbiddenSubsequences e
fssqs) (Set (Conjunction e) -> FSA Integer e)
-> (Set [e] -> Set (Conjunction e)) -> Set [e] -> FSA Integer e
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Conjunction e -> Set (Conjunction e)
forall c a. Container c a => a -> c
singleton (Conjunction e -> Set (Conjunction e))
-> (Set [e] -> Conjunction e) -> Set [e] -> Set (Conjunction e)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>       [[Literal e]] -> Conjunction e
forall e. Ord e => [[Literal e]] -> Conjunction e
makeConstraint ([[Literal e]] -> Conjunction e)
-> (Set [e] -> [[Literal e]]) -> Set [e] -> Conjunction e
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>       ([e] -> [Literal e]) -> [[e]] -> [[Literal e]]
forall (s :: * -> *) b1 b a.
(Collapsible s, Container (s b1) b) =>
(a -> b) -> s a -> s b1
tmap (Literal e -> [Literal e]
forall c a. Container c a => a -> c
singleton (Literal e -> [Literal e])
-> ([e] -> Literal e) -> [e] -> [Literal e]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Factor e -> Literal e
forall e. Factor e -> Literal e
forbidden (Factor e -> Literal e) -> ([e] -> Factor e) -> [e] -> Literal e
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Set e] -> Factor e
forall e. [Set e] -> Factor e
Subsequence ([Set e] -> Factor e) -> ([e] -> [Set e]) -> [e] -> Factor e
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (e -> Set e) -> [e] -> [Set e]
forall (s :: * -> *) b1 b a.
(Collapsible s, Container (s b1) b) =>
(a -> b) -> s a -> s b1
tmap e -> Set e
forall c a. Container c a => a -> c
singleton) ([[e]] -> [[Literal e]])
-> (Set [e] -> [[e]]) -> Set [e] -> [[Literal e]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
>       Set [e] -> [[e]]
forall (s :: * -> *) c a.
(Collapsible s, Container c a) =>
s a -> c
fromCollapsible (Set [e] -> FSA Integer e) -> Set [e] -> FSA Integer e
forall a b. (a -> b) -> a -> b
$ ForbiddenSubsequences e -> Set [e]
forall e. ForbiddenSubsequences e -> Set [e]
getSubsequences ForbiddenSubsequences e
fssqs

%endif

\end{document}