----------------------------------------------------------------------
-- |
-- Module      : Str
-- Maintainer  : AR
-- Stability   : (stable)
-- Portability : (portable)
--
-- > CVS $Date: 2005/04/21 16:22:09 $ 
-- > CVS $Author: bringert $
-- > CVS $Revision: 1.8 $
--
-- (Description of the module)
-----------------------------------------------------------------------------

module GF.Data.Str (
  Str, Tok,        --- constructors no longer needed in PrGrammar
  str2strings, str, sstr, --sstrV, str2allStrings,
  plusStr, glueStr, --prStr, isZeroTok,
  strTok --, allItems
) where

--import GF.Data.Operations(prQuotedString)
import Data.List (isPrefixOf) --, intersperse, isSuffixOf

-- | abstract token list type. AR 2001, revised and simplified 20\/4\/2003
newtype Str = Str [Tok]  deriving (ReadPrec [Str]
ReadPrec Str
Int -> ReadS Str
ReadS [Str]
(Int -> ReadS Str)
-> ReadS [Str] -> ReadPrec Str -> ReadPrec [Str] -> Read Str
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Str]
$creadListPrec :: ReadPrec [Str]
readPrec :: ReadPrec Str
$creadPrec :: ReadPrec Str
readList :: ReadS [Str]
$creadList :: ReadS [Str]
readsPrec :: Int -> ReadS Str
$creadsPrec :: Int -> ReadS Str
Read, Int -> Str -> ShowS
[Str] -> ShowS
Str -> String
(Int -> Str -> ShowS)
-> (Str -> String) -> ([Str] -> ShowS) -> Show Str
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Str] -> ShowS
$cshowList :: [Str] -> ShowS
show :: Str -> String
$cshow :: Str -> String
showsPrec :: Int -> Str -> ShowS
$cshowsPrec :: Int -> Str -> ShowS
Show, Str -> Str -> Bool
(Str -> Str -> Bool) -> (Str -> Str -> Bool) -> Eq Str
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Str -> Str -> Bool
$c/= :: Str -> Str -> Bool
== :: Str -> Str -> Bool
$c== :: Str -> Str -> Bool
Eq, Eq Str
Eq Str
-> (Str -> Str -> Ordering)
-> (Str -> Str -> Bool)
-> (Str -> Str -> Bool)
-> (Str -> Str -> Bool)
-> (Str -> Str -> Bool)
-> (Str -> Str -> Str)
-> (Str -> Str -> Str)
-> Ord Str
Str -> Str -> Bool
Str -> Str -> Ordering
Str -> Str -> Str
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Str -> Str -> Str
$cmin :: Str -> Str -> Str
max :: Str -> Str -> Str
$cmax :: Str -> Str -> Str
>= :: Str -> Str -> Bool
$c>= :: Str -> Str -> Bool
> :: Str -> Str -> Bool
$c> :: Str -> Str -> Bool
<= :: Str -> Str -> Bool
$c<= :: Str -> Str -> Bool
< :: Str -> Str -> Bool
$c< :: Str -> Str -> Bool
compare :: Str -> Str -> Ordering
$ccompare :: Str -> Str -> Ordering
$cp1Ord :: Eq Str
Ord)

-- | notice that having both pre and post would leave to inconsistent situations:
--
-- > pre {"x" ; "y" / "a"} ++ post {"b" ; "a" / "x"}
--
-- always violates a condition expressed by the one or the other
data Tok = 
   TK String
 | TN Ss [(Ss, [String])] -- ^ variants depending on next string 
--- | TP Ss [(Ss, [String])] -- variants depending on previous string
    deriving (Tok -> Tok -> Bool
(Tok -> Tok -> Bool) -> (Tok -> Tok -> Bool) -> Eq Tok
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Tok -> Tok -> Bool
$c/= :: Tok -> Tok -> Bool
== :: Tok -> Tok -> Bool
$c== :: Tok -> Tok -> Bool
Eq, Eq Tok
Eq Tok
-> (Tok -> Tok -> Ordering)
-> (Tok -> Tok -> Bool)
-> (Tok -> Tok -> Bool)
-> (Tok -> Tok -> Bool)
-> (Tok -> Tok -> Bool)
-> (Tok -> Tok -> Tok)
-> (Tok -> Tok -> Tok)
-> Ord Tok
Tok -> Tok -> Bool
Tok -> Tok -> Ordering
Tok -> Tok -> Tok
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Tok -> Tok -> Tok
$cmin :: Tok -> Tok -> Tok
max :: Tok -> Tok -> Tok
$cmax :: Tok -> Tok -> Tok
>= :: Tok -> Tok -> Bool
$c>= :: Tok -> Tok -> Bool
> :: Tok -> Tok -> Bool
$c> :: Tok -> Tok -> Bool
<= :: Tok -> Tok -> Bool
$c<= :: Tok -> Tok -> Bool
< :: Tok -> Tok -> Bool
$c< :: Tok -> Tok -> Bool
compare :: Tok -> Tok -> Ordering
$ccompare :: Tok -> Tok -> Ordering
$cp1Ord :: Eq Tok
Ord, Int -> Tok -> ShowS
[Tok] -> ShowS
Tok -> String
(Int -> Tok -> ShowS)
-> (Tok -> String) -> ([Tok] -> ShowS) -> Show Tok
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Tok] -> ShowS
$cshowList :: [Tok] -> ShowS
show :: Tok -> String
$cshow :: Tok -> String
showsPrec :: Int -> Tok -> ShowS
$cshowsPrec :: Int -> Tok -> ShowS
Show, ReadPrec [Tok]
ReadPrec Tok
Int -> ReadS Tok
ReadS [Tok]
(Int -> ReadS Tok)
-> ReadS [Tok] -> ReadPrec Tok -> ReadPrec [Tok] -> Read Tok
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Tok]
$creadListPrec :: ReadPrec [Tok]
readPrec :: ReadPrec Tok
$creadPrec :: ReadPrec Tok
readList :: ReadS [Tok]
$creadList :: ReadS [Tok]
readsPrec :: Int -> ReadS Tok
$creadsPrec :: Int -> ReadS Tok
Read)


-- | a variant can itself be a token list, but for simplicity only a list of strings
-- i.e. not itself containing variants
type Ss = [String]

-- matching functions in both ways

matchPrefix :: Ss -> [(Ss,[String])] -> [String] -> Ss
matchPrefix :: Ss -> [(Ss, Ss)] -> Ss -> Ss
matchPrefix Ss
s [(Ss, Ss)]
vs Ss
t =
  [Ss] -> Ss
forall a. [a] -> a
head ([Ss] -> Ss) -> [Ss] -> Ss
forall a b. (a -> b) -> a -> b
$ [Ss
u | String
t':Ss
_ <- [Ss -> Ss
unmarkup Ss
t],
              (Ss
u,Ss
as) <- [(Ss, Ss)]
vs,
              (String -> Bool) -> Ss -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
t') Ss
as]
         [Ss] -> [Ss] -> [Ss]
forall a. [a] -> [a] -> [a]
++ [Ss
s]
{-
matchSuffix :: String -> Ss -> [(Ss,[String])] -> Ss
matchSuffix t s vs = 
  head ([u | (u,as) <- vs, any (\c -> isSuffixOf c t) as] ++ [s])
-}
unmarkup :: [String] -> [String]
unmarkup :: Ss -> Ss
unmarkup = (String -> Bool) -> Ss -> Ss
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (String -> Bool) -> String -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Bool
isXMLtag) where
  isXMLtag :: String -> Bool
isXMLtag String
s = case String
s of
    Char
'<':cs :: String
cs@(Char
_:String
_) -> String -> Char
forall a. [a] -> a
last String
cs Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'>'
    String
_ -> Bool
False

str2strings :: Str -> Ss
str2strings :: Str -> Ss
str2strings (Str [Tok]
st) = [Tok] -> Ss
alls [Tok]
st where 
  alls :: [Tok] -> Ss
alls [Tok]
st = case [Tok]
st of
    TK String
s     : [Tok]
ts   -> String
s                   String -> Ss -> Ss
forall a. a -> [a] -> [a]
: [Tok] -> Ss
alls [Tok]
ts
    TN Ss
ds [(Ss, Ss)]
vs : [Tok]
ts   -> Ss -> [(Ss, Ss)] -> Ss -> Ss
matchPrefix Ss
ds [(Ss, Ss)]
vs Ss
t Ss -> Ss -> Ss
forall a. [a] -> [a] -> [a]
++ Ss
t where t :: Ss
t = [Tok] -> Ss
alls [Tok]
ts
----    u :TP ds vs: ts -> [u] ++ matchSuffix u ds vs ++ alls ts
    []              -> []
{-
str2allStrings :: Str -> [Ss]
str2allStrings (Str st) = alls st where 
  alls st = case st of
    TK s     : ts -> [s        : t | t <- alls ts]
    TN ds vs : [] -> [ds      ++ v | v <- map fst vs]
    TN ds vs : ts -> [matchPrefix ds vs t ++ t | t <- alls ts]
    []            -> [[]]
-}
sstr :: Str -> String
sstr :: Str -> String
sstr = Ss -> String
unwords (Ss -> String) -> (Str -> Ss) -> Str -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Str -> Ss
str2strings
{-
-- | to handle a list of variants
sstrV :: [Str] -> String
sstrV ss = case ss of
  []  -> "*"
  _   -> unwords $ intersperse "/" $ map (unwords . str2strings) ss
-}
str :: String -> Str
str :: String -> Str
str String
s = if String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
s then [Tok] -> Str
Str [] else [Tok] -> Str
Str [String -> Tok
itS String
s]

itS :: String -> Tok
itS :: String -> Tok
itS String
s = String -> Tok
TK String
s
{-
isZeroTok :: Str -> Bool
isZeroTok t = case t of
  Str [] -> True
  Str [TK []] -> True
  _ -> False
-}
strTok :: Ss -> [(Ss,[String])] -> Str
strTok :: Ss -> [(Ss, Ss)] -> Str
strTok Ss
ds [(Ss, Ss)]
vs = [Tok] -> Str
Str [Ss -> [(Ss, Ss)] -> Tok
TN Ss
ds [(Ss, Ss)]
vs]
{-
prStr :: Str -> String
prStr = prQuotedString . sstr
-}
plusStr :: Str -> Str -> Str
plusStr :: Str -> Str -> Str
plusStr (Str [Tok]
ss) (Str [Tok]
tt) = [Tok] -> Str
Str ([Tok]
ss [Tok] -> [Tok] -> [Tok]
forall a. [a] -> [a] -> [a]
++ [Tok]
tt)

glueStr :: Str -> Str -> Str
glueStr :: Str -> Str -> Str
glueStr (Str [Tok]
ss) (Str [Tok]
tt) = [Tok] -> Str
Str ([Tok] -> Str) -> [Tok] -> Str
forall a b. (a -> b) -> a -> b
$ case ([Tok]
ss,[Tok]
tt) of
  ([],[Tok]
_) -> [Tok]
tt
  ([Tok]
_,[]) -> [Tok]
ss
  ([Tok], [Tok])
_ -> [Tok] -> [Tok]
forall a. [a] -> [a]
init [Tok]
ss [Tok] -> [Tok] -> [Tok]
forall a. [a] -> [a] -> [a]
++ Tok -> Tok -> [Tok]
glueIt ([Tok] -> Tok
forall a. [a] -> a
last [Tok]
ss) ([Tok] -> Tok
forall a. [a] -> a
head [Tok]
tt) [Tok] -> [Tok] -> [Tok]
forall a. [a] -> [a] -> [a]
++ [Tok] -> [Tok]
forall a. [a] -> [a]
tail [Tok]
tt
 where
   glueIt :: Tok -> Tok -> [Tok]
glueIt Tok
t Tok
u = case (Tok
t,Tok
u) of
     (TK String
s, TK String
s') -> Tok -> [Tok]
forall (m :: * -> *) a. Monad m => a -> m a
return (Tok -> [Tok]) -> Tok -> [Tok]
forall a b. (a -> b) -> a -> b
$ String -> Tok
TK (String -> Tok) -> String -> Tok
forall a b. (a -> b) -> a -> b
$ String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s'
     (TN Ss
ds [(Ss, Ss)]
vs, TN Ss
es [(Ss, Ss)]
ws) -> Tok -> [Tok]
forall (m :: * -> *) a. Monad m => a -> m a
return (Tok -> [Tok]) -> Tok -> [Tok]
forall a b. (a -> b) -> a -> b
$ Ss -> [(Ss, Ss)] -> Tok
TN (Ss -> Ss -> Ss
forall a. [[a]] -> [[a]] -> [[a]]
glues (Ss -> [(Ss, Ss)] -> Ss -> Ss
matchPrefix Ss
ds [(Ss, Ss)]
vs Ss
es) Ss
es) 
                               [(Ss -> Ss -> Ss
forall a. [[a]] -> [[a]] -> [[a]]
glues (Ss -> [(Ss, Ss)] -> Ss -> Ss
matchPrefix Ss
ds [(Ss, Ss)]
vs Ss
w) Ss
w,Ss
cs) | (Ss
w,Ss
cs) <- [(Ss, Ss)]
ws]
     (TN Ss
ds [(Ss, Ss)]
vs, TK String
s) -> (String -> Tok) -> Ss -> [Tok]
forall a b. (a -> b) -> [a] -> [b]
map String -> Tok
TK (Ss -> [Tok]) -> Ss -> [Tok]
forall a b. (a -> b) -> a -> b
$ Ss -> Ss -> Ss
forall a. [[a]] -> [[a]] -> [[a]]
glues (Ss -> [(Ss, Ss)] -> Ss -> Ss
matchPrefix Ss
ds [(Ss, Ss)]
vs [String
s]) [String
s]
     (TK String
s, TN Ss
es [(Ss, Ss)]
ws) -> Tok -> [Tok]
forall (m :: * -> *) a. Monad m => a -> m a
return (Tok -> [Tok]) -> Tok -> [Tok]
forall a b. (a -> b) -> a -> b
$ Ss -> [(Ss, Ss)] -> Tok
TN (Ss -> Ss -> Ss
forall a. [[a]] -> [[a]] -> [[a]]
glues [String
s] Ss
es) [(Ss -> Ss -> Ss
forall a. [[a]] -> [[a]] -> [[a]]
glues [String
s] Ss
w, Ss
c) | (Ss
w,Ss
c) <- [(Ss, Ss)]
ws]

glues :: [[a]] -> [[a]] -> [[a]]
glues :: [[a]] -> [[a]] -> [[a]]
glues [[a]]
ss [[a]]
tt = case ([[a]]
ss,[[a]]
tt) of
  ([],[[a]]
_) -> [[a]]
tt
  ([[a]]
_,[]) -> [[a]]
ss
  ([[a]], [[a]])
_ -> [[a]] -> [[a]]
forall a. [a] -> [a]
init [[a]]
ss [[a]] -> [[a]] -> [[a]]
forall a. [a] -> [a] -> [a]
++ [[[a]] -> [a]
forall a. [a] -> a
last [[a]]
ss [a] -> [a] -> [a]
forall a. [a] -> [a] -> [a]
++ [[a]] -> [a]
forall a. [a] -> a
head [[a]]
tt] [[a]] -> [[a]] -> [[a]]
forall a. [a] -> [a] -> [a]
++ [[a]] -> [[a]]
forall a. [a] -> [a]
tail [[a]]
tt
{-
-- | to create the list of all lexical items
allItems :: Str -> [String]
allItems (Str s) = concatMap allOne s where
  allOne t = case t of
    TK s -> [s]
    TN ds vs -> ds ++ concatMap fst vs
-}