-- |
-- The data structures (and associated functions) used in the
-- parser. For a normal usage, it should be enough
-- to import only 'Text.Syntactical', not directly this module.
module Text.Syntactical.Data (
  SExpr(..), Tree(..), Op(..), Opening(..),
  Associativity(..), Hole(..), Part(..), Table, Priority(..),
  infx, prefx, postfx, closed,
  infx_, prefx_, postfx_, closed_,
  sexpr, distfix,
  buildTable, cut, setPrecedence,
  begin, end, leftOpen, rightOpen, rightHole, discard,
  applicator, applicator', continue, original, priority,
  arity, symbol, symbols, next, previous, current,
  findBoth, findBegin, FindBegin(..), FindBoth(..), Ambiguity(..),
  Token(..),
  showPart, showSExpr, showTree
  ) where

import Data.List

----------------------------------------------------------------------
-- Data structures to represent trees, operators, and parts
----------------------------------------------------------------------

-- | The s-expression data type used as input and output of the parser.
-- The type is parametrized by the type of the token.
data SExpr a = List [SExpr a]
             | Atom a
  deriving (Eq, Show)

-- | The s-expression data type augmented to represent parts (used in
-- the operator stack).
data Tree a = Branch [Tree a]
            | Leaf a
            | Part (Part a)
  deriving (Eq, Show)

-- | The class of the types that can be parsed.
class Token a where
  toString :: a -> String
  -- ^ convert to a string (for showing purpose)
  operator :: Op a -> [SExpr a] -> SExpr a
  -- ^ create an output node from an operator and its arguments
  consider :: a -> a -> Bool
  -- ^ test if two tokens are the same (used to find match from the
  -- operator table). A default definition that compares the result
  -- of toString is provided.

  -- default definition for consider tests the string representation
  consider a b = toString a == toString b

considers :: Token a => [a] -> [a] -> Bool
considers a b = length a == length b && and (zipWith consider a b)

-- | The operator representation, parametrized by the token type.
-- It allows infix, prefix, postfix,
-- and closed operators, with possibly multiple internal holes.
-- Different holes are possible, to drive the parse in specific ways.

-- The boolean is used to specify if the operator should show up
-- in the result or be discarded. The opening further specifies
-- in the non-closed variant if the operator is prefix, infix, or postfix.
data Op a =
    Op1 Bool a [(Hole,a)] Opening Precedence
  | Op2 Bool a [(Hole,a)] Hole a
  deriving (Eq, Show)

-- | Set the precedence of a given operator.
setPrecedence :: Precedence -> Op a -> Op a
setPrecedence p (Op1 keep x xs opening _) = Op1 keep x xs opening p
setPrecedence _ c = c

-- | Return all the tokens of a given operator.
symbols :: Op a -> [a]
symbols (Op1 _ a xs _ _) = a : map snd xs
symbols (Op2 _ a xs _ b) = a : map snd xs ++ [b]

-- | Separate an operator in its different parts.
cut :: Op a -> [Part a]
cut (Op1 keep x [] opening p) =
  [Lone keep x opening p]
cut o@(Op1 _ x xs opening p) =
  First ma x [snd $ head xs] (fst $ head xs) :
  map f (zip4 ls ss rs ks) ++
  [Last o]
  where
    ma = case opening of
      Postfix -> Just (NonAssociative,p)
      Infix a -> Just (a,p)
      Prefix -> Nothing
    f (l, s, r, k) = Middle l s r k
    (_, xs') = holesAfter (init xs) (fst $ last xs)
    fxs = inits $ map fst xs'
    sxs = inits $ map snd xs'
    ls = map (x:) (init fxs)
    ss = map head (tail fxs)
    rs = map ((++[snd $ last xs]) . tail) (tail fxs)
    ks = map head (tail sxs)

cut o@(Op2 _ x [] h y) =
  [First Nothing x [y] h, Last o]
cut o@(Op2 _ x xs h y) =
  First Nothing x [snd $ head xs] (fst $ head xs) :
  map f (zip4 ls ss rs ks) ++
  [Last o]
  where
    f (l, s, r, k) = Middle l s r k
    (_, xs') = holesAfter xs h
    fxs = inits $ map fst xs'
    sxs = inits $ map snd xs'
    ls = map (x:) (init fxs)
    ss = map head (tail fxs)
    rs = map ((++[y]) . tail) (tail fxs)
    ks = map head (tail sxs)

-- Takes a list of pair (hole,string) and returns
-- a list of (string,hole) where the ordre and interleaving
-- is respected: the first hole is returned and the last hole
-- is an argument.
holesAfter :: [(Hole,s)] -> Hole -> (Hole, [(s,Hole)])
holesAfter [] h = (h, [])
holesAfter [(a,b)] h = (a, [(b,h)])
holesAfter ((a,b):xs@((c,_):_)) h = (a, (b,c) : snd (holesAfter xs h))

-- | 'buildTable' constructs an operator table that can be
-- used with the 'shunt' function. Operators are given
-- in decreasing precedence order.
buildTable :: [[Op a]] -> Table a
buildTable ls = Table . concat $ zipWith f ls [n, n - 1 .. 0]
  where n = length ls
        f l p = concatMap (cut . setPrecedence p) l

-- | The Hole is used to give various behaviours when dealing
-- with internal holes.
data Hole =
    SExpression
  -- ^ SExpression means the 'content' of the hole should be
  -- parsed as an s-expression. The resulting value is a List.
  -- This means the hole can be empty or contain one or more
  -- sub-expression(s).
  | Distfix
  -- ^ Distfix means the 'content' of the hole should be parsed
  -- as a distfix expression. In this case feeding an empty hole
  -- will generate a parse error.
  deriving (Eq, Show)

-- | Specify the associativity of an infix operator.
data Associativity = NonAssociative | LeftAssociative | RightAssociative
  deriving (Show, Eq)

type Precedence = Int

data Priority = Lower | Higher | NoPriority

-- | The type of the operator table.
newtype Table a = Table [Part a]

-- NoBegin: no parts with the requested symbol.
-- Begin: found a begin part.
-- MissingBegin: no begin part found but continuing part found.
data FindBegin a = NoBegin
                 | Begin (Part a)
                 | MissingBegin [[a]]
                 | AmbiguousBegin Ambiguity

data FindContinue a = NoContinue
                    | Continue (Part a)
                    | AmbiguousContinue Ambiguity

data Ambiguity = MiddleOrLast
               | NotSameHole
               | NotSameFirst
               | LoneOrFirst
               | MultipleLone
  deriving (Eq, Show)

data FindBoth a = BNothing
                | BContinue (Part a)
                | BBegin (Part a)
                | BMissingBegin [[a]]
                | BAmbiguous Ambiguity

findParts :: Token a => Table a -> a -> [Part a]
findParts (Table ps) x = filter (consider x . symbol) ps

findContinuing :: Token a => [Part a] -> Part a -> FindContinue a
findContinuing xs y = case as of
  [] -> NoContinue
  (a:as') -> if isLast a
    then if all isLast as'
      then Continue $ groupLast as
      else AmbiguousContinue MiddleOrLast
    else if all isMiddle as'
      then case groupMiddle as of
        Just pt -> Continue pt
        Nothing -> AmbiguousContinue NotSameHole
      else AmbiguousContinue MiddleOrLast
  where as = filter (`continue` y) xs

-- Search the operator stack for the top-most parts waiting to be completed
-- (i.e. on the left of an innner hole).
findIncompletePart :: [Tree a] -> Maybe (Part a)
findIncompletePart [] = Nothing
findIncompletePart (Part y:_) | not (end y) = Just y
findIncompletePart (_:ss) = findIncompletePart ss

-- - The operator doesn't contain any operator
-- -> returns (First,Nothing)
-- - The operator stack has an operator at its top and
-- no incomplete operator
-- -> returns (First,Just Top)
-- - The operator stack has no operator at its top but
-- has an incomplete operator below
-- -> return (Continuing of First, Nothing)
-- - The operator stack has an operator at its top and
-- an incomplete operator (at the top or below)
-- -> returns (Continuing or First, Just Top)
-- Actually, if there is no Continuing, returns what it can
-- find, even if it is not First; one of the rules will
-- generate a MissingBefore (in the [] case) or an Incomplete
-- (in the pts2 case).
findBoth :: Token a => Table a -> a -> [Tree a] -> FindBoth a
findBoth table x st = case findIncompletePart st of
  Nothing -> wrap $ findBegin table x
  Just y -> case findContinuing xs y of
    Continue a -> BContinue a
    NoContinue -> wrap $ findBegin table x
    AmbiguousContinue amb -> BAmbiguous amb
  where xs = findParts table x
        wrap a = case a of
          NoBegin -> BNothing
          MissingBegin b -> BMissingBegin b
          Begin b -> BBegin b
          AmbiguousBegin amb -> BAmbiguous amb

findBegin :: Token a => Table a -> a -> FindBegin a
findBegin table x = case filterParts $ findParts table x of
  ([],[],[],[]) -> NoBegin
  (_:_,_:_,_,_) -> AmbiguousBegin LoneOrFirst
  ([pt],_,_,_) -> Begin pt
  (_:_,_,_,_) -> AmbiguousBegin MultipleLone
  (_,f@(_:_),_,_) -> case groupFirst f of
    Left amb -> AmbiguousBegin amb
    Right pt -> Begin pt
  (_,_,m,l) -> MissingBegin $ map previous (m++l)

-- | A Part represent a single symbol of an operator.
data Part a = First (Maybe (Associativity,Precedence)) a [a] Hole
-- assoc/prec if it is open, possible successor parts, non-empty, s-expr/distfix
          | Last (Op a)
-- The Op1 case cannot have an empty list (this is the Lone case).
          | Lone Bool a Opening Precedence
-- Same as Op1 but without the list.
          | Middle [a] a [a] Hole
-- possible predecessor and successor parts, both non-empty, s-expr/distfix
  deriving (Show, Eq)

-- Specify if an Op1 or a Lone is prefix, postfix, or infix.
data Opening = Infix Associativity
             | Prefix
             | Postfix
  deriving (Show, Eq)

original :: Part a -> Op a
original (Lone keep x opening p) = Op1 keep x [] opening p
original (Last o) = o
original _ = error "can't happen"

priority :: Part a -> Part a -> Priority
priority pt1 pt2 = case (associativity pt1, associativity pt2) of
  (Just (a1,p1), Just (a2,p2)) | begin pt1 && end pt2 ->
    f a1 p1 a2 p2
  _ | isMiddle pt1 || end pt1 && not (isLone pt1) -> Lower
    | otherwise -> Higher
  where f a1 p1 a2 p2
          | p1 == p2 && (a1 == NonAssociative
            || a2 == NonAssociative || a1 /= a2) =
            NoPriority
          | p1 < p2 = Lower
          | p1 == p2 && a1 == LeftAssociative = Lower
          | otherwise = Higher

applicator :: Token a => Table a -> SExpr a -> Bool
applicator table (Atom x) = null $ findParts table x
applicator _ (List _) = True

applicator' :: Token a => Table a -> Tree a -> Bool
applicator' table (Leaf x) = null $ findParts table x
applicator' _ (Branch _) = True
applicator' _ _ = False

isLone :: Part a -> Bool
isLone (Lone _ _ _ _) = True
isLone _ = False

isFirst :: Part a -> Bool
isFirst (First _ _ _ _) = True
isFirst _ = False

isLast :: Part a -> Bool
isLast (Last _) = True
isLast _ = False

isMiddle :: Part a -> Bool
isMiddle (Middle _ _ _ _) = True
isMiddle _ = False

begin :: Part a -> Bool
begin (Lone _ _ _ _) = True
begin (First _ _ _ _) = True
begin _ = False

end :: Part a -> Bool
end (Lone _ _ _ _) = True
end (Last _) = True
end _ = False

discard :: Part a -> Bool
discard (First _ _ _ _) = False
discard (Last (Op1 keep _ _ _ _)) = not keep
discard (Last (Op2 keep _ _ _ _)) = not keep
discard (Lone keep _ _ _) = not keep
discard (Middle _ _ _ _) = False

-- | Return the token of a given Part.
symbol :: Part a -> a
symbol (First _ s _ _) = s
symbol (Last (Op1 _ _ xs _ _)) = snd $ last xs
symbol (Last (Op2 _ _ _ _ s)) = s
symbol (Lone _ s _ _) = s
symbol (Middle _ s _ _) = s

-- | Return the arity of a complete Part. It is an error to call this
-- function on a First or Middle part.
arity :: Part a -> Int
arity (First _ _ _ _) = error "arity: bad argument"
arity (Middle _ _ _ _) = error "arity: bad argument"
arity (Lone _ _ (Infix _) _) = 2
arity (Lone _ _ _ _) = 1
arity (Last (Op1 _ _ xs opening _)) = case opening of
  Postfix -> length xs + 1
  Infix _ -> length xs + 2
  Prefix -> length xs + 1
arity (Last (Op2 _ _ xs _ _)) = length xs + 1


leftOpen :: Part a -> Bool
leftOpen (First (Just _) _ _ _) = True
leftOpen (First _ _ _ _) = False
leftOpen (Last _) = True
leftOpen (Lone _ _ Prefix _) = False
leftOpen (Lone _ _ _ _) = True
leftOpen (Middle _ _ _ _) = True

rightOpen :: Part a -> Bool
rightOpen (First _ _ _ _) = True
rightOpen (Last (Op1 _ _ _ Prefix _)) = True
rightOpen (Last (Op1 _ _ _ (Infix _) _)) = True
rightOpen (Last _) = False
rightOpen (Lone _ _ Postfix _) = False
rightOpen (Lone _ _ _ _) = True
rightOpen (Middle _ _ _ _) = True

rightHole :: Part a -> Maybe Hole
rightHole (First _ _ _ k) = Just k
rightHole (Last _) = Nothing
rightHole (Lone _ _ _ _) = Nothing
rightHole (Middle _ _ _ k) = Just k

-- Not the true associativity of the original operator.
-- E.g. this will return Nothing for the last part of
-- a postfix operator, but a maybe for its first part.
associativity :: Part a -> Maybe (Associativity,Precedence)
associativity (First ap _ _ _) = ap
associativity (Last (Op1 _ _ _ opening p)) = case opening of
  Postfix -> Nothing
  Infix a -> Just (a,p)
  Prefix -> Just (NonAssociative,p)
associativity (Last (Op2 _ _ _ _ _)) = Nothing
associativity (Lone _ _ Postfix p) = Just (NonAssociative,p)
associativity (Lone _ _ Prefix p) = Just (NonAssociative,p)
associativity (Lone _ _ (Infix a) p) = Just (a,p)
associativity (Middle _ _ _ _) = Nothing

-- | Return the possible tokens continuing the given part.
next :: Part a -> [a]
next (First _ _ r _) = r
next (Last _) = []
next (Lone _ _ _ _) = []
next (Middle _ _ r _) = r

-- | Return the tokens preceding the given part.
previous :: Part a -> [a]
previous (First _ _ _ _) = []
previous (Last (Op1 _ _ [] _ _)) = error "can't happen"
previous (Last (Op1 _ a [_] _ _)) = [a]
previous (Last (Op1 _ a xs _ _)) = a : map snd (init xs)
previous (Last (Op2 _ a [] _ _)) = [a]
previous (Last (Op2 _ a xs _ _)) = a : map snd xs
previous (Lone _ _ _ _) = []
previous (Middle l _ _ _) = l

-- | Return the tokens of the given part.
current :: Part a -> [a]
current (First _ s _ _) = [s]
current (Last (Op1 _ _ [] _ _)) = error "can't happen"
current (Last (Op1 _ x xs _ _)) = x : map snd xs
current (Last (Op2 _ a xs _ b)) = a : map snd xs ++ [b]
current (Lone _ s _ _) = [s]
current (Middle l s _ _) = l ++ [s]

continue :: Token a => Part a -> Part a -> Bool
continue x y = considers (previous x) (current y)

filterParts :: [Part a] -> ([Part a],[Part a],[Part a],[Part a])
filterParts pts = (filter isLone pts, filter isFirst pts,
  filter isMiddle pts, filter isLast pts)

groupFirst :: Token a => [Part a] -> Either Ambiguity (Part a)
groupFirst [] = error "groupFirst: empty list"
groupFirst (First a' x s' k':pts) = go a' s' k' pts
  where go a s k [] = Right $ First a x s k
        go a s k (First a2 _ s2 k2:xs)
          | a == a2 && k == k2 = go a (unionBy consider s s2) k xs
          | a /= a2 = Left NotSameFirst
          | k /= k2 = Left NotSameHole
        go _ _ _ _ = error "groupFirst: not a First part"
groupFirst _ = error "groupFirst: not a First part"

groupMiddle :: Token a => [Part a] -> Maybe (Part a)
groupMiddle [] = error "groupMiddle: empty list"
groupMiddle (Middle ss' x s' k':pts) = go ss' s' k' pts
  where go ss s k [] = Just $ Middle ss x s k
        go ss s k (Middle ss2 _ s2 k2:xs)
          | not (considers ss ss2) = error "groupMiddle: different prefix"
          | k == k2 = go ss (unionBy consider s s2) k xs
        go _ _ _ _ = Nothing -- ambiguous middle parts
groupMiddle _ = error "groupMiddle: not a Middle part"

groupLast :: [Part a] -> Part a
groupLast [] = error "groupLast: empty list"
groupLast [l@(Last _)] = l
groupLast _ = error "groupLast: not a Last part"

----------------------------------------------------------------------
-- Combinators to construct the operator table
----------------------------------------------------------------------

-- | Build a infix operator. The precedence is set to 0.
infx :: Associativity -> a -> Op a
infx a f = Op1 True f [] (Infix a) 0

-- | Build a infix operator with the keep property set to False.
-- The precedence is set to 0.
infx_ :: Associativity -> a -> Op a
infx_ a f = Op1 False f [] (Infix a) 0

-- | Build a prefix operator. The precedence is set to 0.
prefx :: a -> Op a
prefx f = Op1 True f [] Prefix 0

-- | Build a prefix operator with the keep property set to False.
-- The precedence is set to 0.
prefx_ :: a -> Op a
prefx_ f = Op1 False f [] Prefix 0

-- | Build a postfix operator. The precedence is set to 0.
postfx :: a -> Op a
postfx f = Op1 True f [] Postfix 0

-- | Build a postfix operator with the keep property set to False.
-- The precedence is set to 0.
postfx_ :: a -> Op a
postfx_ f = Op1 False f [] Postfix 0

-- | Build a closed operator. The precedence is set to 0.
closed :: a -> Hole -> a -> Op a
closed f = Op2 True f []

-- | Build a closed operator with the keep property set to False.
-- The precedence is set to 0.
closed_ :: a -> Hole -> a -> Op a
closed_ f = Op2 False f []

-- | Add a new part separated by an SExpression hole to the right
-- of an operator.
sexpr :: Op a -> a -> Op a
sexpr (Op1 keep x rest opening p) y =
  Op1 keep x (rest++[(SExpression,y)]) opening p
sexpr (Op2 keep x rest k y) z =
  Op2 keep x (rest++[(k,y)]) SExpression z

-- | Add a new part separated by a Distfix hole to the right
-- of an operator.
distfix :: Op a -> a -> Op a
distfix (Op1 keep x rest opening p) y =
  Op1 keep x (rest++[(Distfix,y)]) opening p
distfix (Op2 keep x rest k y) z =
  Op2 keep x (rest++[(k,y)]) Distfix z

----------------------------------------------------------------------
-- A few 'show' functions for SExpr, and Tree
----------------------------------------------------------------------

-- | Show an s-expression using nested angle brackets.
showSExpr :: Token a => SExpr a -> String
showSExpr = tail . f
  where
  f (Atom s) = ' ' : toString s
  f (List []) = ' ' : "⟨⟩"
  f (List es) = ' ' : '⟨' : tail (concatMap f es) ++ "⟩"

-- Similar to showSExpr but for a Tree.
showTree :: Token a => Tree a -> String
showTree = tail . f
  where
  f (Leaf s) = ' ' : toString s
  f (Part y) = ' ' : concatMap toString (current y)
  f (Branch []) = ' ' : "⟨⟩"
  f (Branch es) = ' ' : '⟨' : tail (concatMap f es) ++ "⟩"

showPart :: Token a => Part a -> String
showPart = toString . symbol