{-|
Module:     Codec.Parser.Core
Copyright:  Jeremy List
License:    BSD-3
Maintainer: quick.dudley@gmail.com

Core functions and types.
-}
{-# LANGUAGE RankNTypes, MultiParamTypeClasses, FunctionalDependencies #-}
module Codec.Phaser.Core (
  Automaton,
  Phase,
  Link(..),
  get,
  put,
  put1,
  count,
  yield,
  eof,
  (<??>),
  (<?>),
  (>#>),
  starve,
  toAutomaton,
  fromAutomaton,
  beforeStep,
  step,
  extract,
  toReadS,
  run,
  parse_,
  parse1_,
  options,
  readCount,
  outputs
 ) where

import Control.Applicative
import Control.Monad

-- | Represents a nondeterministic computation in progress.
-- There are 4 type parameters: a counter type (may be used for tracking line
-- and column numbers), an input type, an incremental output type, and a final
-- output type.
data Automaton p i o a =
  Result a |
  Ready (i -> Automaton p i o a) ([String] -> [String]) |
  Failed ([String] -> [String]) |
  Automaton p i o a :+++ Automaton p i o a |
  Yield o (Automaton p i o a) |
  Count (p -> p) (Automaton p i o a)

-- | A type for building 'Automaton' values. 'Monad' and 'Applicative' instances
-- are defined for this type rather than for 'Automaton' in order to avoid
-- traversing the entire call stack for every input value.
newtype Phase p i o a =
  Phase (([String] -> [String]) ->
    forall b . (a -> Automaton p i o b) -> Automaton p i o b)

infixr 4 >>#
-- | Class for types which consume and produce incremental input and output.
class Link s d l | s d -> l where
  -- | Take the incremental output of the first argument and use it as input
  -- for the second argument. Discard the final output of the first argument.
  (>>#) :: s p b c x -> d p c t a -> l p b t a

instance Functor (Phase p i o) where
  fmap f (Phase x) = Phase (\e c -> x e (c . f))

instance Applicative (Phase p i o) where
  pure a = Phase (\e c -> c a)
  Phase f <*> Phase a = Phase (\e c -> f e (\f' -> a e (c . f')))
  Phase a <* Phase b = Phase (\e c -> a e (\a' -> b e (\_ -> c a')))
  (*>) = (>>)

-- This is the first time I've ever created a separate '>>' method for a monad
-- instance, but turns out in this case it's a significant optimization.
instance Monad (Phase p i o) where
  return = pure
  fail s = Phase (\e _ -> Failed (e . (s:)))
  Phase a >>= f = Phase (\e c -> a e (\a' -> let Phase b = f a' in b e c))
  Phase a >> Phase b = Phase (\e c -> a e (const (b e c)))

instance Alternative (Phase p i o) where
  empty = Phase (\e _ -> Failed e)
  Phase a <|> Phase b = Phase (\e c -> prune1 (a e c :+++ b id c))
  many a = some a <|> pure []
  some (Phase a) = Phase (\e c -> let
    go acc = a e (\x -> let acc' = acc . (x:) in prune1 (go acc' :+++ c (acc' [])))
    in go id
   )

instance MonadPlus (Phase p i o) where
  mzero = empty
  mplus = (<|>)

instance Functor (Automaton p i o) where
  fmap f = go where
    go (Result a) = Result (f a)
    go (Ready n e) = Ready (fmap go n) e
    go (Failed e) = Failed e
    go (a :+++ b) = go a :+++ go b
    go (Yield o r) = Yield o (go r)
    go (Count p r) = Count p (go r)

instance Link Phase Automaton Phase where
  (>>#) = link_p_a

link_p_a :: Phase p i t z -> Automaton p t o a -> Phase p i o a
{-# INLINE [1] link_p_a #-}
link_p_a s d = fromAutomaton (toAutomaton s >># d)

instance Link Phase Phase Phase where
  (>>#) = link_p_p

link_p_p :: Phase p i t z -> Phase p t o a -> Phase p i o a
{-# INLINE [1] link_p_p #-}
link_p_p s d = s >># toAutomaton d

{-# RULES
">>#/>>#/1.1"
  forall (a :: Phase p b c x) (b :: Phase p c t r) (c :: Phase p t g o) .
    link_p_p a (link_p_p b c) = link_p_a a (toAutomaton b >># toAutomaton c)
">>#/>>#/1.2"
  forall (a :: Phase p b c x) (b :: Phase p c t r) (c :: Automaton p t g o) .
    link_p_p a (link_p_a b c) = link_p_a a (toAutomaton b >># c)
">>#/>>#/2.1"
  forall
    (a :: Phase p b c x)
    (b :: Phase p c t y)
    (c :: Phase p t o z) .
     link_p_p (link_p_p a b) c = link_p_p a (link_p_p b c)
">>#/>>#/2.2"
  forall
    (a :: Phase p b c x)
    (b :: Phase p c t y)
    (c :: Automaton p t o z) .
     link_p_a (link_p_p a b) c = link_p_p a (link_p_a b c)
">>#/>>#/2.3"
  forall
    (a :: Phase p b c x)
    (b :: Automaton p c t y)
    (c :: Automaton p t o z) .
     link_p_a (link_p_a a b) c = link_p_a a (b >># c)
 #-}

instance Link Automaton Automaton Automaton where
  {-# INLINE [2] (>>#) #-}
  (>>#) = (!!!) where
    Yield o r !!! d = case beforeStep d of
      Left e -> e
      Right d' -> r !!! step d' o
    Failed e !!! _ = Failed e
    _ !!! Failed e = Failed e
    Result _ !!! d = starve d
    (a :+++ b) !!! d = prune1 ((a !!! d) :+++ (b !!! d))
    Count p r !!! d = prune1 (Count p (r !!! d))
    s !!! Count p r = prune1 (Count p (s !!! r))
    s !!! Yield o r = Yield o (s !!! r)
    Ready n e !!! d = Ready (\t -> n t !!! d) e

{- 
{-# RULES
"toAutomaton/fromAutomaton" forall a . toAutomaton (fromAutomaton a) = a
"fromAutomaton/toAutomaton" forall a . fromAutomaton (toAutomaton a) = a
 #-} -}

-- | When the right argument fails: apply the left argument to the list of
-- error messages. Depreciated because it doesn't work correctly for all
-- arguments and fixing it would break the 'Alternative' and 'MonadPlus'
-- instances.
infixr 1 <??>
{-# WARNING (<??>) "<??> is faulty and will be removed in future versions. \
  \Please use <?> instead" #-}
(<??>) :: ([String] -> [String]) -> Phase p i o a -> Phase p i o a
f <??> Phase s = Phase (\e -> s (f . e))

-- | If parsing fails in the right argument: prepend the left argument to the
-- errors
infixr 1 <?>
e <?> Phase s = Phase (\e1 -> s ((e :) . e1))

-- | Change the counter type of a Phase object.
infixr 1 >#>
(>#>) :: ((p0 -> p0) -> p -> p) -> Phase p0 i o a -> Phase p i o a
{-# INLINABLE [1] (>#>) #-}
f >#> p = fromAutomaton $ go $ toAutomaton p where
  go (Result a) = Result a
  go (Ready n e) = Ready (fmap go n) e
  go (Failed e) = Failed e
  go (a :+++ b) = go a :+++ go b
  go (Yield t r) = Yield t (go r)
  go (Count p r) = Count (f p) (go r)

{-# RULES
">#>/>#>" forall pt2 pt1 a . pt2 >#> (pt1 >#> a) = (pt2 . pt1) >#> a
  #-}

-- | Return one item of the input.
get :: Phase p i o i
get = Phase (flip Ready)

-- | Modify the counter
count :: (p -> p) -> Phase p i o ()
{-# INLINE [1] count #-}
count f = Phase (\_ c -> Count f (c ()))

-- | Yield one item for the incremental output
yield :: o -> Phase p i o ()
{-# INLINE [1] yield #-}
yield o = Phase (\_ c -> Yield o (c ()))

-- | Fail if any more input is provided.
eof :: Phase p i o ()
eof = Phase (\e c -> prune1 (Failed e :+++ starve (c ())))

-- | Insert one value back into the input. May be used for implementing lookahead
put1 :: i -> Phase p i o ()
{-# INLINE [1] put1 #-}
put1 i = Phase (\_ c -> case beforeStep (c ()) of
  Right n -> step n i
  Left e -> e
 )

-- | Put a list of values back into the input.
put :: [i] -> Phase p i o ()
{-# INLINE [1] put #-}
put i = Phase (\_ c -> run (c ()) i)

{-# RULES
"count/yield" forall p o . count p >> yield o = yield o >> count p
  #-}

{-# INLINABLE [1] prune1 #-}
prune1 (Failed e1 :+++ Failed e2) = Failed (e1 . e2)
prune1 (Failed e1 :+++ Ready n e2) = Ready n (e1 . e2)
prune1 (Ready n e1 :+++ Failed e2) = Ready n (e1 . e2)
prune1 (Ready n1 e1 :+++ Ready n2 e2) =
  Ready (\i -> prune1 $ n1 i :+++ n2 i) (e1 . e2)
prune1 (r@(Result _) :+++ Failed _) = r
prune1 (Failed _ :+++ r@(Result _)) = r
prune1 (f@(Failed _) :+++ (a :+++ b)) = prune1 (prune1 (f :+++ a) :+++ b)
prune1 ((a :+++ b) :+++ f@(Failed _)) = prune1 (a :+++ prune1 (b :+++ f))
prune1 (f@(Failed _) :+++ Yield o r) = prune1 (Yield o (prune1 (f :+++ r)))
prune1 (Yield o r :+++ f@(Failed _)) = prune1 (Yield o (prune1 (r :+++ f)))
prune1 (Count p (Count q r)) = prune1 $ Count (\w -> let
  w' = p w
  in w' `seq` q w') r
prune1 (Count p (Yield o r)) =
  prune1 (Yield o (prune1 (Count p r)))
prune1 (Yield _ f@(Failed _)) = f
prune1 (Yield _ f@(Count _ (Failed _))) = f
prune1 a = a

{-# RULES
"prune1/prune1" forall a . prune1 (prune1 a) = prune1 a
  #-}

-- | Remove an 'Automaton''s ability to consume further input
starve :: Automaton p i o a -> Automaton p z o a
{-# INLINABLE [1] starve #-}
starve (Result a) = Result a
starve (Ready _ e) = Failed e
starve (Failed e) = Failed e
starve (a :+++ b) = prune1 (starve a :+++ starve b)
starve (Yield o r) = prune1 (Yield o (starve r))
starve (Count p r) = prune1 (Count p (starve r))

{-# RULES
"starve/starve" forall a . starve (starve a) = starve a
  #-}

-- | Convert a 'Phase' to an 'Automaton'. Subject to fusion.
toAutomaton :: Phase p i o a -> Automaton p i o a
{-# INLINE[2] toAutomaton #-}
toAutomaton (Phase c) = c id Result

-- | Convert an 'Automaton' back to a 'Phase' (somewhat inefficient). Subject
-- to fusion.
fromAutomaton :: Automaton p i o a -> Phase p i o a
{-# INLINE[2] fromAutomaton #-}
fromAutomaton a = Phase (\e' c -> let
  continue (Result r) = c r
  continue (Ready n e) = Ready (fmap continue n) (e' . e)
  continue (Failed e) = Failed (e' . e)
  continue (l :+++ r) = prune1 (continue l :+++ continue r)
  continue (Count p r) = prune1 (Count p (continue r))
  continue (Yield o r) = prune1 (Yield o (continue r))
  in continue a
 )

-- | Optional pre-processing of an automaton before passing it more input.
-- Produces 'Right' with all "final outputs" and errors stripped if the
-- automaton can accept more input, and 'Left' with everything except errors
-- stripped if it cannot accept more input.
beforeStep :: Automaton p i o a ->
  Either (Automaton p v o a) (Automaton p i o a)
beforeStep = go where
  go :: Automaton p i o a ->
    Either (Automaton p v o a) (Automaton p i o a)
  go (Result _) = Left (Failed id)
  go r@(Ready _ _) = Right r
  go (Failed f) = Left $ Failed f 
  go (a :+++ b) = case (go a, go b) of
    (Right a', Right b') -> Right $ prune1 $ a' :+++ b'
    (a'@(Right _), Left _) -> a'
    (Left _, b'@(Right _)) -> b'
    (Left a', Left b') -> Left $ prune1 $ a' :+++ b'
  go (Yield o r) = case go r of
    r'@(Left _) -> r'
    Right r' -> Right (prune1 $ Yield o r')
  go (Count p r) = case go r of
    Left r' -> Left $ prune1 $ Count p r'
    Right r' -> Right $ prune1 $ Count p r'

-- | Pass one input to an automaton
step :: Automaton p i o a -> i -> Automaton p i o a
step a' i = go a' where
  go (Result _) = Failed id
  go (Ready n _) = n i
  go (Failed e) = Failed e
  go (a :+++ b) = prune1 (go a :+++ go b)
  go (Yield o r) = prune1 (Yield o (go r))
  go (Count p r) = prune1 (Count p (go r))

-- | Take either counters with errors or a list of possible results from an
-- automaton.
extract :: p -> Automaton p i o a -> Either [(p,[String])] [a]
extract p' a = case go p' a of
  Left e -> Left $ map (\(p,e') -> (p, e' [])) (e [])
  Right r -> Right $ r []
 where
  go _ (Result z) = Right (z:)
  go p (Ready _ e) = Left ((p,e):)
  go p (Failed e) = Left ((p,e):)
  go p (a :+++ b) = case (go p a, go p b) of
    (Right a', Right b') -> Right (a' . b')
    (a'@(Right _), Left _) -> a'
    (Left _, b'@(Right _)) -> b'
    (Left a', Left b') -> Left (a' . b')
  go p (Yield _ r) = go p r
  go p (Count i r) = let
    p' = i p
    in p' `seq` go p' r

-- | Create a 'ReadS' like value from an 'Automaton'. If the Automaton's input
-- type is 'Char', the result will be 'ReadS'
toReadS :: Automaton p i o a -> [i] -> [(a,[i])]
toReadS a i = go a i [] where
  go (Result r) i' = ((r,i'):)
  go (Ready _ _) [] = id
  go (Ready n _) (t:r) = go (n t) r
  go (Failed _) _ = id
  go (a :+++ b) i' = go a i' . go b i'
  go (Yield _ r) i' = go r i'
  go (Count _ r) i' = go r i'

-- | Pass a list of input values to an 'Automaton'
run :: Automaton p i o a -> [i] -> Automaton p i o a
run = go where
  go a [] = a
  go a (i:r) = case beforeStep a of
    Left a' -> a'
    Right a' -> go (step a' i) r

-- | Use a 'Phase' value similarly to a parser.
parse_ :: p -> Phase p i o a -> [i] -> Either [(p,[String])] [a]
parse_ p a i = extract p $ run (toAutomaton a) i

-- | Use a 'Phase' as a parser, but consuming a single input instead of a list
parse1_ :: p -> Phase p i o a -> i -> Either [(p,[String])] [a]
parse1_ p a i = extract p $ step (toAutomaton a) i

-- | Decompose an 'Automaton' into its component options.
options :: Automaton p i o a -> [Automaton p i o a]
options = ($ []) . go where
  go (a :+++ b) = go a . go b
  go (Yield o r) = (fmap . fmap) (Yield o) $ go r
  go (Count p r) = (fmap . fmap) (Count p) $ go r
  go a = (a :)

-- | Separate unconditional counter modifiers from an automaton
readCount :: Automaton p i o a -> (p -> p, Automaton p i o a)
readCount = go where
  go (Count p0 r) = let
    (p1, r') = go r
    p' c = let
      c' = p0 c
      in c' `seq` p1 c'
    in (p', r')
  go (Yield o r) = let
    (p, r') = go r
    in (p, prune1 $ Yield o r')
  go a = (id, a)

-- | Separate the values unconditionally yielded by an automaton
outputs :: Automaton p i o a -> ([o], Automaton p i o a)
outputs = go where
  go (Yield o r) = let
    (o', r') = go r
    in (o:o', r')
  go (Count p r) = let
    (o, r') = go r
    in (o, prune1 $ Count p r')
  go a = ([], a)