{-# LANGUAGE GADTs, RankNTypes, FlexibleInstances, UndecidableInstances #-}

module Text.Derp
  ( -- * Data Types
    Parser, Token(..)
  , -- * Parser construction
    (<|>), (<~>), (==>), (==>|), nul, pzip, ter, eps, epsM, emp
  , -- * Parser computation steps
    derive, compact, parseNull
  , -- * Full parsing and result extraction
    defaultCompactSteps, compactNum, deriveStepNum, runParseNum
  , deriveStep, runParse
  , -- * Demos
    xsR, xsL, xsIn, parens, parensIn, amb, ambIn, sexp, sexpIn

  ) where

import Control.Monad
import Data.Function
import Data.IORef
import Data.List
import Data.Map (Map)
import System.IO.Unsafe
import System.Mem.StableName
import Text.Printf
import Unsafe.Coerce
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set

-- | Represents both a formal context-free language and the
--   reduction of a member of that language to a value of type `a'.

--   Languages range of `Token' values.

data Parser a = Parser
  { parserRec      :: ParserRec Parser a
  , parserNullable :: FPValue Bool
  , parserDerive   :: Token -> Parser a
  , parserCompact  :: Parser a
  }

data ParserRec p a where
  Alt :: (ResultType a)               => p a -> p a -> ParserRec p a
  Con :: (ResultType a, ResultType b) => p a -> p b -> ParserRec p (a, b)
  Red :: (ResultType a, ResultType b) => (Set a -> Set b) -> p a -> ParserRec p b
  Nul :: (ResultType a)               => p a -> ParserRec p a
  Zip :: (ResultType a, ResultType b) => p a -> ContextR p a b -> ParserRec p b
  Ter ::                                 String -> ParserRec p String
  Eps :: (ResultType a)               => Set a -> ParserRec p a
  Emp :: (ResultType a)               => ParserRec p a

data ContextR p a b where
  ConContext :: (ResultType a, ResultType b) => p b -> ContextR p (a, b) c -> ContextR p a c
  RedContext :: (ResultType a, ResultType b) => (Set a -> Set b) -> ContextR p b c -> ContextR p a c
  TopContext :: (ResultType a) => ContextR p a a

type Context a b = ContextR Parser a b

class (Ord a) => ResultType a
instance (Ord a) => ResultType a

-- | The input type for parsing.  For example the parser:
--
-- > (ter "x") 
--
--   will parse:
--
-- > (Token "x" "foo") 
--
--   into:
--
-- > (eps "foo")

data Token = Token { tokenClass :: String, tokenValue :: String }
  deriving (Eq, Ord, Show)

parser :: (ResultType a) => ParserRec Parser a -> FPValue Bool -> Parser a
parser p n = fix $ \ self -> Parser p n (memoFun (deriveImp self)) (compactImp self)

-- | Alternation.
(<|>) :: (ResultType a) => Parser a -> Parser a -> Parser a
(<|>) a b = parser (Alt a b) FPUndecided 
-- | Concatenation.
(<~>) :: (ResultType a, ResultType b) => Parser a -> Parser b -> Parser (a, b)
(<~>) a b = parser (Con a b) FPUndecided 
-- | Reduction.
(==>) :: (ResultType a, ResultType b) => Parser a -> (a -> b) -> Parser b
(==>) p f = p ==>| Set.map f
-- | Set generalized version of `==>'.
(==>|) :: (ResultType a, ResultType b) => Parser a -> (Set a -> Set b) -> Parser b
(==>|) p f = parser (Red f p) FPUndecided 
-- | Null-parse extraction.
nul :: (ResultType a) => Parser a -> Parser a
nul p = parser (Nul p) FPUndecided 
-- | One-hole-context focus.
pzip :: (ResultType a, ResultType b) => Parser a -> Context a b -> Parser b
pzip p c = parser (Zip p c) (FPDecided False) 
-- | Terminal.
ter :: String -> Parser String
ter t = parser (Ter t) (FPDecided False) 
-- | Epsilon/empty-string.
eps :: (ResultType a) => a -> Parser a
eps = epsM . Set.singleton
-- | Set generalized version of `eps'.
epsM :: (ResultType a) => Set a -> Parser a
epsM e = parser (Eps e) (FPDecided True) 
-- | The empty language.
emp :: (ResultType a) => Parser a
emp = parser Emp (FPDecided False) 

infixr 3 <~>
infixr 1 <|>
infix 2 ==>, ==>|

-- | The main derivative function.

derive :: Parser a -> Token -> Parser a
derive = parserDerive

deriveImp :: Parser a -> Token -> Parser a
deriveImp p' x' = deriveImpRec (parserRec p') x'
  where
    deriveImpRec (Alt a b) x = derive a x <|> derive b x
    deriveImpRec (Con a b) x = derive a x <~> b <|> nul a <~> derive b x
    deriveImpRec (Red f a) x = derive a x ==>| f
    deriveImpRec (Nul _) _ = emp
    deriveImpRec (Zip p c) t = pzip (derive p t) c 
    deriveImpRec (Ter c) (Token x t) | c == x = eps t | otherwise = emp
    deriveImpRec (Eps _) _ = emp
    deriveImpRec Emp _ = emp

-- | The optimization step of the algorithm.

compact :: Parser a -> Parser a
compact = parserCompact

compactImp :: (Ord a) => Parser a -> Parser a
compactImp p = compactImpRec $ parserRec p
  where
    compactImpRec (Alt (Parser Emp _ _ _) (Parser Emp _ _ _)) = emp
    compactImpRec (Alt (Parser Emp _ _ _) b) = compact b
    compactImpRec (Alt a (Parser Emp _ _ _)) = compact a
    compactImpRec (Alt (Parser (Eps sM) _ _ _) (Parser (Eps tM) _ _ _)) = epsM (sM `Set.union` tM)
    compactImpRec (Alt a b) = (compact a <|> compact b) { parserNullable = parserNullable a <||> parserNullable b }
    compactImpRec (Con (Parser Emp _ _ _) _) = emp
    compactImpRec (Con _ (Parser Emp _ _ _)) = emp
    compactImpRec (Con (Parser (Eps sM) _ _ _) b) = compact b ==>| (\ xM -> Set.fromList [ (s, x) | s <- Set.toList sM, x <- Set.toList xM ])
    compactImpRec (Con a (Parser (Eps sM) _ _ _)) = compact a ==>| (\ xM -> Set.fromList [ (x, s) | x <- Set.toList xM, s <- Set.toList sM ])
    compactImpRec (Con a b) | parserNullable a == FPDecided False && parserNullable b == FPDecided False 
      = pzip (compact a) (ConContext (compact b) TopContext)
    compactImpRec (Con a b) = (compact a <~> compact b) { parserNullable = parserNullable a <&&> parserNullable b }
    compactImpRec (Red _ (Parser Emp _ _ _)) = emp
    compactImpRec (Red f (Parser (Eps sM) _ _ _)) = epsM (f sM)
    compactImpRec (Red f (Parser (Red g a) _ _ _)) = compact a ==>| f . g
    compactImpRec (Red f a) = (compact a ==>| f) { parserNullable = parserNullable a }
    compactImpRec (Nul (Parser (Con a b) _ _ _)) = nul (compact a) <~> nul (compact b)
    compactImpRec (Nul (Parser (Alt a b) _ _ _)) = nul (compact a) <|> nul (compact b)
    compactImpRec (Nul (Parser (Red f a) _ _ _)) = nul (compact a) ==>| f
    compactImpRec (Nul (Parser (Zip a c) _ _ _)) = pzip (nul a) (nulContext c)
    compactImpRec (Nul a@(Parser (Nul _) _ _ _)) = compact a
    compactImpRec (Nul (Parser (Eps sM) _ _ _)) = epsM sM
    compactImpRec (Nul (Parser (Ter _) _ _ _)) = emp
    compactImpRec (Nul (Parser Emp _ _ _)) = emp
    compactImpRec (Zip a TopContext) = compact a
    compactImpRec (Zip (Parser Emp _ _ _) _) = emp
    compactImpRec (Zip a c) | parserNullable a /= FPDecided False = unfoldOne (compactImp a) c
    compactImpRec (Zip (Parser (Zip a c) _ _ _) d) = pzip (compact a) (thread c d)
    compactImpRec (Zip (Parser (Red f a) _ _ _) c) = pzip (compact a) (RedContext f c)
    compactImpRec (Zip a c) = pzip (compact a) c
    compactImpRec (Ter _) = p
    compactImpRec (Eps sM) | sM == Set.empty = emp
    compactImpRec (Eps _) = p
    compactImpRec Emp = p

    nulContext :: Context a b -> Context a b
    nulContext (ConContext a c) = ConContext (nul a) (nulContext c)
    nulContext (RedContext f c) = RedContext f (nulContext c)
    nulContext TopContext = TopContext

    thread :: (ResultType a, ResultType b, ResultType c) => Context a b -> Context b c -> Context a c
    thread TopContext d = d
    thread (RedContext f c) d = RedContext f (thread c d)
    thread (ConContext a c) d = ConContext a (thread c d)

    unfoldOne :: (ResultType a, ResultType b) => Parser a -> Context a b -> Parser b
    unfoldOne a (ConContext b c) = pzip (a <~> b) c
    unfoldOne a (RedContext f c) = unfoldOne (a ==>| f) c
    unfoldOne _ TopContext = error "cannot unfold top"

-- | Extract the parse-null set of a parser.

parseNull :: (ResultType a) => Parser a -> Set a
parseNull p = work $ nul p
  where
    work (Parser (Eps sM) _ _ _) = sM
    work (Parser Emp _ _ _) = Set.empty
    work other = work $ compact other

-- running parsers

-- | The number of compact steps that usually keeps a parser constant in size
--   while parsing.
defaultCompactSteps :: Int
defaultCompactSteps = 10

-- | A specified number of compactions.
compactNum :: Int -> Parser a -> Parser a
compactNum 0 p = p
compactNum n p = compactNum (n - 1) (compact p)

-- | Derivation followed by specified number of compactions.
deriveStepNum :: Int -> Parser a -> Token -> Parser a
deriveStepNum n p i = compactNum n $ derive p i

-- | Parse using a specified number of intermediate compactions.
runParseNum :: (ResultType a) => Int -> Parser a -> [Token] -> Set a
runParseNum _ p [] = parseNull p
runParseNum n p (i:is) = runParseNum n (deriveStepNum n p i) is

-- | Derivation followed by the default number of compactions.
deriveStep :: Parser a -> Token -> Parser a
deriveStep = deriveStepNum defaultCompactSteps

-- | Parse using the default number of intermediate compactions.  This is the
--   main parsing function.  Examples:
--
-- > let e =     ter "num"
-- >         <|> e <~> ter "+" <~> e ==> (\(x1,(o,x2)) -> "(" ++ x1 ++ o ++ x2 ++ ")")
-- > in runParse e [Token "num" "1", Token "+" "+", Token "num" 3", Token "+" "+", Token "num" "5"]
--
-- evaluates to:
--
-- > Set.fromList ["((1+3)+5)", "(1+(3+5))"]
--
-- > let e =     ter "num" ==> read 
-- >         <|> e <~> ter "+" <~> e ==> (\(x1,(_,x2)) -> x1 + x2)
-- > in runParse e [Token "num" "1", Token "+" "+", Token "num" 3", Token "+" "+", Token "num" "5"]
--
-- evaluates to:
--
-- > Set.fromList [9]
--
runParse :: (ResultType a) => Parser a -> [Token] -> Set a
runParse = runParseNum defaultCompactSteps

-- inspecting parsers

parserChildren :: Parser a -> [GenParser]
parserChildren = parserRecChildren . parserRec
  where
    parserRecChildren (Con a b) = [genParser a, genParser b]
    parserRecChildren (Alt a b) = [genParser a, genParser b]
    parserRecChildren (Red _ a) = [genParser a]
    parserRecChildren (Nul a)   = [genParser a]
    parserRecChildren (Zip a _) = [genParser a]
    parserRecChildren (Ter _)   = []
    parserRecChildren (Eps _)   = []
    parserRecChildren Emp       = []

foldlParserChildrenM :: (forall b. t -> Parser b -> IO t) -> t -> Parser a -> IO t
foldlParserChildrenM f i p = foldM g i $ parserChildren p
  where
    g t (GenParser h) = h (f t)

newtype GenParser = GenParser { unGenParser :: forall c. (forall b. Parser b -> c) -> c }

genParser :: Parser a -> GenParser
genParser p = GenParser $ \ f -> f p

runGenParser :: (forall b. Parser b -> c) -> GenParser -> c
runGenParser f g = unGenParser g f

data ParserRecType = ConType | AltType | RedType | NulType | ZipType | TerType | EpsType | EmpType
  deriving (Eq, Ord, Show)

parserType :: Parser a -> ParserRecType
parserType = parserRecType . parserRec
  where
    parserRecType (Con _ _) = ConType
    parserRecType (Alt _ _) = AltType
    parserRecType (Red _ _) = RedType
    parserRecType (Nul _)   = NulType
    parserRecType (Zip _ _) = ZipType
    parserRecType (Ter _)   = TerType
    parserRecType (Eps _)   = EpsType
    parserRecType Emp       = EmpType

type ParserInspect t =  (forall a. Parser a -> IO Integer) 
                     -> (forall a. Parser a -> IO Bool) 
                     -> (forall a. Parser a -> IO t)

inspectParser :: ParserInspect t -> Parser a -> t
inspectParser f p = unsafePerformIO $ do
  reifiedPt <- newIORef Map.empty
  seenPt    <- newIORef Map.empty
  uidPt     <- newIORef 1
  f (lookupId reifiedPt uidPt) (seenId seenPt) p

lookupId :: IORef (Map Int [(StableName (), Integer)]) 
         -> IORef Integer 
         -> Parser a 
         -> IO Integer
lookupId reifiedPt uidPt p 
  | p `seq` True = do
    stblName <- genericStableName p
    let stblNameHashed = hashStableName stblName
    lookupValM <- liftM (extraLookup stblNameHashed stblName) $ readIORef reifiedPt
    case lookupValM of
      (Just lookupVal) -> return lookupVal
      Nothing          -> do
        thisId <- readIORef uidPt
        modifyIORef uidPt (+ 1)
        modifyIORef reifiedPt $ Map.insertWith (++) stblNameHashed [(stblName, thisId)]
        return thisId
  | otherwise = error "seq failed"

seenId :: IORef (Map Int [(StableName (), ())]) -> Parser a -> IO Bool
seenId seenPt p 
  | p `seq` True = do 
    stblName <- genericStableName p
    let stblNameHashed = hashStableName stblName
    lookupValM <- liftM (extraLookup stblNameHashed stblName) $ readIORef seenPt
    case lookupValM of
      (Just ()) -> return True
      Nothing -> do
        modifyIORef seenPt $ Map.insertWith (++) stblNameHashed [(stblName, ())]
        return False
  | otherwise = error "seq failed"

genericStableName :: a -> IO (StableName ())
genericStableName = liftM unsafeCoerce . makeStableName

extraLookup :: Int -> StableName () -> Map Int [(StableName (), a)] -> Maybe a
extraLookup hashed key m = process $ Map.lookup hashed m
  where
    process x = case x of
      (Just [])                                 -> Nothing
      (Just ((key', reified):xs)) | key == key' -> Just reified
                                  | otherwise   -> process (Just xs)
      Nothing                                   -> Nothing

type ParserFoldL t = forall a. t -> Parser a -> Integer -> Integer -> [Integer] -> t

parserDeepFoldL :: ParserFoldL t -> t -> Parser a -> t
parserDeepFoldL f i = inspectParser $ inspectf f i

inspectf :: ParserFoldL t -> t -> ParserInspect t
inspectf f i uidM isSeenM p = do
  isSeen <- isSeenM p
  if isSeen then return i else do
    uid <- uidM p
    cuids <- mapM (runGenParser uidM) $ parserChildren p
    let pid = hashStableName (unsafePerformIO (genericStableName p))
    let next = f i p uid (fromIntegral pid) cuids
    foldlParserChildrenM (\t p' -> inspectf f t uidM isSeenM p') next p

data ParserInfo = ParserInfo Integer -- uid
                             Integer -- pid
                             ParserRecType -- type
                             (FPValue Bool) -- nullable
                             [Integer] -- children

parserToGraph :: Parser a -> [ParserInfo]
parserToGraph = reverse . parserDeepFoldL f []
  where
    f :: ParserFoldL [ParserInfo]
    f others p uid pid childrenids = ParserInfo uid
                                                pid
                                                (parserType p)
                                                (parserNullable p)
                                                childrenids
                                   : others

showParserGraph :: [ParserInfo] -> String
showParserGraph ps = printf "SIZE: %s \n" (show (length ps)) ++ intercalate "\n" (map showParserGraphSingle ps)
  where
    showParserGraphSingle :: ParserInfo -> String
    showParserGraphSingle (ParserInfo uid pid ptype n children) = 
      printf "%-6s%-6s%-10s%-10s%-10s" 
             (show uid)
             (show pid)
             (show ptype)
             (showFPBool n)
             (show children)

instance Show (Parser a) where
  show = showParserGraph . parserToGraph

-- FPValue

data FPValue a = FPDecided a | FPUndecided
  deriving (Eq, Ord, Show)

showFPBool :: FPValue Bool -> String
showFPBool (FPDecided True) = "True"
showFPBool (FPDecided False) = "False"
showFPBool FPUndecided = "Undecided"

(<&&>) :: FPValue Bool -> FPValue Bool -> FPValue Bool
(<&&>) (FPDecided False) _ = FPDecided False
(<&&>) _ (FPDecided False) = FPDecided False
(<&&>) FPUndecided _ = FPUndecided
(<&&>) _ FPUndecided = FPUndecided
(<&&>) (FPDecided x) (FPDecided y) = FPDecided (x && y)

(<||>) :: FPValue Bool -> FPValue Bool -> FPValue Bool
(<||>) (FPDecided True) _ = FPDecided True
(<||>) _ (FPDecided True) = FPDecided True
(<||>) FPUndecided _ = FPUndecided
(<||>) _ FPUndecided = FPUndecided
(<||>) (FPDecided x) (FPDecided y) = FPDecided (x || y)

-- util


memoFun :: (Ord a) => (a -> b) -> a -> b
memoFun f = unsafePerformIO $ do
  mapRef <- newIORef Map.empty
  return $ \a -> unsafePerformIO $ do
  currMap <- readIORef mapRef
  let vM = Map.lookup a currMap
  case vM of
    Just b -> return b
    Nothing -> do
      let b = f a
      writeIORef mapRef $ Map.insert a b currMap
      return b

-- demos

xsR :: () -> Parser String
xsR () = p
  where
    p = eps "" <|> ter "x" <~> p ==> uncurry (++)

xsL :: () -> Parser String
xsL () = p
  where
    p = eps "" <|> p <~> ter "x" ==> uncurry (++)

xsIn :: [Token]
xsIn = replicate 60 (Token "x" "x")

parens :: () -> Parser String
parens () = p
  where
    p = eps "" <|> ter "(" <~> p <~> ter ")" ==> (\(s1,(s2,s3)) -> s1 ++ s2 ++ s3)

parensIn :: [Token]
parensIn = replicate 80 (Token "(" "(") ++ replicate 80 (Token ")" ")")

amb :: () -> Parser String
amb () = p
  where
    p = ter "1" <|> p <~> ter "+" <~> p ==> (\(s1,(s2,s3)) -> "(" ++ s1 ++ s2 ++ s3 ++ ")")

ambIn :: [Token]
ambIn = intersperse (Token "+" "+") (replicate 7 (Token "1" "1"))

sexp :: () -> Parser String
sexp () = p
  where
    p = ter "(" <~> pl <~> ter ")" ==> (\(s1,(s2,s3)) -> s1 ++ s2 ++ s3) <|> ter "s"
    pl = p <~> pl ==> uncurry (++) <|> eps ""

sexpIn :: [Token]
sexpIn = map (\x -> Token x x) $ words "( s ( s ( s s ( s s s ( s s s ( s ) ( s s ) s s ) s s ) s ) s ) )"