module Term where

import IO ( Input, Output, input, output, parsecReader )

import qualified Text.ParserCombinators.Parsec.Token as T
import qualified Text.ParserCombinators.Parsec.Language as L
import qualified Text.ParserCombinators.Parsec.Expr as Expr
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec
           ( CharParser, Parser, getPosition, (<|>), (<?>), )
import Text.ParserCombinators.Parsec.Pos
           ( SourcePos, )
import Text.ParserCombinators.Parsec.Expr
           ( Assoc(AssocLeft, AssocRight, AssocNone) )
import Text.PrettyPrint.HughesPJ ( Doc, (<+>), fsep, parens, render, text )

import qualified Data.Set as S
import Control.Monad.Exception.Synchronous ( Exceptional(Success,Exception) )
import Control.Monad ( liftM2, mzero )
import Control.Functor.HT ( void )
import Data.Char (isUpper, isLower)
import Data.Ord (comparing)


data Range = Range { start :: SourcePos , end :: SourcePos }
    deriving (Eq, Ord, Show)

data Identifier =
     Identifier { range :: Range, name :: String }

instance Eq Identifier where
-- | FIXME: this is ignoring the module.
-- for a complete implementation, we'd need fully qualified names
    i == j = name i == name j

instance Ord Identifier where
    compare = comparing name

isConstructor :: Identifier -> Bool
isConstructor i =
    case name i of
        c:_ -> c == '[' || c == ':' || isUpper c
        _ -> error "isConstructor: identifier must be non-empty"

isVariable :: Identifier -> Bool
isVariable i =
    case name i of
        c:_ -> isLower c || elem c ('_':operatorSymbols)
        _ -> error "isVariable: identifier must be non-empty"


lexer :: T.TokenParser st
lexer =
   T.makeTokenParser $ L.emptyDef {
      L.commentStart = "{-",
      L.commentEnd = "-}",
      L.commentLine = "--",
      L.nestedComments = True,
      L.identStart = identifierStart,
      L.identLetter = identifierLetter,
      L.opStart = operatorStart,
      L.opLetter = operatorLetter,
      L.caseSensitive = True,
      L.reservedNames = [ "module", "where", "import", "qualified"
                        , "as", "data", "class", "instance", "case", "of"
                        , "infix", "infixl", "infixr" ],
      L.reservedOpNames = [ "=", "::", "|" ]
      }


-- FIXME: this should be read from a file (Prelude.hs).
-- but then we need a parser that correctly handles fixity information
-- on-the-fly.
-- A simplified solution could be:
-- Allow fixity definitions only between import and the first declaration.
-- With this restriction we could parse the preamble first
-- and then start with a fresh parser for the module body.
-- For now, we hard-code Prelude's fixities:
{-


infixr 9  .
infixr 8  ^, ^^, **
infixl 7  *, /, `quot`, `rem`, `div`, `mod`
infixl 6  +, -

-- The (:) operator is built-in syntax, and cannot legally be given
-- a fixity declaration; but its fixity is given by:
--   infixr 5  :

infix  4  ==, /=, <, <=, >=, >
infixr 3  &&
infixr 2  ||
infixl 1  >>, >>=
infixr 1  =<<
infixr 0  $, $!, `seq`
-}

operators :: [[([Char], Assoc)]]
operators =
  [ [ ( ".", AssocRight ), ( "!!", AssocLeft ) ]
  , [ ( "^", AssocRight) ]
  , [ ( "*", AssocLeft), ("/", AssocLeft), ("%", AssocLeft), ("+:+", AssocRight) ]
  , [ ( "+", AssocLeft), ("-", AssocLeft), ("=:=", AssocRight) ]
  , [ ( ":", AssocRight ), ( "++", AssocRight ) ]
  , map ( \ s -> (s, AssocNone) ) [ "==", "/=", "<", "<=", ">=", ">" ]
  , [ ( "&&", AssocRight ) ]
  , [ ( "||", AssocRight ) ]
  , [ ( "$",  AssocRight ) ]
  ]

identifierStart, identifierLetter :: CharParser st Char
identifierStart = Parsec.letter <|> Parsec.char '_'

-- FIXME: check the distinction between '.' in qualified names, and as operator
identifierLetter =
    Parsec.alphaNum <|> Parsec.char '_' <|> Parsec.char '.'

identifierCore :: Parser String
identifierCore =
    liftM2 (:) identifierStart (Parsec.many identifierLetter)

identifier :: Parser String
identifier = T.identifier lexer

parenOperator :: Parser Identifier
parenOperator =
    T.parens lexer $ T.lexeme lexer $
    fmap (uncurry Identifier) $ ranged $
    liftM2 (:) operatorStart (Parsec.many operatorLetter)

infixOperator :: Parser Identifier
infixOperator =
    T.lexeme lexer $
    fmap (uncurry Identifier) $ ranged $
       Parsec.between (Parsec.char '`') (Parsec.char '`') identifierCore
       <|>
       liftM2 (:) operatorStart (Parsec.many operatorLetter)

symbol :: String -> Parser ()
symbol = void . T.symbol lexer

ranged :: CharParser st a -> CharParser st (Range, a)
ranged p = do
    from <- getPosition
    x <- p
    to <- getPosition
    return $ (Range from to, x)


instance Input Identifier where
  input =
      T.lexeme lexer $
      fmap (uncurry Identifier) $ ranged identifierCore

instance Output Identifier where
  output i = text $ name i

instance Show Identifier where show = render . output
instance Read Identifier where readsPrec = parsecReader


data Term = Node Identifier [ Term ]
          | Number Range Integer
          | StringLiteral Range String
    deriving ( Eq, Ord )

instance Show Term where show = render . output
instance Read Term where readsPrec = parsecReader


mainName :: Term
mainName = read "main"


{- |
simplifies case analysis
-}
viewNode :: Term -> Maybe (String, [Term])
viewNode (Node f xs) = Just (Term.name f, xs)
viewNode _ = Nothing

appendArguments :: Term -> [Term] -> Exceptional String Term
appendArguments g ys =
    case (g, ys) of
        (Node f xs, _) -> return $ Node f $ xs ++ ys
        (t, []) -> return t
        (t, _) ->
            Exception $
            unwords [ "cannot apply ", show t,
                      "to arguments like a function" ]

{- |
I would like to use 'T.stringLiteral'
but this skips trailing spaces
and we need the precise range of the literal.
However this implementation is very simplistic,
since T.stringChar is not exported.
-}
parseStringLiteral :: Parsec.GenParser Char st String
parseStringLiteral =
    flip (<?>) "literal string" $
--    fmap catMaybes $
    Parsec.between
        (Parsec.char '"')
        (Parsec.char '"' <?> "end of string")
        (Parsec.many (Parsec.noneOf $ '"':"\n\r\\"))
--        (Parsec.many (T.stringChar lexer))


parseAtom :: Parser Term
parseAtom =
        (T.lexeme lexer $ fmap (uncurry Number) $
         ranged (fmap read $ Parsec.many1 Parsec.digit))
    <|> fmap (uncurry StringLiteral)
             (T.lexeme lexer (ranged parseStringLiteral))
--    <|> fmap (uncurry StringLiteral) (ranged (T.stringLiteral lexer))
    <|> T.parens lexer input
    <|> bracketedList
    <|> fmap (flip Node []) input

parse :: Parser Term
parse = do
    t <- liftM2 appendArguments parseAtom $ Parsec.many parseAtom
    case t of
        Success t' -> return t'
        Exception e -> fail e

instance Input Term where
  input = Expr.buildExpressionParser table parse

operatorStart, operatorLetter :: CharParser st Char
operatorStart  = Parsec.oneOf operatorSymbols
operatorLetter = Parsec.oneOf operatorSymbols

operatorSymbols :: [Char]
operatorSymbols = ":!#$%&*+./<=>?@\\^|-~"

table :: Expr.OperatorTable Char st Term
table = map ( map binary ) operators

binary :: (String, Assoc) -> Expr.Operator Char st Term
binary (s, assoc) = flip Expr.Infix assoc $ do
    rng <- Parsec.try $ T.lexeme lexer $ do
        (rng,_) <- ranged $ Parsec.string s
        Parsec.notFollowedBy operatorLetter <?> ("end of " ++ show s)
        return rng
    return $ \ l r -> Node ( Identifier { name = s, range = rng } ) [ l, r ]


bracketedList :: Parser Term
bracketedList = do
    (r,_) <- ranged $ symbol "["
    insideBracketedList r

insideBracketedList :: Range -> Parser Term
insideBracketedList rng =
        do (r,_) <- ranged $ symbol "]"
           return $ Node ( Identifier { name = "[]", range = r } ) []
    <|> do x <- input
           q <- getPosition
           xs <-   do symbol "]" ; r <- getPosition
                      return $ Node ( Identifier { name = "[]", range = Range q r } ) []
               <|> do symbol "," ; r <- getPosition
                      insideBracketedList $ Range q r
           return $ Node ( Identifier { name = ":", range = rng } ) [ x, xs ]

instance Output Term where
  output t = case t of
     Number _ n -> text $ show n
     StringLiteral _ s -> text $ show s
     Node f args -> output f <+> fsep ( map protected args )

protected :: Term -> Doc
protected t = case t of
  Node _f (_:_) -> parens $ output t
  _ -> output t


termRange :: Term -> Range
termRange (Node i _) = range i
termRange (Number rng _) = rng
termRange (StringLiteral rng _) = rng

{- |
compute the number of nodes in the same depth
-}
breadths :: Term -> [ Int ]
breadths t = 1 : case t of
    Node _f xs -> foldl addList [] $ map breadths xs
    _ -> []

addList :: [ Int ] -> [ Int ] -> [ Int ]
addList (x:xs) (y:ys) = (x+y) : addList xs ys
addList [] ys = ys
addList xs [] = xs


type Position = [ Int ]

subterms :: Term -> [ (Position, Term) ]
subterms t = ( [], t ) : case t of
    Node _f xs -> do
        (k, x) <- zip [ 0.. ] xs
        (p, s) <- subterms x
        return (k : p, s)
    _ -> []

signature :: Term -> S.Set Identifier
signature t = S.fromList $ do
    (_p, Node f _xs) <- subterms t
    return f

peek :: Term -> Position -> Maybe Term
peek t [] = return t
peek (Node _f xs) (k : ks) =
    case drop k xs of
        x:_ -> peek x ks
        [] -> mzero
peek _ _  = mzero

poke :: Term -> Position -> Term -> Maybe Term
poke _t [] s = return s
poke (Node f xs) (k : ks) s =
    case splitAt k xs of
        (pre, x : post) -> do
            y <- poke x ks s
            return $ Node f $ pre ++ y : post
        (_, []) -> error "Term.poke: index too large"
poke _ (_:_) _ =
    error "Term.poke: cannot access a leaf with an index"