{-
    gulcii -- graphical untyped lambda calculus interpreter
    Copyright (C) 2011, 2013  Claude Heiland-Allen

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-}

module Parse where

import Control.Applicative(Applicative, Alternative, pure, empty, (<|>), (<$>), (<*>), (<*))

{-
Strip comments
--------------

A comment is everything from '#' to the end of the line.
-}

decomment :: String -> String
decomment = concatMap (fst . break ('#'==)) . lines

{-
Tokenize
--------
-}

digits :: String
digits = "0123456789"

lowers :: String
lowers = "abcdefghijklmnopqrstuvwxyz"

uppers :: String
uppers = "ABCDEFGHIJKLMNOPQRSTUVWXYZ@"

alpha :: String
alpha = lowers ++ uppers

alphanum :: String
alphanum = alpha ++ digits

symbols :: String
symbols = "\\.!?()[],=~:{}"

spaces :: String
spaces = " "

{-
Split a string into tokens (each itself a string), such that each token
consists of either all digits, all letters, or a single symbol.  Use
whitespace to separate tokens.
-}

tokenize :: String -> Maybe [String]
tokenize [] = Just []
tokenize (c:cs)
  | c `elem` digits  = let (t,ts) = span (`elem` digits) cs
                       in ((c:t):) <$> tokenize ts
  | c `elem` lowers  = let (t,ts) = span (`elem` alphanum) cs
                       in ((c:t):) <$> tokenize ts
  | c `elem` symbols = ([c]:) <$> tokenize cs
  | c `elem` spaces  = tokenize cs
  | otherwise = Nothing

{-
Parsing primitives
------------------

A parser takes a list of tokens to a list of possible partial parses.
-}

newtype Parser s t = P{ unP :: [s] -> [(t, [s])] }

instance Functor (Parser s) where
  fmap f (P p) = P (\q -> [ (f v, s) | (v,s) <- p q ] )

instance Applicative (Parser s) where
  pure f = P (\q -> [(f, q)])
  P p1 <*> P p2 = P (\q -> [ (u v, t) | (u, s) <- p1 q, (v, t) <- p2 s ])

instance Alternative (Parser s) where
  empty = P (\_ -> [])
  P p1 <|> P p2 = P (\q -> p1 q ++ p2 q)

{-
Accept a token that satisfies a predicate.
-}

satisfy :: (s -> Bool) -> Parser s s
satisfy p = P (\q -> case q of
  (x:xs) | p x -> [(x,xs)]
  _ -> [])

{-
Accept a specific token.
-}

sym :: Eq s => s -> Parser s s
sym a = satisfy (== a)

{-
Accept some p's separated by s's.
-}

someSep :: Parser s a -> Parser s t -> Parser s [t]
someSep s p = ((:[]) <$> p) <|> ((:) <$> p <* s <*> someSep s p)

{-
Accept some p's separated by s's, or nothing.
-}

manySep :: Parser s a -> Parser s t -> Parser s [t]
manySep s p = pure [] <|> someSep s p

{-
Accept a name consisting of letters.
-}

name :: Parser String String
name = P (\q -> case q of
  (p@(r:_):ps) | r `elem` lowers -> [(p, ps)]
  _ -> [])

{-
Accept an integer consisting of digits.
-}

integer :: Parser String Integer
integer = P (\q -> case q of
  (p:ps) | all (`elem` digits) p -> [(read p, ps)]
  _ -> [])