{-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-}

module Hydrogen.Parsing (
    module Text.Parsec.Combinator
  , module Text.Parsec.Prim
  , module Text.Parsec.Pos
  , Parser
  , ParseError
  , SomethingBad
  , Tokens
  , runTokenParser
  , mkError
  , sourceToken
  , manyBetween
  , (>+>)
  , (<+<)
  , sya
  , ignoreUnderscores
  , tryRead
  , tryReads
  , tryReadDecimal
  , tryReadRational
  , tryReadHex
  , tryReadUUID
  , tryReadVersion
  , tryReadDateTime
  , tryReadDate
  , tryReadTime
  , tryReadBool
  , tryReadLink
  ) where

import Hydrogen.Prelude

import Text.Parsec.Combinator
import Text.Parsec.Error
import Text.Parsec.Pos
import Text.Parsec.Prim


type SomethingBad = [(SourcePos, String)]
type Parser source result = source -> Either SomethingBad result
type Tokens t = [(SourcePos, t)]


instance Serialize SourcePos where

    put pos = do
        let line = sourceLine pos
            col  = sourceColumn pos
            name = sourceName pos
        putWord32be (fromIntegral line)
        putWord32be (fromIntegral col)
        put name

    get = do
        line <- fromIntegral <$> getWord32be
        col  <- fromIntegral <$> getWord32be
        name <- get
        return (newPos name line col)


mkError :: ParseError -> Either SomethingBad b
mkError e = Left $ map ((errorPos e,) . messageToString) (errorMessages e)
  where
    messageToString = \case
        SysUnExpect msg -> "Unexpected " ++ msg
        UnExpect msg -> "Unexpected " ++ msg
        Expect msg -> "Expected " ++ msg
        Message msg -> msg


runTokenParser :: (Stream a Identity t) => ParsecT a () Identity b -> Parser a b
runTokenParser p = either mkError Right . runIdentity . runParserT p () ""


sourceToken :: (Show t, Stream (Tokens t) m (SourcePos, t))
    => (t -> Maybe a)
    -> ParsecT [(SourcePos, t)] u m a
sourceToken f = tokenPrim (show . snd) nextPos (f . snd)
  where
    nextPos p _ = \case
        ((p', _) : _) -> p'
        _ -> p


manyBetween :: (Monad m, Stream s m t)
    => ParsecT s u m open -> ParsecT s u m close -> ParsecT s u m p -> ParsecT s u m [p]
manyBetween o c p = o *> manyTill p c


(>+>) :: Parser a b -> Parser b c -> Parser a c
p1 >+> p2 = join <$> fmap p2 <$> p1


(<+<) :: Parser b c -> Parser a b -> Parser a c
(<+<) = flip (>+>)


tryRead :: (Monad m) => ReadS a -> String -> m a
tryRead p s = case p s of
    [(val, "")] -> return val
    [] -> fail "no parse"
    _ -> fail "ambiguous parse"

tryReads :: (Monad m, Read a) => String -> m a
tryReads = tryRead reads


ignoreUnderscores :: String -> String
ignoreUnderscores = \case
    x : xs -> x : ignore xs
    xs -> xs
  where
    ignore = \case
        xs@(x : '_' : _) | not (isAlphaNum x) -> xs
        '_' : x : xs | isAlphaNum x -> x : ignore xs
        x : xs -> x : ignore xs
        xs -> xs

tryReadDecimal :: String -> Maybe Rational
tryReadDecimal = \case
    ('-' : xs) -> negate <$> readRational xs
    ('+' : xs) -> readRational xs
    ('.' : xs) -> readRational ("0." ++ xs)
    xs -> readRational xs
  where
    readRational = tryRead readFloat . ignoreUnderscores

tryReadRational :: String -> Maybe Rational
tryReadRational xs = case right of
    (_ : right') -> liftM2 (%) numer denom
      where
        numer = tryRead reads left
        denom = tryRead reads right'

    _ -> Nothing
  where
    (left, right) = span (/= '/') (ignoreUnderscores xs)

tryReadHex :: String -> Maybe Rational
tryReadHex = tryRead readHex . ignoreUnderscores . hex
  where
    hex = \case
      '0' : 'x' : xs -> xs
      _ -> ""

tryReadUUID :: String -> Maybe UUID
tryReadUUID = tryRead reads

tryReadVersion :: String -> Maybe Version
tryReadVersion = \case
    ('v' : xs) -> tryRead reads xs
    _ -> fail "no version"

tryReadDateTime :: String -> Maybe (Maybe ZonedTime)
tryReadDateTime xs = case xs =~ dateTime of

    [[_, y, m, d, h, min, _, s, s', z, zm, _, zs]]
        -> Just (liftM2 ZonedTime (liftM2 LocalTime date time) zone)
      where
        (year, month, day, hour, minute) = (read y, read m, read d, read h, read min)
        sec = read ((if null s then "0" else s) ++ (if null s' then ".0" else s'))

        time = makeTimeOfDayValid hour minute sec
        date = fromGregorianValid year month day

        zone = Just $ case z of
            "Z" -> utc
            ('-' : _) -> minutesToTimeZone (negate zn)
            _ -> minutesToTimeZone zn
          where
            zn = read zm * 60 + (if zs == "" then 0 else read zs)

    _ -> Nothing

  where
    date = "([0-9]{4})-?([0-9]{2})-?([0-9]{2})"
    time = "([0-9]{2}):?([0-9]{2})(:?([0-9]{2})(\\.[0-9]{1,12})?)?"
    timeZone = "(Z|[+-]([0-9]{1,2})(:?([0-9]{2}))?)"
    dateTime = concat ["^", date, "T?", time, timeZone, "$"]

tryReadDate :: String -> Maybe (Maybe Day)
tryReadDate xs = case xs =~ date of
    [[_, y, _, m, d, ""]] -> Just (fromGregorianValid year month day)
      where
        (year, month, day) = (read y, read m, read d)

    [[_, y, _, _, _, d]] -> Just (fromOrdinalDateValid year day)
      where
        (year, day) = (read y, read d)

    _ -> Nothing
  where
    date = "^([0-9]{4})-(([0-9]{2})-([0-9]{2})|([0-9]{3}))$"

tryReadTime :: String -> Maybe (Maybe TimeOfDay)
tryReadTime xs = case xs =~ time of
    [[_, h, m, _, s]] -> Just (makeTimeOfDayValid hour min sec)
      where
        (hour, min, sec) = (read h, read m, if null s then 0 else read s)

    _ -> Nothing
  where
    time = "^([0-9]{2}):([0-9]{2})(:([0-9]{2}))?$"

tryReadBool :: String -> Maybe Bool
tryReadBool = \case
    "true" -> return True
    "TRUE" -> return True
    "True" -> return True
    "false" -> return False
    "False" -> return False
    "FALSE" -> return False
    _ -> Nothing

tryReadLink :: String -> Maybe String
tryReadLink xs
    | xs =~ url = Just xs
    | otherwise = Nothing
  where
    url = concat [
        "^[a-z](-?[a-z0-9])*(\\.[a-z](-?[a-z0-9])*)+"
      , "(/([a-z0-9_.=-]|%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})*)+"
      , "(\\?([a-z0-9_.=-]|%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})*)?$"
      ]
  
-- | Infix to postfix notation (an implementation of the Shunting-Yard-Algorithm)
sya :: (Ord p, Eq o)
    => (a -> Maybe o)   -- ^ Determine operator
    -> (o -> Bool)      -- ^ Is left precedence?
    -> (o -> p)         -- ^ Precedence of given operator
    -> [a]              -- ^ The input stream (infix notation)
    -> [a]              -- ^ The output stream (postfix notation)
sya mkOp isL p = sy []
  where
    sy (t : ts) (x : xs)
        | isOp x && isOp t && cmp t x = t : sy ts (x : xs)

    sy ts (x : xs)
        | isOp x    = sy (x : ts) xs
        | otherwise = x : sy ts xs

    sy ts [] = ts

    isOp = isJust . mkOp

    cmp o1 o2 = isL o1' && p o1' == p o2' || p o1' > p o2'
      where
        Just o1' = mkOp o1
        Just o2' = mkOp o2