{-# OPTIONS_GHC -fno-warn-name-shadowing -fno-warn-orphans #-} module Hydrogen.Parsing ( module Text.Parsec.Combinator , module Text.Parsec.Prim , module Text.Parsec.Pos , Parser , ParseError , SomethingBad , Tokens , runTokenParser , mkError , sourceToken , manyBetween , (>+>) , (<+<) , sya , ignoreUnderscores , tryRead , tryReads , tryReadDecimal , tryReadRational , tryReadHex , tryReadUUID , tryReadVersion , tryReadDateTime , tryReadDate , tryReadTime , tryReadBool , tryReadLink ) where import Hydrogen.Prelude import Text.Parsec.Combinator import Text.Parsec.Error import Text.Parsec.Pos import Text.Parsec.Prim type SomethingBad = [(SourcePos, String)] type Parser source result = source -> Either SomethingBad result type Tokens t = [(SourcePos, t)] instance Serialize SourcePos where put pos = do let line = sourceLine pos col = sourceColumn pos name = sourceName pos putWord32be (fromIntegral line) putWord32be (fromIntegral col) put name get = do line <- fromIntegral <$> getWord32be col <- fromIntegral <$> getWord32be name <- get return (newPos name line col) mkError :: ParseError -> Either SomethingBad b mkError e = Left $ map ((errorPos e,) . messageToString) (errorMessages e) where messageToString = \case SysUnExpect msg -> "Unexpected " ++ msg UnExpect msg -> "Unexpected " ++ msg Expect msg -> "Expected " ++ msg Message msg -> msg runTokenParser :: (Stream a Identity t) => ParsecT a () Identity b -> Parser a b runTokenParser p = either mkError Right . runIdentity . runParserT p () "" sourceToken :: (Show t, Stream (Tokens t) m (SourcePos, t)) => (t -> Maybe a) -> ParsecT [(SourcePos, t)] u m a sourceToken f = tokenPrim (show . snd) nextPos (f . snd) where nextPos p _ = \case ((p', _) : _) -> p' _ -> p manyBetween :: (Monad m, Stream s m t) => ParsecT s u m open -> ParsecT s u m close -> ParsecT s u m p -> ParsecT s u m [p] manyBetween o c p = o *> manyTill p c (>+>) :: Parser a b -> Parser b c -> Parser a c p1 >+> p2 = join <$> fmap p2 <$> p1 (<+<) :: Parser b c -> Parser a b -> Parser a c (<+<) = flip (>+>) tryRead :: (Monad m) => ReadS a -> String -> m a tryRead p s = case p s of [(val, "")] -> return val [] -> fail "no parse" _ -> fail "ambiguous parse" tryReads :: (Monad m, Read a) => String -> m a tryReads = tryRead reads ignoreUnderscores :: String -> String ignoreUnderscores = \case x : xs -> x : ignore xs xs -> xs where ignore = \case xs@(x : '_' : _) | not (isAlphaNum x) -> xs '_' : x : xs | isAlphaNum x -> x : ignore xs x : xs -> x : ignore xs xs -> xs tryReadDecimal :: String -> Maybe Rational tryReadDecimal = \case ('-' : xs) -> negate <$> readRational xs ('+' : xs) -> readRational xs ('.' : xs) -> readRational ("0." ++ xs) xs -> readRational xs where readRational = tryRead readFloat . ignoreUnderscores tryReadRational :: String -> Maybe Rational tryReadRational xs = case right of (_ : right') -> liftM2 (%) numer denom where numer = tryRead reads left denom = tryRead reads right' _ -> Nothing where (left, right) = span (/= '/') (ignoreUnderscores xs) tryReadHex :: String -> Maybe Rational tryReadHex = tryRead readHex . ignoreUnderscores . hex where hex = \case '0' : 'x' : xs -> xs _ -> "" tryReadUUID :: String -> Maybe UUID tryReadUUID = tryRead reads tryReadVersion :: String -> Maybe Version tryReadVersion = \case ('v' : xs) -> tryRead reads xs _ -> fail "no version" tryReadDateTime :: String -> Maybe (Maybe ZonedTime) tryReadDateTime xs = case xs =~ dateTime of [[_, y, m, d, h, min, _, s, s', z, zm, _, zs]] -> Just (liftM2 ZonedTime (liftM2 LocalTime date time) zone) where (year, month, day, hour, minute) = (read y, read m, read d, read h, read min) sec = read ((if null s then "0" else s) ++ (if null s' then ".0" else s')) time = makeTimeOfDayValid hour minute sec date = fromGregorianValid year month day zone = Just $ case z of "Z" -> utc ('-' : _) -> minutesToTimeZone (negate zn) _ -> minutesToTimeZone zn where zn = read zm * 60 + (if zs == "" then 0 else read zs) _ -> Nothing where date = "([0-9]{4})-?([0-9]{2})-?([0-9]{2})" time = "([0-9]{2}):?([0-9]{2})(:?([0-9]{2})(\\.[0-9]{1,12})?)?" timeZone = "(Z|[+-]([0-9]{1,2})(:?([0-9]{2}))?)" dateTime = concat ["^", date, "T?", time, timeZone, "$"] tryReadDate :: String -> Maybe (Maybe Day) tryReadDate xs = case xs =~ date of [[_, y, _, m, d, ""]] -> Just (fromGregorianValid year month day) where (year, month, day) = (read y, read m, read d) [[_, y, _, _, _, d]] -> Just (fromOrdinalDateValid year day) where (year, day) = (read y, read d) _ -> Nothing where date = "^([0-9]{4})-(([0-9]{2})-([0-9]{2})|([0-9]{3}))$" tryReadTime :: String -> Maybe (Maybe TimeOfDay) tryReadTime xs = case xs =~ time of [[_, h, m, _, s]] -> Just (makeTimeOfDayValid hour min sec) where (hour, min, sec) = (read h, read m, if null s then 0 else read s) _ -> Nothing where time = "^([0-9]{2}):([0-9]{2})(:([0-9]{2}))?$" tryReadBool :: String -> Maybe Bool tryReadBool = \case "true" -> return True "TRUE" -> return True "True" -> return True "false" -> return False "False" -> return False "FALSE" -> return False _ -> Nothing tryReadLink :: String -> Maybe String tryReadLink xs | xs =~ url = Just xs | otherwise = Nothing where url = concat [ "^[a-z](-?[a-z0-9])*(\\.[a-z](-?[a-z0-9])*)+" , "(/([a-z0-9_.=-]|%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})*)+" , "(\\?([a-z0-9_.=-]|%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})*)?$" ] -- | Infix to postfix notation (an implementation of the Shunting-Yard-Algorithm) sya :: (Ord p, Eq o) => (a -> Maybe o) -- ^ Determine operator -> (o -> Bool) -- ^ Is left precedence? -> (o -> p) -- ^ Precedence of given operator -> [a] -- ^ The input stream (infix notation) -> [a] -- ^ The output stream (postfix notation) sya mkOp isL p = sy [] where sy (t : ts) (x : xs) | isOp x && isOp t && cmp t x = t : sy ts (x : xs) sy ts (x : xs) | isOp x = sy (x : ts) xs | otherwise = x : sy ts xs sy ts [] = ts isOp = isJust . mkOp cmp o1 o2 = isL o1' && p o1' == p o2' || p o1' > p o2' where Just o1' = mkOp o1 Just o2' = mkOp o2