module Hydrogen.Parsing (
module Text.Parsec.Combinator
, module Text.Parsec.Prim
, module Text.Parsec.Pos
, Parser
, ParseError
, SomethingBad
, Tokens
, runTokenParser
, mkError
, sourceToken
, manyBetween
, (>+>)
, (<+<)
, sya
, ignoreUnderscores
, tryRead
, tryReads
, tryReadDecimal
, tryReadRational
, tryReadHex
, tryReadUUID
, tryReadVersion
, tryReadDateTime
, tryReadDate
, tryReadTime
, tryReadBool
, tryReadLink
) where
import Hydrogen.Prelude
import Text.Parsec.Combinator
import Text.Parsec.Error
import Text.Parsec.Pos
import Text.Parsec.Prim
type SomethingBad = [(SourcePos, String)]
type Parser source result = source -> Either SomethingBad result
type Tokens t = [(SourcePos, t)]
instance Serialize SourcePos where
put pos = do
let line = sourceLine pos
col = sourceColumn pos
name = sourceName pos
putWord32be (fromIntegral line)
putWord32be (fromIntegral col)
put name
get = do
line <- fromIntegral <$> getWord32be
col <- fromIntegral <$> getWord32be
name <- get
return (newPos name line col)
mkError :: ParseError -> Either SomethingBad b
mkError e = Left $ map ((errorPos e,) . messageToString) (errorMessages e)
where
messageToString = \case
SysUnExpect msg -> "Unexpected " ++ msg
UnExpect msg -> "Unexpected " ++ msg
Expect msg -> "Expected " ++ msg
Message msg -> msg
runTokenParser :: (Stream a Identity t) => ParsecT a () Identity b -> Parser a b
runTokenParser p = either mkError Right . runIdentity . runParserT p () ""
sourceToken :: (Show t, Stream (Tokens t) m (SourcePos, t))
=> (t -> Maybe a)
-> ParsecT [(SourcePos, t)] u m a
sourceToken f = tokenPrim (show . snd) nextPos (f . snd)
where
nextPos p _ = \case
((p', _) : _) -> p'
_ -> p
manyBetween :: (Monad m, Stream s m t)
=> ParsecT s u m open -> ParsecT s u m close -> ParsecT s u m p -> ParsecT s u m [p]
manyBetween o c p = o *> manyTill p c
(>+>) :: Parser a b -> Parser b c -> Parser a c
p1 >+> p2 = join <$> fmap p2 <$> p1
(<+<) :: Parser b c -> Parser a b -> Parser a c
(<+<) = flip (>+>)
tryRead :: (Monad m) => ReadS a -> String -> m a
tryRead p s = case p s of
[(val, "")] -> return val
[] -> fail "no parse"
_ -> fail "ambiguous parse"
tryReads :: (Monad m, Read a) => String -> m a
tryReads = tryRead reads
ignoreUnderscores :: String -> String
ignoreUnderscores = \case
x : xs -> x : ignore xs
xs -> xs
where
ignore = \case
xs@(x : '_' : _) | not (isAlphaNum x) -> xs
'_' : x : xs | isAlphaNum x -> x : ignore xs
x : xs -> x : ignore xs
xs -> xs
tryReadDecimal :: String -> Maybe Rational
tryReadDecimal = \case
('-' : xs) -> negate <$> readRational xs
('+' : xs) -> readRational xs
('.' : xs) -> readRational ("0." ++ xs)
xs -> readRational xs
where
readRational = tryRead readFloat . ignoreUnderscores
tryReadRational :: String -> Maybe Rational
tryReadRational xs = case right of
(_ : right') -> liftM2 (%) numer denom
where
numer = tryRead reads left
denom = tryRead reads right'
_ -> Nothing
where
(left, right) = span (/= '/') (ignoreUnderscores xs)
tryReadHex :: String -> Maybe Rational
tryReadHex = tryRead readHex . ignoreUnderscores . hex
where
hex = \case
'0' : 'x' : xs -> xs
_ -> ""
tryReadUUID :: String -> Maybe UUID
tryReadUUID = tryRead reads
tryReadVersion :: String -> Maybe Version
tryReadVersion = \case
('v' : xs) -> tryRead reads xs
_ -> fail "no version"
tryReadDateTime :: String -> Maybe (Maybe ZonedTime)
tryReadDateTime xs = case xs =~ dateTime of
[[_, y, m, d, h, min, _, s, s', z, zm, _, zs]]
-> Just (liftM2 ZonedTime (liftM2 LocalTime date time) zone)
where
(year, month, day, hour, minute) = (read y, read m, read d, read h, read min)
sec = read ((if null s then "0" else s) ++ (if null s' then ".0" else s'))
time = makeTimeOfDayValid hour minute sec
date = fromGregorianValid year month day
zone = Just $ case z of
"Z" -> utc
('-' : _) -> minutesToTimeZone (negate zn)
_ -> minutesToTimeZone zn
where
zn = read zm * 60 + (if zs == "" then 0 else read zs)
_ -> Nothing
where
date = "([0-9]{4})-?([0-9]{2})-?([0-9]{2})"
time = "([0-9]{2}):?([0-9]{2})(:?([0-9]{2})(\\.[0-9]{1,12})?)?"
timeZone = "(Z|[+-]([0-9]{1,2})(:?([0-9]{2}))?)"
dateTime = concat ["^", date, "T?", time, timeZone, "$"]
tryReadDate :: String -> Maybe (Maybe Day)
tryReadDate xs = case xs =~ date of
[[_, y, _, m, d, ""]] -> Just (fromGregorianValid year month day)
where
(year, month, day) = (read y, read m, read d)
[[_, y, _, _, _, d]] -> Just (fromOrdinalDateValid year day)
where
(year, day) = (read y, read d)
_ -> Nothing
where
date = "^([0-9]{4})-(([0-9]{2})-([0-9]{2})|([0-9]{3}))$"
tryReadTime :: String -> Maybe (Maybe TimeOfDay)
tryReadTime xs = case xs =~ time of
[[_, h, m, _, s]] -> Just (makeTimeOfDayValid hour min sec)
where
(hour, min, sec) = (read h, read m, if null s then 0 else read s)
_ -> Nothing
where
time = "^([0-9]{2}):([0-9]{2})(:([0-9]{2}))?$"
tryReadBool :: String -> Maybe Bool
tryReadBool = \case
"true" -> return True
"TRUE" -> return True
"True" -> return True
"false" -> return False
"False" -> return False
"FALSE" -> return False
_ -> Nothing
tryReadLink :: String -> Maybe String
tryReadLink xs
| xs =~ url = Just xs
| otherwise = Nothing
where
url = concat [
"^[a-z](-?[a-z0-9])*(\\.[a-z](-?[a-z0-9])*)+"
, "(/([a-z0-9_.=-]|%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})*)+"
, "(\\?([a-z0-9_.=-]|%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})*)?$"
]
sya :: (Ord p, Eq o)
=> (a -> Maybe o)
-> (o -> Bool)
-> (o -> p)
-> [a]
-> [a]
sya mkOp isL p = sy []
where
sy (t : ts) (x : xs)
| isOp x && isOp t && cmp t x = t : sy ts (x : xs)
sy ts (x : xs)
| isOp x = sy (x : ts) xs
| otherwise = x : sy ts xs
sy ts [] = ts
isOp = isJust . mkOp
cmp o1 o2 = isL o1' && p o1' == p o2' || p o1' > p o2'
where
Just o1' = mkOp o1
Just o2' = mkOp o2