{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeSynonymInstances #-} module Network.URI.Template.Parser ( parseTemplate, ) where import qualified Data.ByteString as BS import qualified Data.Char as C import qualified Data.String as S import qualified Data.Text as T import qualified Data.Text.Encoding as TE import qualified Data.Vector as V import FlatParse.Basic import Network.URI.Template.Error import Network.URI.Template.Types -- | Parse a character in a specific range {-# INLINE charRange #-} charRange :: Char -> Char -> Parser e Char charRange l r = satisfy (\c -> l <= c && c <= r) -- | Parse a character matching any of the given ranges -- Uses fusedSatisfy for better performance when possible charRanges :: [(Char, Char)] -> Parser e Char charRanges [] = empty charRanges ((l, r) : rest) = satisfy (\c -> l <= c && c <= r) <|> charRanges rest -- | Parse Unicode characters as specified in RFC 6570 ucschar :: Parser e Char ucschar = charRanges [ ('\xA0', '\xD7FF') , ('\xF900', '\xFDCF') , ('\xFDF0', '\xFFEF') , ('\x10000', '\x1FFFD') , ('\x20000', '\x2FFFD') , ('\x30000', '\x3FFFD') , ('\x40000', '\x4FFFD') , ('\x50000', '\x5FFFD') , ('\x60000', '\x6FFFD') , ('\x70000', '\x7FFFD') , ('\x80000', '\x8FFFD') , ('\x90000', '\x9FFFD') , ('\xA0000', '\xAFFFD') , ('\xB0000', '\xBFFFD') , ('\xC0000', '\xCFFFD') , ('\xD0000', '\xDFFFD') , ('\xE1000', '\xEFFFD') ] -- | Parse private use area characters iprivate :: Parser e Char iprivate = charRanges [ ('\xE000', '\xF8FF') , ('\xF0000', '\xFFFFD') , ('\x100000', '\x10FFFD') ] -- | Parse a percent-encoded sequence {-# INLINE pctEncoded #-} pctEncoded :: Parser e String pctEncoded = do $(char '%') d1 <- hexDigit d2 <- hexDigit return ['%', d1, d2] where {-# INLINE hexDigit #-} hexDigit = satisfy (\c -> C.isDigit c || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) -- | Parse a literal character literalChar :: Parser e Char literalChar = ($(char '\x21') >> pure '\x21') <|> ($(char '\x23') >> pure '\x23') <|> ($(char '\x24') >> pure '\x24') <|> ($(char '\x26') >> pure '\x26') <|> ($(char '\x3D') >> pure '\x3D') <|> ($(char '\x5D') >> pure '\x5D') <|> ($(char '\x5F') >> pure '\x5F') <|> ($(char '\x7E') >> pure '\x7E') <|> charRanges [('\x28', '\x3B'), ('\x3F', '\x5B'), ('\x61', '\x7A')] <|> ucschar <|> iprivate -- | Parse a literal segment -- Uses byteStringOf to avoid intermediate list allocations {-# INLINE literal #-} literal :: Parser e TemplateSegment literal = Literal . TE.decodeUtf8 <$> byteStringOf (skipSome literalCharOrPct) where {-# INLINE literalCharOrPct #-} literalCharOrPct = (() <$ literalChar) <|> (() <$ pctEncoded) -- | Parse variables in an embed {-# INLINE variables #-} variables :: Parser e TemplateSegment variables = do mod <- modifier vars <- sepBy1 variable (ws *> $(char ',') *> ws) return $ Embed mod vars -- | Parse optional whitespace {-# INLINE ws #-} ws :: Parser e () ws = skipMany (satisfy (\c -> c == ' ' || c == '\t' || c == '\n' || c == '\r')) -- | Parse a modifier character modifier :: Parser e Modifier modifier = ($(char '+') *> pure Reserved) <|> ($(char '#') *> pure Fragment) <|> ($(char '.') *> pure Label) <|> ($(char '/') *> pure PathSegment) <|> ($(char ';') *> pure PathParameter) <|> ($(char '?') *> pure Query) <|> ($(char '&') *> pure QueryContinuation) <|> ($(char '=') *> pure Reserved) <|> ($(char '@') *> pure Reserved) <|> ($(char '!') *> pure Reserved) <|> ($(char '|') *> pure Reserved) <|> pure Simple -- | Parse a variable variable :: Parser e Variable variable = do nm <- name valMod <- valueModifier return $ Variable nm valMod where {-# INLINE name #-} name = TE.decodeUtf8 <$> byteStringOf (skipSome nameChar) where {-# INLINE nameChar #-} nameChar = (() <$ alphaNum) <|> (() <$ $(char '_')) <|> (() <$ pctEncoded) {-# INLINE valueModifier #-} valueModifier = ($(char '*') *> pure Explode) <|> ($(char ':') *> (MaxLength <$> parseInt)) <|> pure Normal {-# INLINE parseInt #-} parseInt = do digits <- some digit return $ read digits {-# INLINE alphaNum #-} alphaNum = satisfy (\c -> C.isAlphaNum c) {-# INLINE digit #-} digit = satisfy C.isDigit -- | Parse an embedded variable expression {-# INLINE embed #-} embed :: Parser e TemplateSegment embed = $(char '{') *> variables <* $(char '}') -- | Parse a URI template segments {-# INLINE uriTemplate #-} uriTemplate :: Parser e (V.Vector TemplateSegment) uriTemplate = V.fromList <$> (ws *> many (literal <|> embed)) -- | Helper to separate a parser by a separator {-# INLINE sepBy1 #-} sepBy1 :: Parser e a -> Parser e sep -> Parser e [a] sepBy1 p sep = (:) <$> p <*> many (sep *> p) -- | Parse a template from a String parseTemplate :: String -> Either ParseError UriTemplate parseTemplate input = case runParser uriTemplate (TE.encodeUtf8 $ T.pack input) of OK result _ -> Right (UriTemplate result) Err _ -> Left (GenericParseError ("Parse error in URI template: " ++ input)) Fail -> Left (GenericParseError ("Parse error in URI template: " ++ input)) {- | 'IsString' instance for 'UriTemplate' allows using string literals directly as templates when @OverloadedStrings@ is enabled. >>> :set -XOverloadedStrings >>> let template = "/users/{userId}" :: UriTemplate >>> renderTemplate template "/users/{userId}" Note: If the string fails to parse as a valid URI template, this will call 'error'. For fallible parsing, use 'parseTemplate' instead. -} instance S.IsString UriTemplate where fromString s = case parseTemplate s of Right template -> template Left err -> error $ "Invalid URI template: " ++ s ++ "\n" ++ show err