{-# LANGUAGE CPP #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Data.RDF.IRI
( IRI(..), IRIRef(..)
, Scheme(..), Authority(..), UserInfo(..), Host(..), Port(..)
, Path(..), Query(..), Fragment(..)
, IRIError(..), SchemaError(..)
, mkIRI
, serializeIRI
, parseIRI, parseRelIRI
, validateIRI, resolveIRI
, removeIRIFragment
) where
import Data.Semigroup (Semigroup(..))
import Data.Maybe (maybe, isJust)
import Data.Functor
import Data.List (intersperse)
import Control.Applicative
import Control.Monad (guard)
import Control.Arrow (first, (&&&), (>>>))
import Data.Char (isAlpha, isDigit, isAlphaNum, toUpper, toLower)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Attoparsec.Text (Parser, (<?>))
import qualified Data.Attoparsec.Text as P
newtype IRI = IRI { getIRI :: Text }
deriving (Show, Eq)
data IRIRef = IRIRef
!(Maybe Scheme)
!(Maybe Authority)
!Path
!(Maybe Query)
!(Maybe Fragment)
deriving (Show, Eq, Ord)
newtype Scheme = Scheme Text
deriving (Show, Eq, Ord)
data Authority = Authority
!(Maybe UserInfo)
!Host
!(Maybe Port)
deriving (Show, Eq, Ord)
newtype UserInfo = UserInfo Text
deriving (Show, Eq, Ord)
newtype Host = Host Text
deriving (Show, Eq, Ord)
newtype Port = Port Int
deriving (Show, Eq, Ord)
newtype Path = Path Text
deriving (Show, Eq, Semigroup, Monoid, Ord)
newtype Query = Query Text
deriving (Show, Eq, Semigroup, Ord)
instance Monoid Query where
mempty = Query mempty
#if !(MIN_VERSION_base(4,11,0))
mappend = (<>)
#endif
newtype Fragment = Fragment Text
deriving (Show, Eq, Semigroup, Ord)
instance Monoid Fragment where
mempty = Fragment mempty
#if !(MIN_VERSION_base(4,11,0))
mappend = (<>)
#endif
data IRIError = InvalidIRI
deriving (Show, Eq)
data SchemaError
= NonAlphaLeading
| InvalidChars
| MissingColon
deriving (Show, Eq)
removeIRIFragment :: IRIRef -> IRIRef
removeIRIFragment (IRIRef s a p q _) = IRIRef s a p q Nothing
serializeIRI :: IRIRef -> Text
serializeIRI (IRIRef s a p q f) = mconcat
[ maybe mempty scheme s
, maybe mempty authority a
, path p
, maybe mempty query q
, maybe mempty fragment f ]
where
scheme (Scheme s') = s' <> ":"
authority (Authority u (Host h) p') = mconcat
[ "//"
, maybe mempty userInfo u
, h
, maybe mempty port p' ]
userInfo (UserInfo u) = u <> "@"
port (Port p') = (":" <>) . T.pack . show $ p'
path (Path p') = p'
query (Query q') = "?" <> q'
fragment (Fragment f') = "#" <> f'
mkIRI :: Text -> Either String IRI
mkIRI t = IRI . serializeIRI <$> parseIRI t
parseIRI :: Text -> Either String IRIRef
parseIRI = P.parseOnly $ iriParser <* (P.endOfInput <?> "Unexpected characters at the end")
parseRelIRI :: Text -> Either String IRIRef
parseRelIRI = P.parseOnly $ irelativeRefParser <* (P.endOfInput <?> "Unexpected characters at the end")
validateIRI :: Text -> Either String Text
validateIRI t = t <$ parseIRI t
resolveIRI
:: Text
-> Text
-> Either String Text
resolveIRI baseIri iri = serializeIRI <$> resolvedIRI
where
resolvedIRI = either (const resolvedRelativeIRI) resolveAbsoluteIRI (parseIRI iri)
resolveAbsoluteIRI (IRIRef s a (Path p) q f) = return $ IRIRef s a (removeDotSegments p) q f
resolvedRelativeIRI = do
(IRIRef _ ra rp@(Path rp') rq rf) <- parseRelIRI iri
(IRIRef bs ba bp bq _) <- parseIRI baseIri
let rIriWithoutAuth = resolveIriWithoutAuth rp rq rf bs ba bp bq
rIriWithAuth = return (IRIRef bs ra (removeDotSegments rp') rq rf)
maybe rIriWithoutAuth (const rIriWithAuth) ra
resolveIriWithoutAuth rp rq rf bs ba bp bq = return $!
if (rp == mempty)
then maybe (IRIRef bs ba bp bq rf) (const (IRIRef bs ba bp rq rf)) rq
else let (Path rp') = rp in if (T.head rp' == '/')
then IRIRef bs ba (removeDotSegments rp') rq rf
else IRIRef bs ba (removeDotSegments (merge ba bp rp)) rq rf
removeDotSegments p = removeDotSegments' (T.split (== '/') p) mempty
removeDotSegments' [] os = Path $ mconcat (intersperse "/" os)
removeDotSegments' ["."] os = removeDotSegments' mempty (os <> [mempty])
removeDotSegments' [".."] [] = removeDotSegments' mempty mempty
removeDotSegments' [".."] os = removeDotSegments' mempty (init os <> [mempty])
removeDotSegments' ss@[_] os = removeDotSegments' mempty (os <> ss)
removeDotSegments' (".":ss) os = removeDotSegments' ss os
removeDotSegments' ("..":ss) [] = removeDotSegments' ss mempty
removeDotSegments' ("..":ss) os@[""] = removeDotSegments' ss os
removeDotSegments' ("..":ss) os = removeDotSegments' ss (init os)
removeDotSegments' (s:ss) os = removeDotSegments' ss (os <> [s])
merge ba (Path bp) (Path rp)
| isJust ba && bp == mempty = "/" <> rp
| otherwise = T.dropWhileEnd (/= '/') bp <> rp
iriParser :: Parser IRIRef
iriParser = do
scheme <- Just <$> schemeParser
_ <- P.string ":" <?> "Missing colon after scheme"
(authority, path) <- ihierPartParser
query <- optional iqueryParser
fragment <- optional ifragmentParser
return (IRIRef scheme authority path query fragment)
ihierPartParser :: Parser (Maybe Authority, Path)
ihierPartParser =
iauthWithPathParser <|>
ipathAbsoluteParser <|>
ipathRootlessParser <|>
ipathEmptyParser
irelativeRefParser :: Parser IRIRef
irelativeRefParser = do
(authority, path) <- irelativePartParser
query <- optional iqueryParser
fragment <- optional ifragmentParser
return (IRIRef Nothing authority path query fragment)
irelativePartParser :: Parser (Maybe Authority, Path)
irelativePartParser =
iauthWithPathParser <|>
ipathAbsoluteParser <|>
ipathNoSchemeParser <|>
ipathEmptyParser
iauthorityParser :: Parser Authority
iauthorityParser =
Authority <$> optional (iuserInfoParser <* P.string "@")
<*> ihostParser
<*> optional (P.string ":" *> portParser)
<?> "Authority"
iuserInfoParser :: Parser UserInfo
iuserInfoParser = UserInfo . mconcat <$> P.many1 iuserInfoP
where iuserInfoP = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string ":"
ihostParser :: Parser Host
ihostParser = Host <$> (ipLiteralParser <|> ipV4AddressParser <|> iregNameParser)
<?> "Host"
iregNameParser :: Parser Text
iregNameParser = mconcat <$> P.many' (iunreservedP <|> pctEncodedParser <|> subDelimsP)
ipathAbEmptyParser :: Parser Path
ipathAbEmptyParser = Path <$> ipathAbEmptyParser'
ipathAbEmptyParser' :: Parser Text
ipathAbEmptyParser' = mconcat <$> P.many' (mconcat <$> sequence [P.string "/", isegmentParser])
ipathAbsoluteParser :: Parser (Maybe Authority, Path)
ipathAbsoluteParser = (Nothing,) <$> (Path <$> ipathAbsoluteParser')
ipathAbsoluteParser' :: Parser Text
ipathAbsoluteParser' = mconcat <$> sequence [P.string "/", ipathRootlessParser']
ipathNoSchemeParser :: Parser (Maybe Authority, Path)
ipathNoSchemeParser = (Nothing,) <$> (Path <$> ipathNoSchemeParser')
ipathNoSchemeParser' :: Parser Text
ipathNoSchemeParser' = mconcat <$> sequence [isegmentNzNcParser, ipathAbEmptyParser']
ipathRootlessParser :: Parser (Maybe Authority, Path)
ipathRootlessParser = (Nothing,) <$> (Path <$> ipathRootlessParser')
ipathRootlessParser' :: Parser Text
ipathRootlessParser' = mconcat <$> sequence [isegmentNzParser, ipathAbEmptyParser']
ipathEmptyParser :: Parser (Maybe Authority, Path)
ipathEmptyParser = (Nothing, mempty) <$ ipathEmptyParser'
ipathEmptyParser' :: Parser Text
ipathEmptyParser' = P.string mempty <?> "Empty path"
isegmentParser :: Parser Text
isegmentParser = mconcat <$> (P.many' ipcharParser)
isegmentNzParser :: Parser Text
isegmentNzParser = mconcat <$> (P.many1 ipcharParser)
isegmentNzNcParser :: Parser Text
isegmentNzNcParser = mconcat <$> (P.many1 _isegmentNzNcParser)
where _isegmentNzNcParser = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string "@"
ipcharParser :: Parser Text
ipcharParser = iunreservedP <|> pctEncodedParser <|> subDelimsP <|> P.string ":" <|> P.string "@"
iqueryParser :: Parser Query
iqueryParser = Query <$> iqueryParser'
iqueryParser' :: Parser Text
iqueryParser' =
P.char '?' *> (mconcat <$> P.many' (ipcharParser <|> iprivateParser <|> P.string "/" <|> P.string "?"))
<?> "Query"
ifragmentParser :: Parser Fragment
ifragmentParser = Fragment <$> ifragmentParser'
ifragmentParser' :: Parser Text
ifragmentParser' =
P.char '#' *> (mconcat <$> P.many' (ipcharParser <|> P.string "/" <|> P.string "?"))
<?> "Fragment"
iunreservedP :: Parser Text
iunreservedP = T.singleton <$> P.satisfy isIunreserved
isIunreserved :: Char -> Bool
isIunreserved c = isUnreserved c || isUcsChar c
isUcsChar :: Char -> Bool
isUcsChar c = ('\x000A0' <= c && c <= '\x0D7FF')
|| ('\x0F900' <= c && c <= '\x0FDCF')
|| ('\x0FDF0' <= c && c <= '\x0FFEF')
|| ('\x10000' <= c && c <= '\x1FFFD')
|| ('\x20000' <= c && c <= '\x2FFFD')
|| ('\x30000' <= c && c <= '\x3FFFD')
|| ('\x40000' <= c && c <= '\x4FFFD')
|| ('\x50000' <= c && c <= '\x5FFFD')
|| ('\x60000' <= c && c <= '\x6FFFD')
|| ('\x70000' <= c && c <= '\x7FFFD')
|| ('\x80000' <= c && c <= '\x8FFFD')
|| ('\x90000' <= c && c <= '\x9FFFD')
|| ('\xA0000' <= c && c <= '\xAFFFD')
|| ('\xB0000' <= c && c <= '\xBFFFD')
|| ('\xC0000' <= c && c <= '\xCFFFD')
|| ('\xD0000' <= c && c <= '\xDFFFD')
|| ('\xE1000' <= c && c <= '\xEFFFD')
iprivateParser :: Parser Text
iprivateParser = T.singleton <$> P.satisfy isIPrivate
isIPrivate :: Char -> Bool
isIPrivate c = ('\x00E000' <= c && c <= '\x00F8FF')
|| ('\x0F0000' <= c && c <= '\x0FFFFD')
|| ('\x100000' <= c && c <= '\x10FFFD')
schemeParser :: Parser Scheme
schemeParser =
Scheme . T.map toLower <$> (T.cons <$> schemeHead <*> schemeRest)
where
schemeHead = P.satisfy isAlpha <?> "Scheme head"
schemeRest = P.takeWhile isSchemeTailChar <?> "Scheme tail"
isSchemeTailChar c = isAlphaNum c
|| c == '+' || c == '.' || c == '_' || c == '-'
portParser :: Parser Port
portParser = Port <$> portParser'
portParser' :: Parser Int
portParser' = P.decimal <?> "Port"
ipLiteralParser :: Parser Text
ipLiteralParser = P.string "[" *> (ipV6AddressParser <|> ipFutureParser) <* P.string "]"
ipFutureParser :: Parser Text
ipFutureParser =
mconcat <$> sequence [
P.string "v",
P.takeWhile1 isHexaDigit,
P.string ".",
P.takeWhile1 isValidFinalChar]
where isValidFinalChar c = isUnreserved c || isSubDelims c || c == ':'
ipV6AddressParser :: Parser Text
ipV6AddressParser = do
l <- leadingP
t <- trailingP l
joinParts l t
<?> "IPV6"
where
leadingP = h16 `P.sepBy` ":"
trailingP = (id &&& length) >>> \l -> ipNotElided l <|> ipElided l
joinParts leading trailing = pure $ (T.intercalate ":" leading) <> trailing
h16 = parseBetween 1 4 (P.takeWhile isHexaDigit)
ipNotElided (leading, lengthL) =
guard (lengthL == 7 && isDecOctet (last leading)) *> partialIpV4 <|>
(guard (lengthL == 8) $> mempty)
ipElided (_, lengthL) = do
guard $ lengthL <= 8
elision <- P.string "::"
trailing <- h16 `P.sepBy` ":"
let lengthT = length trailing
let lengthTotal = lengthL + lengthT
guard $ lengthT < 8
embeddedIpV4 <-
guard (lengthT > 0 && lengthTotal < 7 && isDecOctet (last trailing)) *> partialIpV4 <|>
pure mempty
pure $ mconcat [elision, (T.intercalate ":" trailing), embeddedIpV4]
partialIpV4 = mconcat <$> sequence [dotP, decOctetP, dotP, decOctetP, dotP, decOctetP]
ipV4AddressParser :: Parser Text
ipV4AddressParser = mconcat <$> sequence [decOctetP, dotP, decOctetP, dotP, decOctetP, dotP, decOctetP]
decOctetP :: Parser Text
decOctetP = do
s <- P.takeWhile1 isDigit
guard (isDecOctet s)
pure s
isDecOctet :: Text -> Bool
isDecOctet s = len > 0 && T.all isDigit s && (len < 3 || (len == 3 && s <= "255"))
where len = T.length s
pctEncodedParser :: Parser Text
pctEncodedParser =
T.cons <$> P.char '%'
<*> (T.pack . fmap toUpper <$> (P.count 2 (P.satisfy isHexaDigit)))
<?> "Percent encoding"
isUnreserved :: Char -> Bool
isUnreserved c = isAlphaNum c
|| c == '-' || c == '.' || c == '_' || c == '~'
subDelimsP :: Parser Text
subDelimsP = T.singleton <$> P.satisfy isSubDelims
isSubDelims :: Char -> Bool
isSubDelims c = c `elem` ("!$&'()*+,;=" :: String)
iauthWithPathParser :: Parser (Maybe Authority, Path)
iauthWithPathParser = do
void (P.string "//")
curry (first Just) <$> iauthorityParser <*> ipathAbEmptyParser
isHexaDigit :: Char -> Bool
isHexaDigit c = (isDigit c) ||
(c >= 'a' && c <= 'f') ||
(c >= 'A' && c <= 'F')
dotP :: Parser Text
dotP = P.string "."
parseBetween :: Int -> Int -> Parser Text -> Parser Text
parseBetween i j p = do
s <- p
let len = T.length s
guard $ len >= i && len <= j
return s