module Iri.Parsing.Attoparsec.ByteString ( uri, httpUri, ) where import Iri.Prelude hiding (foldl, hash) import Iri.Data import Data.Attoparsec.ByteString hiding (try) import qualified Data.Attoparsec.ByteString.Char8 as F import qualified Data.ByteString as K import qualified Data.Text as T import qualified Data.Text.Punycode as A import qualified Data.Text.Encoding as B import qualified Data.Text.Encoding.Error as L import qualified Data.HashMap.Strict as O import qualified Data.Vector as S import qualified VectorBuilder.Builder as P import qualified VectorBuilder.Vector as Q import qualified VectorBuilder.MonadPlus as E import qualified Iri.PercentEncoding as I import qualified Iri.CodePointPredicates.Rfc3986 as C import qualified Iri.MonadPlus as R import qualified Ptr.Poking as G import qualified Ptr.ByteString as H import qualified Text.Builder as J import qualified Net.IPv4 as M import qualified Net.IPv6 as N {-# INLINE percent #-} percent :: Parser Word8 percent = word8 37 {-# INLINE plus #-} plus :: Parser Word8 plus = word8 43 {-# INLINE colon #-} colon :: Parser Word8 colon = word8 58 {-# INLINE at #-} at :: Parser Word8 at = word8 64 {-# INLINE forwardSlash #-} forwardSlash :: Parser Word8 forwardSlash = word8 47 {-# INLINE question #-} question :: Parser Word8 question = word8 63 {-# INLINE hash #-} hash :: Parser Word8 hash = word8 35 {-# INLINE equality #-} equality :: Parser Word8 equality = word8 61 {-# INLINE ampersand #-} ampersand :: Parser Word8 ampersand = word8 38 {-# INLINE semicolon #-} semicolon :: Parser Word8 semicolon = word8 59 {-# INLINE labeled #-} labeled :: String -> Parser a -> Parser a labeled label parser = parser label {-| Parser of a well-formed URI conforming to the RFC3986 standard into IRI. Performs URL- and Punycode-decoding. -} {-# INLINABLE uri #-} uri :: Parser Iri uri = labeled "URI" $ do parsedScheme <- scheme colon parsedHierarchy <- hierarchy parsedQuery <- query parsedFragment <- fragment return (Iri parsedScheme parsedHierarchy parsedQuery parsedFragment) {-| Same as 'uri', but optimized specifially for the case of HTTP URIs. -} {-# INLINABLE httpUri #-} httpUri :: Parser HttpIri httpUri = labeled "HTTP URI" $ do satisfy (\ x -> x == 104 || x == 72) satisfy (\ x -> x == 116 || x == 84) satisfy (\ x -> x == 116 || x == 84) satisfy (\ x -> x == 112 || x == 80) secure <- satisfy (\ b -> b == 115 || b == 83) $> True <|> pure False string "://" parsedHost <- host parsedPort <- PresentPort <$> (colon *> port) <|> pure MissingPort parsedPath <- (forwardSlash *> path) <|> pure (Path mempty) parsedQuery <- query parsedFragment <- fragment return (HttpIri (Security secure) parsedHost parsedPort parsedPath parsedQuery parsedFragment) {-# INLINE hierarchy #-} hierarchy :: Parser Hierarchy hierarchy = do slashPresent <- forwardSlash $> True <|> pure False if slashPresent then do slashPresent <- forwardSlash $> True <|> pure False if slashPresent then authorisedHierarchyBody AuthorisedHierarchy else AbsoluteHierarchy <$> path else RelativeHierarchy <$> path {-# INLINE authorisedHierarchyBody #-} authorisedHierarchyBody :: (Authority -> Path -> body) -> Parser body authorisedHierarchyBody body = do parsedUserInfo <- (presentUserInfo PresentUserInfo <* at) <|> pure MissingUserInfo parsedHost <- host parsedPort <- PresentPort <$> (colon *> port) <|> pure MissingPort parsedPath <- (forwardSlash *> path) <|> pure (Path mempty) return (body (Authority parsedUserInfo parsedHost parsedPort) parsedPath) {-# INLINE scheme #-} scheme :: Parser Scheme scheme = labeled "Scheme" $ fmap Scheme (takeWhile1 (C.scheme . fromIntegral)) {-# INLINABLE presentUserInfo #-} presentUserInfo :: (User -> Password -> a) -> Parser a presentUserInfo result = labeled "User info" $ do user <- User <$> urlEncodedString (C.unencodedUserInfoComponent . fromIntegral) passwordFollows <- True <$ colon <|> pure False if passwordFollows then do password <- PresentPassword <$> urlEncodedString (C.unencodedUserInfoComponent . fromIntegral) return (result user password) else return (result user MissingPassword) {-# INLINE host #-} host :: Parser Host host = labeled "Host" $ IpV6Host <$> ipV6 <|> IpV4Host <$> M.parserUtf8 <|> NamedHost <$> domainName {-# INLINABLE ipV6 #-} ipV6 :: Parser IPv6 ipV6 = do a <- F.hexadecimal colon b <- F.hexadecimal colon c <- F.hexadecimal colon d <- F.hexadecimal colon mplus (do e <- F.hexadecimal colon f <- F.hexadecimal colon g <- F.hexadecimal colon h <- F.hexadecimal return (N.fromWord16s a b c d e f g h)) (do colon return (N.fromWord16s a b c d 0 0 0 0)) {-# INLINE domainName #-} domainName :: Parser RegName domainName = fmap RegName (E.sepBy1 domainLabel (word8 46)) {-| Domain label with Punycode decoding applied. -} {-# INLINE domainLabel #-} domainLabel :: Parser DomainLabel domainLabel = labeled "Domain label" $ do punycodeFollows <- True <$ string "xn--" <|> pure False ascii <- takeWhile1 (C.domainLabel . fromIntegral) if punycodeFollows then case A.decode ascii of Right text -> return (DomainLabel text) Left exception -> fail (showString "Punycode decoding exception: " (show exception)) else return (DomainLabel (B.decodeUtf8 ascii)) {-# INLINE port #-} port :: Parser Word16 port = F.decimal {-# INLINE path #-} path :: Parser Path path = do segments <- E.sepBy pathSegment forwardSlash if segmentsAreEmpty segments then return (Path mempty) else return (Path segments) where segmentsAreEmpty segments = S.length segments == 1 && (case S.unsafeHead segments of PathSegment headSegmentText -> T.null headSegmentText) {-# INLINE pathSegment #-} pathSegment :: Parser PathSegment pathSegment = fmap PathSegment (urlEncodedString (C.unencodedPathSegment . fromIntegral)) {-# INLINABLE utf8Chunks #-} utf8Chunks :: Parser ByteString -> Parser Text utf8Chunks chunk = labeled "UTF8 chunks" $ R.foldlM progress (mempty, mempty, B.streamDecodeUtf8) chunk >>= finish where progress (!builder, _, decode) bytes = case unsafeDupablePerformIO (try (evaluate (decode bytes))) of Right (B.Some decodedChunk undecodedBytes newDecode) -> return (builder <> J.text decodedChunk, undecodedBytes, newDecode) Left (L.DecodeError error _) -> fail (showString "UTF8 decoding: " error) finish (builder, undecodedBytes, _) = if K.null undecodedBytes then return (J.run builder) else fail (showString "UTF8 decoding: Bytes remaining: " (show undecodedBytes)) {-# INLINABLE urlEncodedString #-} urlEncodedString :: (Word8 -> Bool) -> Parser Text urlEncodedString unencodedBytesPredicate = labeled "URL-encoded string" $ utf8Chunks $ takeWhile1 unencodedBytesPredicate <|> encoded where encoded = K.singleton <$> percentEncodedByte {-# INLINE percentEncodedByte #-} percentEncodedByte :: Parser Word8 percentEncodedByte = labeled "Percent-encoded byte" $ do percent byte1 <- anyWord8 byte2 <- anyWord8 I.matchPercentEncodedBytes (fail "Broken percent encoding") return byte1 byte2 {-# INLINABLE query #-} query :: Parser Query query = labeled "Query" $ (question *> (Query <$> queryOrFragmentBody)) <|> pure (Query mempty) {-# INLINABLE fragment #-} fragment :: Parser Fragment fragment = labeled "Fragment" $ (hash *> (Fragment <$> queryOrFragmentBody)) <|> pure (Fragment mempty) {-| The stuff after the question or the hash mark. -} {-# INLINABLE queryOrFragmentBody #-} queryOrFragmentBody :: Parser Text queryOrFragmentBody = utf8Chunks $ takeWhile1 (C.unencodedQuery . fromIntegral) <|> " " <$ plus <|> K.singleton <$> percentEncodedByte