{-# LANGUAGE OverloadedStrings, RankNTypes, PatternSynonyms, ViewPatterns,
             BangPatterns #-}

module Data.CSS.Syntax.Tokens
    ( Token(..)
    , NumericValue(..)
    , HashFlag(..)
    , Unit

    , tokenize
    , serialize
    ) where


import           Control.Applicative
import           Control.Monad

import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import           Data.Monoid
import           Data.Char
import           Data.Scientific
import           Numeric

import           Prelude

import           Data.Text.Internal (Text(..))
import           Data.Text.Unsafe (inlineInterleaveST)
import qualified Data.Text.Array as A
import           Control.Monad.ST (ST)
import           GHC.Base (unsafeChr)
import           Data.Word (Word16)
import           Data.Char (ord)
import           Data.Bits


data Token
    = Whitespace

    | CDO -- CommentDelimiterOpen
    | CDC -- CommentDelimiterClose

    | Comma
    | Colon
    | Semicolon

    | LeftParen
    | RightParen
    | LeftSquareBracket
    | RightSquareBracket
    | LeftCurlyBracket
    | RightCurlyBracket

    | SuffixMatch
    | SubstringMatch
    | PrefixMatch
    | DashMatch
    | IncludeMatch

    | Column

    | String !Text
    | BadString

    | Number !Text !NumericValue
    | Percentage !Text !NumericValue
    | Dimension !Text !NumericValue !Unit

    | Url !Text
    | BadUrl

    | Ident !Text

    | AtKeyword !Text

    | Function !Text

    | Hash !HashFlag !Text

    | Delim !Char

    deriving (Show, Eq)


data NumericValue
    = NVInteger !Integer   -- ^ number without dot '.' or exponent 'e'
    | NVNumber !Scientific -- ^ number with dot '.' or exponent 'e'
    deriving (Show, Eq)

data HashFlag = HId | HUnrestricted
    deriving (Show, Eq)

type Unit = Text



-- Tokenization
-------------------------------------------------------------------------------


-- | Parse a 'Text' into a list of 'Token's.
--
-- https://drafts.csswg.org/css-syntax/#tokenization

tokenize :: Text -> [Token]
tokenize = parseTokens . preprocessInputStream



-- | Before sending the input stream to the tokenizer, implementations must
-- make the following code point substitutions: (see spec)
--
-- https://drafts.csswg.org/css-syntax/#input-preprocessing

preprocessInputStream :: Text -> Text
preprocessInputStream t0@(Text _ _ len) = withNewA len $ \ dst -> do
    let go t d = case t of
            '\x0D' :. '\x0A' :. t' ->
                put '\x0A' t'
            '\x0D' :. t' ->
                put '\x0A' t'
            '\x0C' :. t' ->
                put '\x0A' t'
            '\x00' :. t' ->
                put '\xFFFD' t'
            c :. t' ->
                put c t'
            _ ->
                return d
            where put x t' = do
                      write dst d x
                      go t' (d + 1)
    go t0 0


-- Low level utilities
-------------------------------------------------------------------------------

pattern (:.) :: Char -> Text -> Text
pattern x :. xs <- (uncons -> Just (x, xs))

infixr 5 :.

-- | uncons first Word16 from Text without trying to decode UTF-16 sequence
uncons :: Text -> Maybe (Char, Text)
uncons (Text src offs len)
    | len <= 0 = Nothing
    | otherwise =
      Just (w2c (A.unsafeIndex src offs), Text src (offs+1) (len-1))
{-# INLINE uncons #-}

-- | write 16bit character
write :: A.MArray s -> Int -> Char -> ST s ()
write dst d x = A.unsafeWrite dst d (c2w x)
{-# INLINE write #-}

-- | write character that could have more than 16bit
-- code from Data.Text.Internal.Unsafe.Char.unsafeWrite
writeChar :: A.MArray s -> Int -> Char -> ST s Int
writeChar dst d c
    | n < 0x10000 = do
        A.unsafeWrite dst d (fromIntegral n)
        return (d+1)
    | otherwise = do
        A.unsafeWrite dst d lo
        A.unsafeWrite dst (d+1) hi
        return (d+2)
    where n = ord c
          m = n - 0x10000
          lo = fromIntegral $ (m `shiftR` 10) + 0xD800
          hi = fromIntegral $ (m .&. 0x3FF) + 0xDC00
{-# INLINE writeChar #-}

type Writer' s = (A.MArray s -> Int -> ST s Int, Text)
type Writer s = A.MArray s -> Int -> ST s (Int, Text)

-- | no-op for convenient pattern matching
w2c :: Word16 -> Char
w2c = unsafeChr . fromIntegral
{-# INLINE w2c #-}

c2w :: Char -> Word16
c2w = fromIntegral . ord
{-# INLINE c2w #-}

withNewA :: Int -> (forall s . A.MArray s -> ST s Int) -> Text
withNewA len act = Text a 0 l
    where (a, l) = A.run2 $ do
              dst <- A.new len
              dLen <- act dst
              return (dst, dLen)


-- Serialization
-------------------------------------------------------------------------------


-- | Serialize a list of 'Token's back into 'Text'.
--
-- Serialization "round-trips" with parsing:
--
--   tokenize (serialize (tokenize s)) == tokenize s
--
-- https://drafts.csswg.org/css-syntax/#serialization


serialize :: [Token] -> Text
serialize = TL.toStrict . TLB.toLazyText . go
    where go [] = ""
          go [Delim '\\'] = "\\" -- do not add newline in last token
          go [x] = renderToken x
          go (x:xs@(y:_))
              | needComment x y = renderToken x <> "/**/" <> go xs
              | otherwise = renderToken x <> go xs

{-# INLINE renderToken #-}
{-# INLINE needComment #-}

needComment :: Token -> Token -> Bool
needComment a CDC = case a of
    -- Can't be parsed that way but may exists in generated `Token` list.
    -- It's also possible to make Delim 'a' which will be parsed as Ident
    -- but we can't do much in this case since it's impossible to
    -- create Delim 'a' tokens in parser.
    Delim '!' -> True
    Delim '@' -> True
    Delim '#' -> True
    Delim '-' -> True
    Number {} -> True
    Dimension {} -> True
    Ident _ -> True
    AtKeyword _ -> True
    Function _ -> True
    Hash {} -> True
    _ -> False
needComment a b = case a of
    Whitespace    -> b == Whitespace
    Ident _       -> idn || b == CDC || b == LeftParen
    AtKeyword _   -> idn || b == CDC
    Hash {}       -> idn || b == CDC
    Dimension {}  -> idn || b == CDC
    Delim '#'     -> idn
    Delim '-'     -> idn
    Number {}     -> i || num || b == Delim '%'
    Delim '@'     -> i || b == Delim '-'
    Delim '.'     -> num
    Delim '+'     -> num
    Delim '/'     -> b == Delim '*' || b == SubstringMatch
    Delim '|'     -> b == Delim '='
        || b == Delim '|' ||  b == Column || b == DashMatch
    Delim '$'     -> b == Delim '='
    Delim '*'     -> b == Delim '='
    Delim '^'     -> b == Delim '='
    Delim '~'     -> b == Delim '='
    _             -> False
    where idn = i || b == Delim '-' || num
          i = case b of
              Ident _ -> True
              Function _ -> True
              Url _ -> True
              BadUrl -> True
              _ -> False
          num = case b of
              Number {} -> True
              Percentage {} -> True
              Dimension {} -> True
              _ -> False


renderToken :: Token -> TLB.Builder
renderToken token = case token of
    Whitespace         -> c ' '

    CDO                -> "<!--"
    CDC                -> "-->"

    Comma              -> c ','
    Colon              -> c ':'
    Semicolon          -> c ';'

    LeftParen          -> c '('
    RightParen         -> c ')'
    LeftSquareBracket  -> c '['
    RightSquareBracket -> c ']'
    LeftCurlyBracket   -> c '{'
    RightCurlyBracket  -> c '}'

    SuffixMatch        -> "$="
    SubstringMatch     -> "*="
    PrefixMatch        -> "^="
    DashMatch          -> "|="
    IncludeMatch       -> "~="

    Column             -> "||"

    String x           -> string x
    BadString          -> "\"\n"

    Number x _         -> t x
    Percentage x _     -> t x <> c '%'
    Dimension x _ u    -> t x <> t (renderDimensionUnit x u)

    Url x              -> "url(" <> t (renderUrl x) <> c ')'
    BadUrl             -> "url(()"

    Ident x            -> ident x

    AtKeyword x        -> c '@' <> ident x

    Function x         -> ident x <> c '('

    Hash HId x           -> c '#' <> ident x
    Hash HUnrestricted x -> c '#' <> t (renderUnrestrictedHash x)

    Delim '\\'         -> "\\\n"
    Delim x            -> c x
    where c = TLB.singleton
          t = TLB.fromText
          q = c '"'
          string x = q <> t (renderString x) <> q
          ident = t . renderIdent

-- https://www.w3.org/TR/cssom-1/#serialize-a-string

renderString :: Text -> Text
renderString t0@(Text _ _ l)
    | T.any needEscape t0 = withNewA (l*8) $ go t0 0
    | otherwise = t0
  where
    needEscape c = c <= '\x1F' || c == '\x7F' || c == '"' || c == '\\'
    go t d dst = case T.uncons t of
        Nothing -> return d
        Just (c, t')
            | c == '\x0' -> do
                write dst d '\xFFFD'
                -- spec says it should be escaped, but we loose
                -- serialize->tokenize->serialize roundtrip that way
                go t' (d+1) dst
            | (c >= '\x1' && c <= '\x1F') || c == '\x7F' -> do
                d' <- escapeAsCodePoint dst d c
                go t' d' dst
            | c == '"' || c == '\\' -> do
                -- strings are always in double quotes, so '\'' aren't escaped
                write dst d '\\'
                write dst (d+1) c
                go t' (d+2) dst
            | otherwise -> do
                d' <- writeChar dst d c
                go t' d' dst

renderUrl :: Text -> Text
renderUrl t0@(Text _ _ l)
    | T.any needEscape t0 = withNewA (l*8) $ go t0 0
    | otherwise = t0
  where
    needEscape c = c <= '\x1F' || c == '\x7F' || isWhitespace c
        || c == '\\' || c == ')' || c == '"' || c == '\'' || c == '('
    go t d dst = case T.uncons t of
        Nothing -> return d
        Just (c, t')
            | c == '\x0' -> do
                write dst d '\xFFFD'
                go t' (d+1) dst
            | needEscape c -> do
                d' <- escapeAsCodePoint dst d c
                go t' d' dst
            | otherwise -> do
                d' <- writeChar dst d c
                go t' d' dst

renderDimensionUnit :: Text -> Text -> Text
renderDimensionUnit num t0@(Text _ _ l)
    | not (T.any isExponent num)
    , c :. t' <- t0
    , isExponent c && validExp t' =
        withNewA (l*8) $ \ dst -> do
            d' <- escapeAsCodePoint dst 0 c
            renderUnrestrictedHash' t' d' dst
    | otherwise =
        renderIdent t0
    where validExp (s :. d :. _) | (s == '+' || s == '-') = isDigit d
          validExp (d :. _) = isDigit d
          validExp _ = False

renderIdent :: Text -> Text
renderIdent "-" = "\\-"
renderIdent t0@(Text _ _ l) = case t0 of
    c :. t'
        | isDigit c -> withNewA (l*8) $ \ dst -> do
            d' <- escapeAsCodePoint dst 0 c
            renderUnrestrictedHash' t' d' dst
    '-' :. c :. t'
        | isDigit c -> withNewA (l*8) $ \ dst -> do
            write dst 0 '-'
            d' <- escapeAsCodePoint dst 1 c
            renderUnrestrictedHash' t' d' dst
    _ -> renderUnrestrictedHash t0

renderUnrestrictedHash :: Text -> Text
renderUnrestrictedHash t0@(Text _ _ l)
    | T.any (not . nameCodePoint) t0 =
        withNewA (l*8) $ renderUnrestrictedHash' t0 0
    | otherwise = t0

renderUnrestrictedHash' :: Text -> Int -> A.MArray s -> ST s Int
renderUnrestrictedHash' = go
    where go t d dst = case T.uncons t of
            Nothing -> return d
            Just (c, t')
                | c == '\x0' -> do
                    write dst d '\xFFFD'
                    go t' (d+1) dst
                | (c >= '\x1' && c <= '\x1F') || c == '\x7F' -> do
                    d' <- escapeAsCodePoint dst d c
                    go t' d' dst
                | nameCodePoint c -> do
                    d' <- writeChar dst d c
                    go t' d' dst
                | otherwise -> do
                    write dst d '\\'
                    d' <- writeChar dst (d+1) c
                    go t' d' dst

escapeAsCodePoint :: A.MArray s -> Int -> Char -> ST s Int
escapeAsCodePoint dst d c = do
    write dst d '\\'
    d' <- foldM (\ o x -> write dst o x >> return (o+1))
        (d+1) (showHex (ord c) [])
    write dst d' ' '
    return (d' + 1)


-- | verify valid escape and consume escaped code point
escapedCodePoint :: Text -> Maybe (Writer' s)
escapedCodePoint t = case t of
    (hex -> Just d) :. ts -> go 5 d ts
    '\n' :. _ -> Nothing
    c :. ts -> Just (\ dst d -> write dst d c >> return (d+1), ts)
    _ -> Nothing
    where go :: Int -> Int -> Text -> Maybe (Writer' s)
          go 0 acc ts = ret acc ts
          go n acc ts = case ts of
              (hex -> Just d) :. ts' -> go (n-1) (acc*16 + d) ts'
              c :. ts' | isWhitespace c -> ret acc ts'
              _ -> ret acc ts
          ret (safe -> c) ts
              | c < 0x10000 = Just
                  (\ dst d -> write dst d (unsafeChr c) >> return (d+1), ts)
              | otherwise = Just
                  (\ dst d -> write dst d lo >> write dst (d+1) hi >> return (d+2)
                  ,ts)
              where m = c - 0x10000
                    lo = unsafeChr $ (m `shiftR` 10) + 0xD800
                    hi = unsafeChr $ (m .&. 0x3FF) + 0xDC00

safe :: Int -> Int
safe x
    | x == 0 || x > 0x10FFFF   = 0xFFFD
    | x .&. 0x1ff800 /= 0xd800 = x
    | otherwise                = 0xFFFD -- UTF16 surrogate code point

hex :: Char -> Maybe Int
hex c
    | c >= '0' && c <= '9' = Just (ord c - ord '0')
    | c >= 'a' && c <= 'f' = Just (ord c - ord 'a' + 10)
    | c >= 'A' && c <= 'F' = Just (ord c - ord 'A' + 10)
    | otherwise            = Nothing

{-# INLINE safe #-}
{-# INLINE hex #-}

escapedCodePoint' :: Text -> Maybe (Writer' s)
escapedCodePoint' ('\\' :. ts) = escapedCodePoint ts
escapedCodePoint' _ = Nothing

nameStartCodePoint :: Char -> Bool
nameStartCodePoint c =
    isAsciiLower c || isAsciiUpper c || c >= '\x0080' || c == '_'

nameCodePoint :: Char -> Bool
nameCodePoint c = nameStartCodePoint c || isDigit c || c == '-'

satisfyOrEscaped :: (Char -> Bool) -> Text -> Maybe (Writer' s)
satisfyOrEscaped p (c :. ts)
    | p c = Just (\ dst d -> write dst d c >> return (d+1), ts)
    | c == '\\' = escapedCodePoint ts
satisfyOrEscaped _ _ = Nothing

-- | Check if three code points would start an identifier and consume name
parseName :: Text -> Maybe (Writer s)
parseName t = case t of
    '-' :. ts -> consumeName' <$> satisfyOrEscaped (\ c -> nameStartCodePoint c || c == '-') ts
    ts -> consumeName <$> satisfyOrEscaped nameStartCodePoint ts
    where consumeName' n dst d = do
              write dst d '-'
              consumeName n dst (d + 1)


consumeName :: Writer' s -> Writer s
consumeName (w0, ts0) dst d0 = do
    d' <- w0 dst d0
    loop ts0 d'
    where loop ts d = case satisfyOrEscaped nameCodePoint ts of
              Just (w, ts') -> do
                  d' <- w dst d
                  loop ts' d'
              Nothing -> return (d, ts)

{-# INLINE parseName #-}
{-# INLINE consumeName #-}
{-# INLINE satisfyOrEscaped #-}
{-# INLINE escapedCodePoint #-}
{-# INLINE escapedCodePoint' #-}

parseNumericValue :: Text -> Maybe (Text, NumericValue, Text)
parseNumericValue t0@(Text a offs1 _) = case withSign start t0 of
    Just (nv, ts@(Text _ offs2 _)) ->
        Just (Text a offs1 (offs2 - offs1), nv, ts)
    Nothing -> Nothing
    where start sign t = case t of
              '.' :. (digit -> Just d) :. ts -> dot sign (startIR d) (-1) ts
              (digit -> Just d) :. ts        -> digits sign (startIR d) ts
              _ -> Nothing
          digits sign !c t = case t of
              '.' :. (digit -> Just d) :. ts -> dot sign (accIR c d) (-1) ts
              (digit -> Just d) :. ts        -> digits sign (accIR c d) ts
              _ -> Just $ expn True (sign $ readIR c) 0 t
          dot sign !c !e t = case t of
              (digit -> Just d) :. ts        -> dot sign (accIR c d) (e-1) ts
              _ -> Just $ expn False (sign $ readIR c) e t
          expn int c e0 t = case t of
              x :. ts
                  | isExponent x
                  , Just r <- withSign (expStart c e0 0) ts -> r
              _   | int -> (NVInteger c, t)
                  | otherwise -> (NVNumber $ scientific c e0, t)
          expStart c e0 e sign t = case t of
              (digit -> Just d) :. ts -> expDigits c e0 (e*10 + d) sign ts
              _ -> Nothing
          expDigits c e0 !e sign t = case t of
              (digit -> Just d) :. ts -> expDigits c e0 (e*10 + d) sign ts
              _ -> Just (NVNumber $ scientific c (sign e + e0), t)
          digit :: Enum a => Char -> Maybe a
          digit c
              | isDigit c = Just (toEnum $ ord c - ord '0')
              | otherwise = Nothing
          withSign :: Num a => ((a -> a) -> Text -> Maybe (b, Text))
                   -> Text -> Maybe (b, Text)
          withSign f t = case t of
              '+' :. ts -> f id ts
              '-' :. ts -> f negate ts
              _ -> f id t

-- Idea stolen from GHC implementation of `instance Read Integer`
-- http://hackage.haskell.org/package/base-4.11.1.0/docs/src/Text.Read.Lex.html#valInteger
-- A sub-quadratic algorithm for converting digits to Integer.
-- First we collect blocks of `blockDigits`-digit Integers
-- (so we don't do anything besides simple (acc*10+digit) on most inputs).
-- Then we combine them:
-- Pairs of adjacent radix b digits are combined into a single radix b^2 digit.
-- This process is repeated until we are left with a single digit.

blockDigits :: Int
blockDigits = 40

startBase :: Integer
startBase = 10^blockDigits

-- | (num digits in current block, blocks, current block's value)
type IntegerReader = (Int, [Integer], Integer)

startIR :: Integer -> IntegerReader
startIR d = (1, [], d)

{-# INLINE startIR #-}
{-# INLINE accIR #-}
{-# INLINE readIR #-}

accIR :: IntegerReader -> Integer -> IntegerReader
accIR (n, blocks, !cd) d
    | n < blockDigits = (n+1, blocks, cd*10 + d)
    | otherwise = (1, cd:blocks, d)

readIR :: IntegerReader -> Integer
readIR (_, [], cd) = cd
readIR (n, blocks, cd) =
    go startBase ((cd * padding):blocks) `div` padding
    where padding = 10^(blockDigits-n)
          go :: Integer -> [Integer] -> Integer
          go _ [] = 0
          go _ [x] = x
          go b xs = go (b*b) (combine b xs)
          combine :: Integer -> [Integer] -> [Integer]
          combine _ [] = []
          combine _ [x] = [x]
          combine b (x0:x1:xs) = x' : combine b xs
              where !x' = x0 + x1*b

skipComment :: Text -> Text
skipComment t = case t of
    '*' :. '/' :. ts -> ts
    _ :. ts -> skipComment ts
    ts -> ts

skipWhitespace :: Text -> Text
skipWhitespace t = case t of
    c :. ts
        | isWhitespace c -> skipWhitespace ts
        | otherwise -> t
    ts -> ts

parseTokens :: Text -> [Token]
parseTokens t0@(Text _ _ len) = snd $ A.run2 $ do
    dst <- A.new len
    dsta <- A.unsafeFreeze dst
    let go' !t d tgo = do
            ts <- inlineInterleaveST $ go d tgo
            return (t : ts)
        go d tgo = case tgo of
            c :. ts | isWhitespace c ->
                 go' Whitespace d (skipWhitespace ts)
            '/' :. '*' :. ts -> go d (skipComment ts)

            '<' :. '!' :. '-' :. '-' :. ts -> token CDO ts
            '-' :. '-' :. '>' :. ts ->        token CDC ts

            ',' :. ts -> token Comma ts
            ':' :. ts -> token Colon ts
            ';' :. ts -> token Semicolon ts
            '(' :. ts -> token LeftParen ts
            ')' :. ts -> token RightParen ts
            '[' :. ts -> token LeftSquareBracket ts
            ']' :. ts -> token RightSquareBracket ts
            '{' :. ts -> token LeftCurlyBracket ts
            '}' :. ts -> token RightCurlyBracket ts

            '$' :. '=' :. ts -> token SuffixMatch ts
            '*' :. '=' :. ts -> token SubstringMatch ts
            '^' :. '=' :. ts -> token PrefixMatch ts
            '|' :. '=' :. ts -> token DashMatch ts
            '~' :. '=' :. ts -> token IncludeMatch ts

            '|' :. '|' :. ts -> token Column ts

            (parseNumericValue -> Just (repr, nv, ts))
                | '%' :. ts' <- ts ->
                    go' (Percentage repr nv) d ts'
                | Just u <- parseName ts -> do
                    (unit, d', ts') <- mkText dst d u
                    go' (Dimension repr nv unit) d' ts'
                | otherwise ->
                    go' (Number repr nv) d ts

            -- ident like
            (parseName -> Just n) -> do
                (name, d', ts) <- mkText dst d n
                if isUrl name then
                    -- Special handling of url() functions (they are not really
                    -- functions, they have their own Token type).
                    case ts of
                        '(' :. (skipWhitespace -> ts') ->
                            case ts' of
                                '"'  :. _ -> go' (Function name) d' ts'
                                '\'' :. _ -> go' (Function name) d' ts'
                                _ -> parseUrl d' ts'
                        _ -> go' (Ident name) d' ts
                else
                    case ts of
                        '(' :. ts' -> go' (Function name) d' ts'
                        _ -> go' (Ident name) d' ts

            '"' :. ts -> parseString '"' d ts
            '\'' :. ts -> parseString '\'' d ts

            '@' :. (parseName -> Just n) -> do
                (name, d', ts) <- mkText dst d n
                go' (AtKeyword name) d' ts

            '#' :. (parseName -> Just n) -> do
                (name, d', ts) <- mkText dst d n
                go' (Hash HId name) d' ts

            '#' :. (satisfyOrEscaped nameCodePoint -> Just n) -> do
                (name, d', ts) <- mkText dst d (consumeName n)
                go' (Hash HUnrestricted name) d' ts

            c :. ts ->
                token (Delim c) ts
            _ -> return []

            where token t ts = go' t d ts

        isUrl t@(Text _ _ 3)
            | u :. r :. l :. _ <- t =
                (u == 'u' || u == 'U') &&
                (r == 'r' || r == 'R') &&
                (l == 'l' || l == 'L')
        isUrl _ = False

        -- https://drafts.csswg.org/css-syntax-3/#consume-string-token
        parseString endingCodePoint d0 = string d0
            where string d t = case t of
                      c :. ts | c == endingCodePoint -> ret d ts
                      '\\' :. ts
                          | Just (p, ts') <- escapedCodePoint ts -> do
                              d' <- p dst d
                              string d' ts'
                          | '\n' :. ts' <- ts ->
                              string d ts'
                          | Text _ _ 0 <- ts ->
                              string d ts
                      '\n' :. _ -> go' BadString d t
                      c :. ts -> do
                          write dst d c
                          string (d+1) ts
                      _ -> ret d t
                  ret d t = go' (String $ Text dsta d0 (d-d0)) d t

        -- https://drafts.csswg.org/css-syntax/#consume-url-token
        parseUrl d0 tUrl = url d0 (skipWhitespace tUrl)
            where ret d ts = go' (Url (Text dsta d0 (d-d0))) d ts
                  url d t = case t of
                      ')' :. ts -> ret d ts
                      c :. ts
                          | c == '"' || c == '\'' || c == '('
                            || nonPrintableCodePoint c -> do
                              badUrl d ts
                          | isWhitespace c ->
                              whitespace d ts
                      '\\' :. ts
                          | Just (p, ts') <- escapedCodePoint ts -> do
                              d' <- p dst d
                              url d' ts'
                          | otherwise ->
                              badUrl d ts
                      c :. ts -> do
                          write dst d c
                          url (d+1) ts
                      _ ->
                          ret d t
                  whitespace d t = case t of
                      c :. ts -> do
                          if isWhitespace c then
                              whitespace d ts
                          else if c == ')' then
                              ret d ts
                          else
                              badUrl d ts
                      _ ->
                          ret d t
                  badUrl d t = case t of
                      ')' :. ts -> go' BadUrl d ts
                      (escapedCodePoint' -> Just (_, ts)) -> do
                          badUrl d ts
                      _ :. ts ->
                          badUrl d ts
                      _ -> go' BadUrl d t
        mkText :: A.MArray s -> Int -> Writer s -> ST s (Text, Int, Text)
        mkText dest d w = do
            (d', ts) <- w dest d
            return (Text dsta d (d' - d), d', ts)

    r <- go 0 t0
    return (dst, r)


isWhitespace :: Char -> Bool
isWhitespace '\x0009' = True
isWhitespace '\x000A' = True
isWhitespace '\x0020' = True
isWhitespace _        = False

nonPrintableCodePoint :: Char -> Bool
nonPrintableCodePoint c
    | c >= '\x0000' && c <= '\x0008' = True -- NULL through BACKSPACE
    | c == '\x000B'                  = True -- LINE TABULATION
    | c >= '\x000E' && c <= '\x001F' = True -- SHIFT OUT through INFORMATION SEPARATOR ONE
    | c == '\x007F'                  = True -- DELETE
    | otherwise                      = False

isExponent :: Char -> Bool
isExponent c = c == 'e' || c == 'E'