{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Module containing parsers for tweet and response data.
module Web.Tweet.Parser ( parseTweet
                        , getData ) where

import           Control.Composition        ((.*))
import           Control.Monad
import qualified Data.ByteString            as BS
import           Data.List                  (isInfixOf)
import qualified Data.Map                   as M
import           Data.Maybe
import qualified Data.Text                  as T
import qualified Data.Text.Encoding         as TE
import           Data.Void
import           Text.Megaparsec
import           Text.Megaparsec.Byte
import           Text.Megaparsec.Byte.Lexer as L
import           Web.Tweet.Types

type Parser = Parsec Void String

-- | Parse some number of tweets
parseTweet :: Parser Timeline
parseTweet :: Parser Timeline
parseTweet = ParsecT Void String Identity TweetEntity -> Parser Timeline
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void String Identity TweetEntity
-> ParsecT Void String Identity TweetEntity
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try ParsecT Void String Identity TweetEntity
getData ParsecT Void String Identity TweetEntity
-> ParsecT Void String Identity TweetEntity
-> ParsecT Void String Identity TweetEntity
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (String
-> Maybe Int
-> String
-> String
-> Int
-> [String]
-> Maybe TweetEntity
-> Int
-> Int
-> TweetEntity
TweetEntity String
"" Maybe Int
forall a. Maybe a
Nothing String
"" String
"" Int
0 [String]
forall a. Monoid a => a
mempty Maybe TweetEntity
forall a. Maybe a
Nothing Int
0 Int
0 TweetEntity
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity TweetEntity
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ ParsecT Void String Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof))

inReplyTo :: String -> Maybe Int
inReplyTo :: String -> Maybe Int
inReplyTo String
str =
    if String
"null" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isInfixOf` String
str
        then Maybe Int
forall a. Maybe a
Nothing
        else Int -> Maybe Int
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String -> Int
forall a. Read a => String -> a
read String
str)

-- | Parse a single tweet's: n, text, fave count, retweet count
getData :: Parser TweetEntity
getData :: ParsecT Void String Identity TweetEntity
getData = do
    Int
idNum <- String -> Int
forall a. Read a => String -> a
read (String -> Int)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> ParsecT Void String Identity String
filterStr String
"id"
    String
t <- String -> ParsecT Void String Identity String
filterStr String
"text"
    ParsecT Void String Identity ()
skipMentions
    Maybe Int
irt <- String -> Maybe Int
inReplyTo (String -> Maybe Int)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity (Maybe Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> ParsecT Void String Identity String
filterStr String
"in_reply_to_status_id"
    String
n <- String -> ParsecT Void String Identity String
filterStr String
"name"
    String
screenn' <- String -> ParsecT Void String Identity String
filterStr String
"screen_name"
    --withheldCountries <- (catMaybes . sequence) <$> optional filterList
    let withheldCountries :: [String]
withheldCountries = [String]
forall a. Monoid a => a
mempty
    --let toBlock = "DE" `elem` (catMaybes (sequence bannedList))
    String
isQuote <- String -> ParsecT Void String Identity String
filterStr String
"is_quote_status"
    case String
isQuote of
        String
"false" -> do
            Int
rts <- String -> Int
forall a. Read a => String -> a
read (String -> Int)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> ParsecT Void String Identity String
filterStr String
"retweet_count"
            Int
faves <- String -> Int
forall a. Read a => String -> a
read (String -> Int)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> ParsecT Void String Identity String
filterStr String
"favorite_count"
            TweetEntity -> ParsecT Void String Identity TweetEntity
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String
-> Maybe Int
-> String
-> String
-> Int
-> [String]
-> Maybe TweetEntity
-> Int
-> Int
-> TweetEntity
TweetEntity String
t Maybe Int
irt String
n String
screenn' Int
idNum [String]
withheldCountries Maybe TweetEntity
forall a. Maybe a
Nothing Int
rts Int
faves)
        String
"true" -> do
            Maybe TweetEntity
q <- Parser (Maybe TweetEntity)
parseQuoted
            Int
rts <- String -> Int
forall a. Read a => String -> a
read (String -> Int)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> ParsecT Void String Identity String
filterStr String
"retweet_count"
            Int
faves <- String -> Int
forall a. Read a => String -> a
read (String -> Int)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> ParsecT Void String Identity String
filterStr String
"favorite_count"
            TweetEntity -> ParsecT Void String Identity TweetEntity
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TweetEntity -> ParsecT Void String Identity TweetEntity)
-> TweetEntity -> ParsecT Void String Identity TweetEntity
forall a b. (a -> b) -> a -> b
$ String
-> Maybe Int
-> String
-> String
-> Int
-> [String]
-> Maybe TweetEntity
-> Int
-> Int
-> TweetEntity
TweetEntity String
t Maybe Int
irt String
n String
screenn' Int
idNum [String]
withheldCountries Maybe TweetEntity
q Int
rts Int
faves
        String
_ -> String -> ParsecT Void String Identity TweetEntity
forall a. HasCallStack => String -> a
error String
"is_quote_status must have a value of \"true\" or \"false\""

-- | Parse a the quoted tweet
parseQuoted :: Parser (Maybe TweetEntity)
parseQuoted :: Parser (Maybe TweetEntity)
parseQuoted = do
    ParsecT Void String Identity String
-> ParsecT Void String Identity (Maybe String)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
",\"quoted_status_id" ParsecT Void String Identity String
-> ParsecT Void String Identity String
-> ParsecT Void String Identity String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> ParsecT Void String Identity String
filterStr String
"quoted_status_id_str") -- FIXME it's skipping too many? prob is when two are deleted in a row twitter just dives in to RTs
    Maybe String
contents <- ParsecT Void String Identity String
-> ParsecT Void String Identity (Maybe String)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (ParsecT Void String Identity String
 -> ParsecT Void String Identity (Maybe String))
-> ParsecT Void String Identity String
-> ParsecT Void String Identity (Maybe String)
forall a b. (a -> b) -> a -> b
$ Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
"\",\"quoted_status"
    case Maybe String
contents of
        (Just String
_) -> TweetEntity -> Maybe TweetEntity
forall (f :: * -> *) a. Applicative f => a -> f a
pure (TweetEntity -> Maybe TweetEntity)
-> ParsecT Void String Identity TweetEntity
-> Parser (Maybe TweetEntity)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void String Identity TweetEntity
getData
        Maybe String
_        -> Maybe TweetEntity -> Parser (Maybe TweetEntity)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe TweetEntity
forall a. Maybe a
Nothing

-- | Skip a set of square brackets []
skipInsideBrackets :: Parser ()
skipInsideBrackets :: ParsecT Void String Identity ()
skipInsideBrackets = ParsecT Void String Identity [()]
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity [()]
-> ParsecT Void String Identity [()]
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
'[') (Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
']') (ParsecT Void String Identity [()]
 -> ParsecT Void String Identity [()])
-> ParsecT Void String Identity [()]
-> ParsecT Void String Identity [()]
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity ()
-> ParsecT Void String Identity [()]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void String Identity ()
skipInsideBrackets ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ([Token String] -> ParsecT Void String Identity (Token String)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
noneOf (String
"[]" :: String))))

-- | Skip user mentions field to avoid parsing the wrong n
skipMentions :: Parser ()
skipMentions :: ParsecT Void String Identity ()
skipMentions = do
    ParsecT Void String Identity ()
-> ParsecT Void String Identity [()]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void String Identity ()
 -> ParsecT Void String Identity [()])
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity [()]
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ParsecT Void String Identity ()
 -> ParsecT Void String Identity ())
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT Void String Identity String
-> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
"\"user_mentions\":")
    Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
",\"user_mentions\":"
    ParsecT Void String Identity ()
skipInsideBrackets

-- | Throw out input until we get to a relevant tag.
filterStr :: String -> Parser String
filterStr :: String -> ParsecT Void String Identity String
filterStr String
str = do
    ParsecT Void String Identity ()
-> ParsecT Void String Identity [()]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void String Identity ()
 -> ParsecT Void String Identity [()])
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity [()]
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity () -> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ParsecT Void String Identity ()
 -> ParsecT Void String Identity ())
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle ParsecT Void String Identity Char
-> ParsecT Void String Identity ()
-> ParsecT Void String Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ParsecT Void String Identity String
-> ParsecT Void String Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string (String
"\"" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
str String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\":"))
    Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
','
    String -> ParsecT Void String Identity String
filterTag String
str

-- | Parse a field given its tag
filterTag :: String -> Parser String
filterTag :: String -> ParsecT Void String Identity String
filterTag String
str = do
    Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string (Tokens String -> ParsecT Void String Identity (Tokens String))
-> Tokens String -> ParsecT Void String Identity (Tokens String)
forall a b. (a -> b) -> a -> b
$ String
"\"" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
str String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"\":"
    Maybe Char
open <- ParsecT Void String Identity Char
-> ParsecT Void String Identity (Maybe Char)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (ParsecT Void String Identity Char
 -> ParsecT Void String Identity (Maybe Char))
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity (Maybe Char)
forall a b. (a -> b) -> a -> b
$ Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
'\"'
    let forbidden :: String
forbidden = if Maybe Char -> Bool
forall a. Maybe a -> Bool
isJust Maybe Char
open then (String
"\\\"" :: String) else (String
"\\\"," :: String)
    ParsecT Void String Identity Char
-> ParsecT Void String Identity String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void String Identity Char
 -> ParsecT Void String Identity String)
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity String
forall a b. (a -> b) -> a -> b
$ ParsecT Void String Identity Char
parseHTMLChar ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Token String] -> ParsecT Void String Identity (Token String)
forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
noneOf String
[Token String]
forbidden ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> ParsecT Void String Identity Char
specialChar Char
'\"' ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> ParsecT Void String Identity Char
specialChar Char
'/' ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void String Identity Char
newlineChar ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void String Identity Char
emojiChar ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void String Identity Char
unicodeChar -- TODO modify parsec to make this parallel?

-- | Parse a newline
newlineChar :: Parser Char
newlineChar :: ParsecT Void String Identity Char
newlineChar = Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
"\\n" ParsecT Void String Identity String
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> ParsecT Void String Identity Char
forall (f :: * -> *) a. Applicative f => a -> f a
pure Char
'\n'

-- | Parser for unicode; twitter will give us something like "/u320a"
unicodeChar :: Parser Char
unicodeChar :: ParsecT Void String Identity Char
unicodeChar = Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> ([Token String] -> Int) -> [Token String] -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Int)
-> ([Token String] -> Integer) -> [Token String] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Token String] -> Integer
f ([Token String] -> Char)
-> ParsecT Void String Identity [Token String]
-> ParsecT Void String Identity Char
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void String Identity [Token String]
go
    where go :: ParsecT Void String Identity [Token String]
go = Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
"\\u" ParsecT Void String Identity (Tokens String)
-> ParsecT Void String Identity [Token String]
-> ParsecT Void String Identity [Token String]
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int
-> ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity [Token String]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
4 ParsecT Void String Identity (Token String)
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle
          f :: [Token String] -> Integer
f = ByteString -> Integer
fromHex (ByteString -> Integer)
-> ([Token String] -> ByteString) -> [Token String] -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
filterEmoji (ByteString -> ByteString)
-> ([Token String] -> ByteString) -> [Token String] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ByteString
BS.pack ([Word8] -> ByteString)
-> ([Token String] -> [Word8]) -> [Token String] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token String -> Word8) -> [Token String] -> [Word8]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Token String -> Int) -> Token String -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Token String -> Int
forall a. Enum a => a -> Int
fromEnum)

emojiChar :: Parser Char
emojiChar :: ParsecT Void String Identity Char
emojiChar = ParsecT Void String Identity String
-> ParsecT Void String Identity Char
go ParsecT Void String Identity String
ParsecT Void String Identity [Token String]
a
    where a :: ParsecT Void String Identity [Token String]
a = Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens String
"\\ud" ParsecT Void String Identity (Tokens String)
-> ParsecT Void String Identity [Token String]
-> ParsecT Void String Identity [Token String]
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Int
-> ParsecT Void String Identity (Token String)
-> ParsecT Void String Identity [Token String]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
3 ParsecT Void String Identity (Token String)
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle
          go :: ParsecT Void String Identity String
-> ParsecT Void String Identity Char
go = ParsecT Void String Identity (String -> Char)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Char
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
(<*>) (ParsecT Void String Identity (String -> Char)
 -> ParsecT Void String Identity String
 -> ParsecT Void String Identity Char)
-> (ParsecT Void String Identity String
    -> ParsecT Void String Identity (String -> Char))
-> ParsecT Void String Identity String
-> ParsecT Void String Identity Char
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (((Text -> Char
T.head (Text -> Char) -> (String -> Text) -> String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
decodeUtf16) (String -> Char)
-> (String -> String -> String) -> String -> String -> Char
forall c d a b. (c -> d) -> (a -> b -> c) -> a -> b -> d
.* (String -> String -> String
forall a. Semigroup a => a -> a -> a
(<>) (String -> String -> String)
-> (String -> String) -> String -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"d") (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"d" String -> String -> String
forall a. Semigroup a => a -> a -> a
<>))) (String -> String -> Char)
-> ParsecT Void String Identity String
-> ParsecT Void String Identity (String -> Char)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>)

decodeUtf16 :: String -> T.Text
decodeUtf16 :: String -> Text
decodeUtf16 = ByteString -> Text
TE.decodeUtf16BE (ByteString -> Text) -> (String -> ByteString) -> String -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> ByteString
BS.concat ([ByteString] -> ByteString)
-> (String -> [ByteString]) -> String -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [ByteString]
go
    where
        go :: String -> [ByteString]
go []             = []
        go (Char
a:Char
b:Char
c:Char
d:String
rest) = let sym :: ByteString
sym = String -> String -> ByteString
convert16 [Char
a,Char
b] [Char
c,Char
d] in ByteString
sym ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: String -> [ByteString]
go String
rest
        go String
_              = String -> [ByteString]
forall a. HasCallStack => String -> a
error String
"Internal error: decodeUtf16 failed."
        convert16 :: String -> String -> ByteString
convert16 String
x String
y = [Word8] -> ByteString
BS.pack [(String -> Word8
forall a. Read a => String -> a
read (String -> Word8) -> (String -> String) -> String -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"0x"String -> String -> String
forall a. Semigroup a => a -> a -> a
<>)) String
x, (String -> Word8
forall a. Read a => String -> a
read (String -> Word8) -> (String -> String) -> String -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"0x"String -> String -> String
forall a. Semigroup a => a -> a -> a
<>)) String
y]

-- | helper function to ignore emoji
filterEmoji :: BS.ByteString -> BS.ByteString
filterEmoji :: ByteString -> ByteString
filterEmoji ByteString
str = if ByteString -> Word8
BS.head ByteString
str Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum (Char -> Word8) -> Char -> Word8
forall a b. (a -> b) -> a -> b
$ Char
'd') then ByteString
"FFFD" else ByteString
str

-- | Parse HTML chars
parseHTMLChar :: Parser Char
parseHTMLChar :: ParsecT Void String Identity Char
parseHTMLChar = do
    Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
'&'
    String
innards <- ParsecT Void String Identity Char
-> ParsecT Void String Identity String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many (ParsecT Void String Identity Char
 -> ParsecT Void String Identity String)
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity String
forall a b. (a -> b) -> a -> b
$ Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
anySingleBut Char
Token String
';'
    Token String -> ParsecT Void String Identity (Token String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Token s -> m (Token s)
single Char
Token String
';'
    Char -> ParsecT Void String Identity Char
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char -> ParsecT Void String Identity Char)
-> (Maybe Char -> Char)
-> Maybe Char
-> ParsecT Void String Identity Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\case
        (Just Char
a) -> Char
a
        Maybe Char
Nothing -> Char
'?') (Maybe Char -> ParsecT Void String Identity Char)
-> Maybe Char -> ParsecT Void String Identity Char
forall a b. (a -> b) -> a -> b
$ String -> Map String Char -> Maybe Char
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
innards ([(String, Char)] -> Map String Char
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [(String
"amp",Char
'&'),(String
"gt",Char
'>'),(String
"lt",Char
'<'),(String
"quot",Char
'"'),(String
"euro",Char
'€'),(String
"ndash",Char
'–'),(String
"mdash",Char
'—')])

-- | Parse escaped characters
specialChar :: Char -> Parser Char
specialChar :: Char -> ParsecT Void String Identity Char
specialChar Char
c = Tokens String -> ParsecT Void String Identity (Tokens String)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string (String
"\\" String -> String -> String
forall a. Semigroup a => a -> a -> a
<> Char -> String
forall (f :: * -> *) a. Applicative f => a -> f a
pure Char
c) ParsecT Void String Identity String
-> ParsecT Void String Identity Char
-> ParsecT Void String Identity Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> ParsecT Void String Identity Char
forall (f :: * -> *) a. Applicative f => a -> f a
pure Char
c

-- | Convert a string of four hexadecimal digits to an integer.
fromHex :: BS.ByteString -> Integer
fromHex :: ByteString -> Integer
fromHex = Maybe Integer -> Integer
forall p. Maybe p -> p
fromRight (Maybe Integer -> Integer)
-> (ByteString -> Maybe Integer) -> ByteString -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parsec Void ByteString Integer -> ByteString -> Maybe Integer
forall e s a. (Ord e, Stream s) => Parsec e s a -> s -> Maybe a
parseMaybe (Parsec Void ByteString Integer
forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Word8, Num a) =>
m a
L.hexadecimal :: Parsec Void BS.ByteString Integer)
    where fromRight :: Maybe p -> p
fromRight (Just p
a) = p
a
          fromRight Maybe p
_        = String -> p
forall a. HasCallStack => String -> a
error String
"failed to parse hex"