module Web.Tweet.Parser ( parseTweet
, getData ) where
import qualified Data.ByteString as BS
import Text.Megaparsec.ByteString
import Text.Megaparsec.Lexer as L
import Text.Megaparsec
import Web.Tweet.Types
import Data.Monoid
import qualified Data.Map as M
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Control.Monad
parseTweet :: Parser Timeline
parseTweet = many (try getData <|> (const (TweetEntity "" "" "" 0 Nothing 0 0) <$> eof))
getData :: Parser TweetEntity
getData = do
id <- read <$> filterStr "id"
text <- filterStr "text"
skipMentions
name <- filterStr "name"
screenName' <- filterStr "screen_name"
isQuote <- filterStr "is_quote_status"
case isQuote of
"false" -> do
rts <- read <$> filterStr "retweet_count"
faves <- read <$> filterStr "favorite_count"
pure (TweetEntity text name screenName' id Nothing rts faves)
"true" -> do
quoted <- parseQuoted
rts <- read <$> filterStr "retweet_count"
faves <- read <$> filterStr "favorite_count"
pure $ TweetEntity text name screenName' id quoted rts faves
parseQuoted :: Parser (Maybe TweetEntity)
parseQuoted = do
optional (string ",\"quoted_status_id" >> filterStr "quoted_status_id_str")
contents <- optional $ string "\",\"quoted_status"
case contents of
(Just contents) -> pure <$> getData
_ -> pure Nothing
skipInsideBrackets :: Parser ()
skipInsideBrackets = void (between (char '[') (char ']') $ many (skipInsideBrackets <|> void (noneOf ("[]" :: String))))
skipMentions :: Parser ()
skipMentions = do
many $ try $ anyChar >> notFollowedBy (string "\"user_mentions\":")
char ','
string "\"user_mentions\":"
skipInsideBrackets
pure ()
filterStr :: String -> Parser String
filterStr str = do
many $ try $ anyChar >> notFollowedBy (string ("\"" <> str <> "\":"))
char ','
filterTag str
filterTag :: String -> Parser String
filterTag str = do
string $ "\"" <> str <> "\":"
open <- optional $ char '\"'
let forbidden = if isJust open then ("\\\"" :: String) else ("\\\"," :: String)
want <- many $ parseHTMLChar <|> noneOf forbidden <|> specialChar '\"' <|> specialChar '/' <|> newlineChar <|> emojiChar <|> unicodeChar
pure want
newlineChar :: Parser Char
newlineChar = do
string "\\n"
pure '\n'
unicodeChar :: Parser Char
unicodeChar = do
string "\\u"
num <- fromHex . filterEmoji . BS.pack . map (fromIntegral . fromEnum) <$> count 4 anyChar
pure . toEnum . fromIntegral $ num
emojiChar :: Parser Char
emojiChar = do
string "\\ud"
str1 <- count 3 anyChar
str2 <- string "\\ud" >> count 3 anyChar
let num = decodeUtf16 $ "d" <> str1 <> "d" <> str2
pure . head $ num
decodeUtf16 = T.unpack . TE.decodeUtf16BE . BS.concat . go
where
go [] = []
go (a:b:c:d:rest) = let sym = convert16 [a,b] [c,d] in sym : go rest
convert16 x y = BS.pack [(read . ("0x"<>)) x, (read . ("0x"<>)) y]
filterEmoji str = if BS.head str == (fromIntegral . fromEnum $ 'd') then "FFFD" else str
parseHTMLChar :: Parser Char
parseHTMLChar = do
char '&'
innards <- many $ noneOf (";" :: String)
char ';'
pure . (\case
(Just a) -> a
Nothing -> '?') $ M.lookup innards (M.fromList [("amp",'&'),("gt",'>'),("lt",'<'),("quot",'"'),("euro",'€'),("ndash",'–'),("mdash",'—')])
specialChar :: Char -> Parser Char
specialChar c = do
string $ "\\" ++ pure c
pure c
fromHex :: BS.ByteString -> Integer
fromHex = fromRight . (parse (L.hexadecimal :: Parser Integer) "")
where fromRight (Right a) = a
fromRight (Left x) = error (show x)