{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Module containing parsers for tweet and response data.
module Web.Tweet.Parser ( parseTweet
                        , getData ) where

import Control.Composition ((.*))
import qualified Data.ByteString as BS
import Text.Megaparsec.Byte
import Text.Megaparsec.Byte.Lexer as L
import Text.Megaparsec
import Web.Tweet.Types
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Control.Monad
import Data.Maybe
import Data.Void

type Parser = Parsec Void String

-- | Parse some number of tweets
parseTweet :: Parser Timeline
parseTweet = many (try getData <|> (const (TweetEntity "" "" "" 0 mempty Nothing 0 0) <$> eof))

-- | Parse a single tweet's: n, text, fave count, retweet count
getData :: Parser TweetEntity
getData = do
    idNum <- read <$> filterStr "id"
    t <- filterStr "text"
    skipMentions
    n <- filterStr "name"
    screenn' <- filterStr "screen_name"
    --withheldCountries <- (catMaybes . sequence) <$> optional filterList
    let withheldCountries = mempty
    --let toBlock = "DE" `elem` (catMaybes (sequence bannedList))
    isQuote <- filterStr "is_quote_status"
    case isQuote of
        "false" -> do
            rts <- read <$> filterStr "retweet_count"
            faves <- read <$> filterStr "favorite_count"
            pure (TweetEntity t n screenn' idNum withheldCountries Nothing rts faves)
        "true" -> do
            q <- parseQuoted
            rts <- read <$> filterStr "retweet_count"
            faves <- read <$> filterStr "favorite_count"
            pure $ TweetEntity t n screenn' idNum withheldCountries q rts faves
        _ -> error "is_quote_status must have a value of \"true\" or \"false\""

-- | Parse a the quoted tweet
parseQuoted :: Parser (Maybe TweetEntity)
parseQuoted = do
    optional (string ",\"quoted_status_id" >> filterStr "quoted_status_id_str") -- FIXME it's skipping too many? prob is when two are deleted in a row twitter just dives in to RTs
    contents <- optional $ string "\",\"quoted_status"
    case contents of
        (Just _) -> pure <$> getData
        _ -> pure Nothing

-- | Skip a set of square brackets []
skipInsideBrackets :: Parser ()
skipInsideBrackets = void (between (single '[') (single ']') $ many (skipInsideBrackets <|> void (noneOf ("[]" :: String))))

-- | Skip user mentions field to avoid parsing the wrong n
skipMentions :: Parser ()
skipMentions = do
    many $ try $ anySingle >> notFollowedBy (string "\"user_mentions\":")
    string ",\"user_mentions\":"
    skipInsideBrackets

-- | Throw out input until we get to a relevant tag.
filterStr :: String -> Parser String
filterStr str = do
    many $ try $ anySingle >> notFollowedBy (string ("\"" <> str <> "\":"))
    single ','
    filterTag str

-- | Parse a field given its tag
filterTag :: String -> Parser String
filterTag str = do
    string $ "\"" <> str <> "\":"
    open <- optional $ single '\"'
    let forbidden = if isJust open then ("\\\"" :: String) else ("\\\"," :: String)
    many $ parseHTMLChar <|> noneOf forbidden <|> specialChar '\"' <|> specialChar '/' <|> newlineChar <|> emojiChar <|> unicodeChar -- TODO modify parsec to make this parallel?

-- | Parse a newline
newlineChar :: Parser Char
newlineChar = string "\\n" >> pure '\n'

-- | Parser for unicode; twitter will give us something like "/u320a"
unicodeChar :: Parser Char
unicodeChar = toEnum . fromIntegral . f <$> go
    where go = string "\\u" >> count 4 anySingle
          f = fromHex . filterEmoji . BS.pack . fmap (fromIntegral . fromEnum)

emojiChar :: Parser Char
emojiChar = go a
    where a = string "\\ud" >> count 3 anySingle
          go = (<*>) =<< (((T.head . decodeUtf16) .* ((<>) . (<> "d") . ("d" <>))) <$>)

decodeUtf16 :: String -> T.Text
decodeUtf16 = TE.decodeUtf16BE . BS.concat . go
    where
        go []             = []
        go (a:b:c:d:rest) = let sym = convert16 [a,b] [c,d] in sym : go rest
        go _ = error "Internal error: decodeUtf16 failed."
        convert16 x y = BS.pack [(read . ("0x"<>)) x, (read . ("0x"<>)) y]

-- | helper function to ignore emoji
filterEmoji :: BS.ByteString -> BS.ByteString
filterEmoji str = if BS.head str == (fromIntegral . fromEnum $ 'd') then "FFFD" else str

-- | Parse HTML chars
parseHTMLChar :: Parser Char
parseHTMLChar = do
    single '&'
    innards <- many $ anySingleBut ';'
    single ';'
    pure . (\case
        (Just a) -> a
        Nothing -> '?') $ M.lookup innards (M.fromList [("amp",'&'),("gt",'>'),("lt",'<'),("quot",'"'),("euro",'€'),("ndash",'–'),("mdash",'—')])

-- | Parse escaped characters
specialChar :: Char -> Parser Char
specialChar c = string ("\\" <> pure c) >> pure c

-- | Convert a string of four hexadecimal digits to an integer.
fromHex :: BS.ByteString -> Integer
fromHex = fromRight . parseMaybe (L.hexadecimal :: Parsec Void BS.ByteString Integer)
    where fromRight (Just a) = a
          fromRight _        = error "failed to parse hex"