module Network.Readability.Parser
( ParserToken(..)
, Article(..)
, parse
, parseByUrl
, parseById
, ArticleStatus(..)
, Status(..)
, getContentStatus
, getContentStatusByUrl
, getContentStatusById
, Confidence(..)
, getConfidence
) where
import Control.Applicative ((<$>))
import qualified Data.ByteString.Char8 as BS
import Data.Text (Text)
import Data.Aeson (FromJSON, eitherDecode)
import Data.Aeson.TH (deriveFromJSON, defaultOptions, fieldLabelModifier)
import Network.HTTP.Conduit (method, parseUrl, responseBody, responseHeaders, setQueryString, withManager, httpLbs)
newtype ParserToken = ParserToken BS.ByteString deriving (Eq, Show)
data Article = Article
{ content :: Maybe Text
, domain :: Maybe Text
, author :: Maybe Text
, url :: Text
, short_url :: Maybe Text
, title :: Maybe Text
, excerpt :: Maybe Text
, direction :: Maybe Text
, word_count :: Integer
, total_pages :: Maybe Integer
, date_published :: Maybe Text
, dek :: Maybe Text
, lead_image_url :: Maybe Text
, next_page_id :: Maybe Text
, rendered_pages :: Maybe Int
} deriving (Show, Eq)
$(deriveFromJSON defaultOptions ''Article)
data Confidence = Confidence
{ conf_url :: Text
, conf_confidence :: Double
} deriving (Show, Eq)
$(deriveFromJSON defaultOptions{ fieldLabelModifier = drop (length ("conf_" :: String)) } ''Confidence)
data ArticleStatus = ArticleStatus
{ as_article_id :: BS.ByteString
, as_article_status :: Status
} deriving (Show, Eq)
data Status
= Invalid
| Unretrieved
| ProvidedByUser
| ValidatedByUsers
| Fetched
deriving (Show, Eq)
apiPrefix :: String
apiPrefix = "https://readability.com/api/content/v1"
parseByUrl :: ParserToken -> BS.ByteString -> Maybe Int -> IO (Either String Article)
parseByUrl token articleUrl maximumPages = parse token (Just articleUrl) Nothing maximumPages
parseById :: ParserToken -> BS.ByteString -> Maybe Int -> IO (Either String Article)
parseById token articleId maximumPages = parse token Nothing (Just articleId) maximumPages
parse :: ParserToken
-> Maybe BS.ByteString
-> Maybe BS.ByteString
-> Maybe Int
-> IO (Either String Article)
parse (ParserToken token) articleUrl articleId maximumPages = readabilityRequest "/parser" params
where
params =
[ ("token", Just token)
]
++ maybeParam "url" articleUrl
++ maybeParam "id" articleId
++ maybeShowParam "max_pages" maximumPages
getContentStatusByUrl :: ParserToken -> BS.ByteString -> IO (Maybe ArticleStatus)
getContentStatusByUrl token articleUrl = getContentStatus token (Just articleUrl) Nothing
getContentStatusById :: ParserToken -> BS.ByteString -> IO (Maybe ArticleStatus)
getContentStatusById token articleId = getContentStatus token Nothing (Just articleId)
getContentStatus :: ParserToken
-> Maybe BS.ByteString
-> Maybe BS.ByteString
-> IO (Maybe ArticleStatus)
getContentStatus (ParserToken token) articleUrl articleId = contentStatusRequest params
where
params =
[ ("token", Just token)
]
++ maybeParam "url" articleUrl
++ maybeParam "id" articleId
contentStatusRequest :: [(BS.ByteString, Maybe BS.ByteString)] -> IO (Maybe ArticleStatus)
contentStatusRequest params = do
query <- setQueryString params <$> parseUrl (apiPrefix ++ "/parser")
response <- withManager $ httpLbs query{ method = "HEAD" }
let headers = responseHeaders response
return $ do
article_id <- lookup "X-Article-Id" headers
article_status <- parseStatus =<< lookup "X-Article-Status" headers
return $ ArticleStatus article_id article_status
getConfidence :: BS.ByteString
-> IO (Either String Confidence)
getConfidence article_url = readabilityRequest "/confidence" [ ("url", Just article_url) ]
readabilityRequest :: FromJSON a => String -> [(BS.ByteString, Maybe BS.ByteString)] -> IO (Either String a)
readabilityRequest api params = do
request <- setQueryString params <$> parseUrl (apiPrefix ++ api)
response <- withManager $ httpLbs request
return $ eitherDecode $ responseBody response
maybeShowParam :: (Show a) => BS.ByteString -> Maybe a -> [(BS.ByteString, Maybe BS.ByteString)]
maybeShowParam name = maybeParam name . Just . BS.pack . show
maybeParam :: BS.ByteString -> Maybe BS.ByteString -> [(BS.ByteString, Maybe BS.ByteString)]
maybeParam name = maybe [] $ \x -> [(name, Just x)]
parseStatus :: BS.ByteString -> Maybe Status
parseStatus "INVALID" = Just Invalid
parseStatus "UNRETRIEVED" = Just Unretrieved
parseStatus "PROVIDED_BY_USER" = Just ProvidedByUser
parseStatus "VALIDATED_BY_USERS" = Just ValidatedByUsers
parseStatus "FETCHED" = Just Fetched
parseStatus _ = Nothing