{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnicodeSyntax #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_HADDOCK show-extensions #-}
-- |
-- Module : HBooru.Types
-- Copyright : (c) Mateusz Kowalczyk 2013-2014
-- License : GPL-3
--
-- Maintainer : fuuzetsu@fuuzetsu.co.uk
-- Stability : experimental
--
-- Module definining types used by the library.
module HBooru.Types where
import Control.Arrow
import Control.Applicative
import Control.Monad
import Control.Exception
import Control.Monad.Error
import Data.Proxy
import GHC.TypeLits (Symbol)
import Data.Vinyl
import Data.Vinyl.TH
import Network.HTTP.Conduit (HttpException(..))
import Prelude
import Text.XML.HXT.Core hiding (mkName, (<+>))
-- | Tags used for searching in sites. No special escaping is done.
-- Note that many sites would treat a tag like \"striped panties\"
-- as two separate tags and you wouldn't get the results you were after.
type Tag = String
-- | Data format used by various 'Site's. See instances for currently used
-- formats.
class DataFormat a where
-- | Used as one of the data formats.
data XML = XML deriving Show
-- | Used as one of the data formats.
data JSON = JSON deriving Show
instance DataFormat XML where
instance DataFormat JSON where
-- | Thanks to this class, we're able to provide instances converting
-- from a 'DataFormat' to 'Response'. This is useful if we need a 'DataFormat'
-- while we only have a type that's an instance of 'Response'. Note that the
-- functional dependency currently requires that there is only one way to coerce
-- between two types.
class Response r ⇒ CoerceResponse x r | x → r, r → x where
-- | Given something and a 'String', we get the appropriate 'Response'.
-- For example with @instance 'CoerceResponse' 'XML' 'XMLResponse'@:
--
-- >>> toResponse XML ""
-- XMLReponse ""
toResponse ∷ x → String → r
-- | Given some kind of 'Response', we get the appropriate value back,
-- depending on the class instance.
-- For example with @instance 'CoerceResponse' 'XML' 'XMLResponse'@:
--
-- >>> fromResponse $ XMLReponse ""
-- XML
fromResponse ∷ r → x
instance CoerceResponse XML XMLResponse where
toResponse _ = XMLResponse
fromResponse _ = XML
instance CoerceResponse JSON JSONResponse where
toResponse _ = JSONResponse
fromResponse _ = JSON
-- | Class specifying a parser that can fetch posts. A post usually
-- consists of links to the image, samples, and some meta-data. The
-- reason for this class is that sometimes we might get different
-- information based on the 'DataFormat' we use so we use type
-- families to denote this rather than forcing the library user to
-- make do with our best guess on what goes into the post. It also
-- allows us to use different post types for sites that provide
-- different information.
class (Site s, DataFormat r) ⇒ PostParser s r where
type ImageTy s r
-- | Given a parser working with 'DataFormat' specified by an instance of
-- this class, we require through 'CoerceResponse' that it is able to parse
-- responses in the format so what we actually pass into this function is
-- the 'Site' this parser works with (so that we can pick the appropriate data
-- type for the posts) and a 'Response' matching the 'DataFormat' (through a
-- class instance). For @PostParser 'Gelbooru' 'XML'@ instance, example use
-- might go like
--
-- @
-- do fc \<- 'XMLResponse' <$> 'readFile' \"gelbooruResponse.xml\"
-- -- the type of images is actually inferred for us
-- let images ∷ ['HBooru.Parsers.Gelbooru.GelbooruPost']
-- images = parseResponse 'HBooru.Parsers.Gelbooru.Gelbooru' fc
-- return images
-- @
--
-- The cool thing is that we can't feed anything but 'XMLResponse' to an
-- XML parser.
parseResponse ∷ CoerceResponse r r' ⇒ s → r' → [ImageTy s r]
-- | Describes whether a response from a 'Site' in given 'DataFormat'
-- allows us to get the information about total number of posts matching our
-- query. Some sites don't provide this information.
class (Site s, DataFormat r) ⇒ Counted s r where
-- | Parses out the number of available images from a response.
parseCount ∷ CoerceResponse r r' ⇒ s → r' → Integer
class (Counted s r, Postable s r) ⇒ PostablePaged s r where
-- | Similar to 'postUrl' but requests images from specific page if
-- the site allows it.
postUrlPaged ∷ s → r → [Tag] → Integer → String
postUrlPaged s r ts i = postUrl s r ts ++ "&pid=" ++ show i
-- | If we can make an API request to 'Site' in a specific 'DataFormat', we can
-- use instances of this class to pass in
class PostParser s r ⇒ Postable s r where
-- | Given a 'Site', a 'DataFormat' and a list of 'Tag's, an instance of this
-- class should be able to return a 'String' at which we can find data in
-- 'DataFormat' format that honours our tags. This is effectively a URL
-- builder for POST requests.
postUrl ∷ s → r → [Tag] → String
-- | Provides information about whether there's a hard limit on the amount of
-- posts we can fetch from the site at once. The reason for this function here
-- rather than in 'Site' is that we might be parsing data without an API we
-- can post to at all and we're getting our data through other means.
hardLimit ∷ s → r → Limit
-- | Describes a site for a parser. The reason why this isn't a simple data type
-- is to allow us to write additional parsers in the future without modifying
-- this library if we wish to do so.
class Site s where
-- | Rating used on *booru sites.
data Rating = Safe | Questionable | Explicit deriving (Show, Eq)
-- | Denotes whethere there's a hard limit on the number of posts
-- we can fetch at a time from a site. NoLimit implies that we can fetch
-- everything at once and not that we don't know. See 'Counted' for a way to
-- potentially retrieve number of posts present on the site.
data Limit = NoLimit | Limit Integer deriving (Show, Eq)
-- | One of the formats we can receive responses from sites in. For things
-- like parsers parametrisation, use 'XML' instead and use methods in
-- 'CoerceResponse' if you need to.
data XMLResponse = XMLResponse String deriving Show
-- | One of the formats we can receive responses from sites in. For things
-- like parsers parametrisation, use 'JSON' instead and use methods in
-- 'CoerceResponse' if you need to.
data JSONResponse = JSONResponse String deriving Show
-- | Specifies what is considered a response. You'll almost certainly also
-- want new 'DataFormat' and 'CoerceResponse' instances if you're adding some
-- here. This class assumes that all responses carry the response in a string we
-- can extract. Note that this is not for use as network response if you're
-- scraping, only for putting data into after you have done all the error
-- checking and whatnot.
class Response r where
-- | Extract the response string.
getResponse ∷ r → String
instance Response XMLResponse where
getResponse (XMLResponse x) = x
instance Response JSONResponse where
getResponse (JSONResponse x) = x
instance Functor (LA XmlTree) where
fmap f (LA g) = LA $ fmap fmap fmap f g
bA ∷ ArrowApply cat ⇒ cat c' b → (b → cat c' c) → cat c' c
bA mx f = (arr (\a -> mx >>> arr (\x -> (f x, a)) >>> app) &&& arr id) >>> app
instance Applicative (LA XmlTree) where
pure x = LA . const $ return x
(<*>) = ap
instance Monad (LA XmlTree) where
return = pure
(>>=) = bA
-- | Parse failures from various parsers
newtype ParseFailure = PF String deriving (Show, Eq)
instance Error ParseFailure where
noMsg = PF noMsg
strMsg = PF . strMsg
-- | Alias for our parser monad with failure possibility
type Parse = Either ParseFailure
data RealWorldExcs = Network HttpException
| IOE IOException
| SomethingElse String
deriving (Show)
instance Error RealWorldExcs where
noMsg = SomethingElse noMsg
strMsg = SomethingElse . strMsg
type ExcIO a = ErrorT RealWorldExcs IO a
makeUniverse' ''Symbol "ElF"
semantics ''ElF [ [t| "height" |] :~> [t| Integer |]
, [t| "score" |] :~> [t| Integer |]
, [t| "file_url" |] :~> [t| String |]
, [t| "parent_id" |] :~> [t| Maybe Integer |]
, [t| "sample_url" |] :~> [t| String |]
, [t| "sample_width" |] :~> [t| Integer |]
, [t| "sample_height" |] :~> [t| Integer |]
, [t| "preview_url" |] :~> [t| String |]
, [t| "rating" |] :~> [t| Rating |]
, [t| "tags" |] :~> [t| [Tag] |]
, [t| "id" |] :~> [t| Integer |]
, [t| "width" |] :~> [t| Integer |]
, [t| "change" |] :~> [t| Int |]
, [t| "md5" |] :~> [t| String |]
, [t| "creator_id" |] :~> [t| Integer |]
, [t| "has_children" |] :~> [t| Bool |]
, [t| "created_at" |] :~> [t| String |]
, [t| "status" |] :~> [t| String |]
, [t| "source" |] :~> [t| String |]
, [t| "has_notes" |] :~> [t| Maybe Bool |]
, [t| "has_comments" |] :~> [t| Maybe Bool |]
, [t| "preview_width" |] :~> [t| Integer |]
, [t| "preview_height" |] :~> [t| Integer |]
, [t| "author" |] :~> [t| String |]
, [t| "frames" |] :~> [t| String |]
, [t| "frames_pending" |] :~> [t| String |]
, [t| "frames_pending_string" |] :~> [t| String |]
, [t| "frames_string" |] :~> [t| String |]
, [t| "is_held" |] :~> [t| Bool |]
, [t| "is_shown_in_index" |] :~> [t| Bool |]
, [t| "jpeg_file_size" |] :~> [t| Integer |]
, [t| "jpeg_height" |] :~> [t| Integer |]
, [t| "jpeg_url" |] :~> [t| String |]
, [t| "jpeg_width" |] :~> [t| Integer |]
, [t| "sample_file_size" |] :~> [t| Integer |]
, [t| "actual_preview_height" |] :~> [t| Integer |]
, [t| "actual_preview_width" |] :~> [t| Integer |]
, [t| "file_size" |] :~> [t| Integer |]
]
-- | Handy synonym hiding 'ElF'.
type R a = PlainRec ElF a
-- | 'R' wrapped in a 'Parse'.
type PR a = Parse (R a)
-- * Commonly used fields
height ∷ Proxy "height"
height = Proxy
score ∷ Proxy "score"
score = Proxy
file_url ∷ Proxy "file_url"
file_url = Proxy
parent_id ∷ Proxy "parent_id"
parent_id = Proxy
sample_url ∷ Proxy "sample_url"
sample_url = Proxy
sample_width ∷ Proxy "sample_width"
sample_width = Proxy
sample_height ∷ Proxy "sample_height"
sample_height = Proxy
preview_url ∷ Proxy "preview_url"
preview_url = Proxy
rating ∷ Proxy "rating"
rating = Proxy
tags ∷ Proxy "tags"
tags = Proxy
id ∷ Proxy "id"
id = Proxy
width ∷ Proxy "width"
width = Proxy
change ∷ Proxy "change"
change = Proxy
md5 ∷ Proxy "md5"
md5 = Proxy
creator_id ∷ Proxy "creator_id"
creator_id = Proxy
has_children ∷ Proxy "has_children"
has_children = Proxy
created_at ∷ Proxy "created_at"
created_at = Proxy
status ∷ Proxy "status"
status = Proxy
source ∷ Proxy "source"
source = Proxy
has_notes ∷ Proxy "has_notes"
has_notes = Proxy
has_comments ∷ Proxy "has_comments"
has_comments = Proxy
preview_width ∷ Proxy "preview_width"
preview_width = Proxy
preview_height ∷ Proxy "preview_height"
preview_height = Proxy
author ∷ Proxy "author"
author = Proxy
frames ∷ Proxy "frames"
frames = Proxy
frames_pending ∷ Proxy "frames_pending"
frames_pending = Proxy
frames_pending_string ∷ Proxy "frames_pending_string"
frames_pending_string = Proxy
frames_string ∷ Proxy "frames_string"
frames_string = Proxy
is_held ∷ Proxy "is_held"
is_held = Proxy
is_shown_in_index ∷ Proxy "is_shown_in_index"
is_shown_in_index = Proxy
jpeg_file_size ∷ Proxy "jpeg_file_size"
jpeg_file_size = Proxy
jpeg_height ∷ Proxy "jpeg_height"
jpeg_height = Proxy
jpeg_url ∷ Proxy "jpeg_url"
jpeg_url = Proxy
jpeg_width ∷ Proxy "jpeg_width"
jpeg_width = Proxy
sample_file_size ∷ Proxy "sample_file_size"
sample_file_size = Proxy
actual_preview_height ∷ Proxy "actual_preview_height"
actual_preview_height = Proxy
actual_preview_width ∷ Proxy "actual_preview_width"
actual_preview_width = Proxy
file_size ∷ Proxy "file_size"
file_size = Proxy