{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnicodeSyntax #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_HADDOCK show-extensions #-}

-- |
-- Module      :  HBooru.Types
-- Copyright   :  (c) Mateusz Kowalczyk 2013-2014
-- License     :  GPL-3
--
-- Maintainer  :  fuuzetsu@fuuzetsu.co.uk
-- Stability   :  experimental
--
-- Module defining types used by the library.
module HBooru.Types where

import Control.Arrow
import Control.Applicative
import Control.Monad
import Control.Exception
import Control.Monad.Error
import Data.Proxy
import GHC.TypeLits (Symbol)
import Data.Vinyl
import Data.Vinyl.TH
import Network.HTTP.Conduit (HttpException(..))
import Prelude
import Text.XML.HXT.Core hiding (mkName, (<+>))

-- | Tags used for searching in sites. No special escaping is done.
-- Note that many sites would treat a tag like \"striped panties\"
-- as two separate tags and you wouldn't get the results you were after.
type Tag = String

-- | Data format used by various 'Site's. See instances for currently used
-- formats.
class DataFormat a where

-- | Used as one of the data formats.
data XML = XML deriving Show

-- | Used as one of the data formats.
data JSON = JSON deriving Show

instance DataFormat XML where
instance DataFormat JSON where

-- | Thanks to this class, we're able to provide instances converting
-- from a 'DataFormat' to 'Response'. This is useful if we need a 'DataFormat'
-- while we only have a type that's an instance of 'Response'. Note that the
-- functional dependency currently requires that there is only one way to coerce
-- between two types.
class Response r ⇒ CoerceResponse x r | x → r, r → x where
  -- | Given something and a 'String', we get the appropriate 'Response'.
  -- For example with @instance 'CoerceResponse' 'XML' 'XMLResponse'@:
  --
  -- >>> toResponse XML ""
  -- XMLResponse ""
  toResponse ∷ x → String → r

  -- | Given some kind of 'Response', we get the appropriate value back,
  -- depending on the class instance.
  -- For example with @instance 'CoerceResponse' 'XML' 'XMLResponse'@:
  --
  -- >>> fromResponse $ XMLResponse ""
  -- XML
  fromResponse ∷ r → x

instance CoerceResponse XML XMLResponse where
  toResponse _ = XMLResponse
  fromResponse _ = XML

instance CoerceResponse JSON JSONResponse where
  toResponse _ = JSONResponse
  fromResponse _ = JSON

-- | Class specifying a parser that can fetch posts. A post usually
-- consists of links to the image, samples, and some meta-data. The
-- reason for this class is that sometimes we might get different
-- information based on the 'DataFormat' we use so we use type
-- families to denote this rather than forcing the library user to
-- make do with our best guess on what goes into the post. It also
-- allows us to use different post types for sites that provide
-- different information.
class (Site s, DataFormat r) ⇒ PostParser s r where
  -- | The post type produced when parsing site @s@ in data format @r@.
  type ImageTy s r

  -- | Given a parser working with 'DataFormat' specified by an instance of
  -- this class, we require through 'CoerceResponse' that it is able to parse
  -- responses in the format so what we actually pass into this function is
  -- the 'Site' this parser works with (so that we can pick the appropriate data
  -- type for the posts) and a 'Response' matching the 'DataFormat' (through a
  -- class instance). For @PostParser 'Gelbooru' 'XML'@ instance, example use
  -- might go like
  --
  -- @
  -- do fc \<- 'XMLResponse' <$> 'readFile' \"gelbooruResponse.xml\"
  --    -- the type of images is actually inferred for us
  --    let images ∷ ['HBooru.Parsers.Gelbooru.GelbooruPost']
  --        images = parseResponse 'HBooru.Parsers.Gelbooru.Gelbooru' fc
  --    return images
  -- @
  --
  -- The cool thing is that we can't feed anything but 'XMLResponse' to an
  -- XML parser.
  parseResponse ∷ CoerceResponse r r' ⇒ s → r' → [ImageTy s r]

-- | Describes whether a response from a 'Site' in given 'DataFormat'
-- allows us to get the information about total number of posts matching our
-- query. Some sites don't provide this information.
class (Site s, DataFormat r) ⇒ Counted s r where
  -- | Parses out the number of available images from a response.
  parseCount ∷ CoerceResponse r r' ⇒ s → r' → Integer

-- | Sites that are both 'Counted' and 'Postable' and whose posts can be
-- requested one page at a time.
class (Counted s r, Postable s r) ⇒ PostablePaged s r where
  -- | Similar to 'postUrl' but requests images from specific page if
  -- the site allows it.
  --
  -- The default implementation appends @&pid=\<page\>@ to the result of
  -- 'postUrl'; instances may override it.
  postUrlPaged ∷ s → r → [Tag] → Integer → String
  postUrlPaged s r ts i = postUrl s r ts ++ "&pid=" ++ show i

-- | If we can make an API request to 'Site' in a specific 'DataFormat', we can
-- use instances of this class to build the request URL ('postUrl') and to
-- find out how many posts may be fetched at once ('hardLimit').
class PostParser s r ⇒ Postable s r where
  -- | Given a 'Site', a 'DataFormat' and a list of 'Tag's, an instance of this
  -- class should be able to return a 'String' at which we can find data in
  -- 'DataFormat' format that honours our tags. This is effectively a URL
  -- builder for POST requests.
  postUrl ∷ s → r → [Tag] → String

  -- | Provides information about whether there's a hard limit on the amount of
  -- posts we can fetch from the site at once. The reason for this function here
  -- rather than in 'Site' is that we might be parsing data without an API we
  -- can post to at all and we're getting our data through other means.
  hardLimit ∷ s → r → Limit

-- | Describes a site for a parser. The reason why this isn't a simple data type
-- is to allow us to write additional parsers in the future without modifying
-- this library if we wish to do so.
class Site s where

-- | Rating used on *booru sites.
data Rating = Safe | Questionable | Explicit deriving (Show, Eq)

-- | Denotes whether there's a hard limit on the number of posts
-- we can fetch at a time from a site. 'NoLimit' implies that we can fetch
-- everything at once and not that we don't know. See 'Counted' for a way to
-- potentially retrieve number of posts present on the site.
data Limit = NoLimit | Limit Integer deriving (Show, Eq)

-- | One of the formats we can receive responses from sites in. For things
-- like parsers parametrisation, use 'XML' instead and use methods in
-- 'CoerceResponse' if you need to.
data XMLResponse = XMLResponse String deriving Show

-- | One of the formats we can receive responses from sites in. For things
-- like parsers parametrisation, use 'JSON' instead and use methods in
-- 'CoerceResponse' if you need to.
data JSONResponse = JSONResponse String deriving Show

-- | Specifies what is considered a response. You'll almost certainly also
-- want new 'DataFormat' and 'CoerceResponse' instances if you're adding some
-- here. This class assumes that all responses carry the response in a string we
-- can extract. Note that this is not for use as network response if you're
-- scraping, only for putting data into after you have done all the error
-- checking and whatnot.
class Response r where
  -- | Extract the response string.
  getResponse ∷ r → String

instance Response XMLResponse where
  getResponse (XMLResponse x) = x

instance Response JSONResponse where
  getResponse (JSONResponse x) = x

-- | Orphan instance: maps a function over every result the list arrow
-- produces.
instance Functor (LA XmlTree) where
  fmap f (LA g) = LA (fmap f . g)

-- | Monadic bind expressed with 'ArrowApply'.
--
-- Runs @mx@ on the input, hands its result to @f@ to obtain a second arrow
-- and then runs that second arrow on the /same/ original input.
bA ∷ ArrowApply cat ⇒ cat c' b → (b → cat c' c) → cat c' c
bA mx f = (arr (\a -> mx >>> arr (\x -> (f x, a)) >>> app) &&& returnA) >>> app
  -- 'returnA' is used in place of @arr id@ so that this definition cannot be
  -- confused with the @id@ field proxy this module also defines.

-- | Orphan instance: 'pure' ignores the input tree and yields exactly one
-- result.
instance Applicative (LA XmlTree) where
  pure x = LA (const [x])
  (<*>) = ap

-- | Orphan instance built on top of 'bA'.
instance Monad (LA XmlTree) where
  return = pure
  (>>=) = bA

-- | Parse failures from various parsers
newtype ParseFailure = PF String deriving (Show, Eq)

instance Error ParseFailure where
  noMsg = PF noMsg
  strMsg = PF . strMsg

-- | Alias for our parser monad with failure possibility
type Parse = Either ParseFailure

-- | Failures carried by 'ExcIO': a wrapped 'HttpException', a wrapped
-- 'IOException', or a free-form message.
data RealWorldExcs = Network HttpException
                   | IOE IOException
                   | SomethingElse String
                   deriving (Show)

instance Error RealWorldExcs where
  noMsg = SomethingElse noMsg
  strMsg = SomethingElse . strMsg

-- | 'IO' computation that can fail with 'RealWorldExcs'.
type ExcIO a = ErrorT RealWorldExcs IO a

-- Field universe @ElF@, indexed by type-level 'Symbol's.
makeUniverse' ''Symbol "ElF"

-- The type of the value stored under each field name.
semantics ''ElF [ [t| "height" |] :~> [t| Integer |]
                , [t| "score" |] :~> [t| Integer |]
                , [t| "file_url" |] :~> [t| String |]
                , [t| "parent_id" |] :~> [t| Maybe Integer |]
                , [t| "sample_url" |] :~> [t| String |]
                , [t| "sample_width" |] :~> [t| Integer |]
                , [t| "sample_height" |] :~> [t| Integer |]
                , [t| "preview_url" |] :~> [t| String |]
                , [t| "rating" |] :~> [t| Rating |]
                , [t| "tags" |] :~> [t| [Tag] |]
                , [t| "id" |] :~> [t| Integer |]
                , [t| "width" |] :~> [t| Integer |]
                , [t| "change" |] :~> [t| Int |]
                , [t| "md5" |] :~> [t| String |]
                , [t| "creator_id" |] :~> [t| Integer |]
                , [t| "has_children" |] :~> [t| Bool |]
                , [t| "created_at" |] :~> [t| String |]
                , [t| "status" |] :~> [t| String |]
                , [t| "source" |] :~> [t| String |]
                , [t| "has_notes" |] :~> [t| Maybe Bool |]
                , [t| "has_comments" |] :~> [t| Maybe Bool |]
                , [t| "preview_width" |] :~> [t| Integer |]
                , [t| "preview_height" |] :~> [t| Integer |]
                , [t| "author" |] :~> [t| String |]
                , [t| "frames" |] :~> [t| String |]
                , [t| "frames_pending" |] :~> [t| String |]
                , [t| "frames_pending_string" |] :~> [t| String |]
                , [t| "frames_string" |] :~> [t| String |]
                , [t| "is_held" |] :~> [t| Bool |]
                , [t| "is_shown_in_index" |] :~> [t| Bool |]
                , [t| "jpeg_file_size" |] :~> [t| Integer |]
                , [t| "jpeg_height" |] :~> [t| Integer |]
                , [t| "jpeg_url" |] :~> [t| String |]
                , [t| "jpeg_width" |] :~> [t| Integer |]
                , [t| "sample_file_size" |] :~> [t| Integer |]
                , [t| "actual_preview_height" |] :~> [t| Integer |]
                , [t| "actual_preview_width" |] :~> [t| Integer |]
                , [t| "file_size" |] :~> [t| Integer |]
                ]

-- | Handy synonym hiding 'ElF'.
type R a = PlainRec ElF a

-- | 'R' wrapped in a 'Parse'.
type PR a = Parse (R a)

-- * Commonly used fields

height ∷ Proxy "height"
height = Proxy

score ∷ Proxy "score"
score = Proxy

file_url ∷ Proxy "file_url"
file_url = Proxy

parent_id ∷ Proxy "parent_id"
parent_id = Proxy

sample_url ∷ Proxy "sample_url"
sample_url = Proxy

sample_width ∷ Proxy "sample_width"
sample_width = Proxy

sample_height ∷ Proxy "sample_height"
sample_height = Proxy

preview_url ∷ Proxy "preview_url"
preview_url = Proxy

rating ∷ Proxy "rating"
rating = Proxy

tags ∷ Proxy "tags"
tags = Proxy

id ∷ Proxy "id"
id = Proxy

width ∷ Proxy "width"
width = Proxy

change ∷ Proxy "change"
change = Proxy

md5 ∷ Proxy "md5"
md5 = Proxy

creator_id ∷ Proxy "creator_id"
creator_id = Proxy

has_children ∷ Proxy "has_children"
has_children = Proxy

created_at ∷ Proxy "created_at"
created_at = Proxy

status ∷ Proxy "status"
status = Proxy

source ∷ Proxy "source"
source = Proxy

has_notes ∷ Proxy "has_notes"
has_notes = Proxy

has_comments ∷ Proxy "has_comments"
has_comments = Proxy

preview_width ∷ Proxy "preview_width"
preview_width = Proxy

preview_height ∷ Proxy "preview_height"
preview_height = Proxy

author ∷ Proxy "author"
author = Proxy

frames ∷ Proxy "frames"
frames = Proxy

frames_pending ∷ Proxy "frames_pending"
frames_pending = Proxy

frames_pending_string ∷ Proxy "frames_pending_string"
frames_pending_string = Proxy

frames_string ∷ Proxy "frames_string"
frames_string = Proxy

is_held ∷ Proxy "is_held"
is_held = Proxy

is_shown_in_index ∷ Proxy "is_shown_in_index"
is_shown_in_index = Proxy

jpeg_file_size ∷ Proxy "jpeg_file_size"
jpeg_file_size = Proxy

jpeg_height ∷ Proxy "jpeg_height"
jpeg_height = Proxy

jpeg_url ∷ Proxy "jpeg_url"
jpeg_url = Proxy

jpeg_width ∷ Proxy "jpeg_width"
jpeg_width = Proxy

sample_file_size ∷ Proxy "sample_file_size"
sample_file_size = Proxy

actual_preview_height ∷ Proxy "actual_preview_height"
actual_preview_height = Proxy

actual_preview_width ∷ Proxy "actual_preview_width"
actual_preview_width = Proxy

file_size ∷ Proxy "file_size"
file_size = Proxy