-- | Parsers/Post.hs -- A module which contain functions for parse and show thread post. module Parsers.Post ( Post(..), PostSettings(..), parsePost, showPost ) where import Config import Text.HTML.TagSoup import Data.List.Utils import Data.List -- | Post structure. data Post = Post { postTitle :: Maybe String , postAuthor :: Maybe String , postEmail :: Maybe String , postTrip :: Maybe String , postTime :: String , postImg :: Maybe Img , postBody :: String } deriving Show -- | Img structure. data Img = Img { imgProperties :: String , imgURL :: String , thumbURL :: String } deriving Show -- | Settings for post parser. data PostSettings = PostSettings { sPostSplitter :: String , sPostNumberTag :: String , sTitleTag :: (String, String) , sAuthorTag :: (String, String) , sTripTag :: String , sTimeCloseTag :: String , sPropertiesTag :: String , sPropertiesCloseTag :: String , sImgTag :: String , sThumbTag :: String , sBodyWalkAll :: Bool , sBodyTag :: String , sBodyCloseTag :: String } -- | Parse entire post. parsePost :: PostSettings -> [Tag] -> Post parsePost s tags = fst $ parseTitle Post tags >== parseAuthorAndEmail >== parseTrip >== parseTime >== parseImg >== parseBody where (f, rest) >== f2 = f2 f rest ----------------------------------------------- parseTitle f tags = let r = dropWhile (~/= (fst $ sTitleTag s)) tags rest = if null r then dropWhile (~/= (snd $ sTitleTag s)) tags else r (title, newRest) = if null rest then (Nothing, tags) else let tag = head $ tail rest in if isTagText tag then (Just $ fromTagText tag, rest) else (Nothing, tags) in (f title, newRest) ----------------------------------------------- parseAuthorAndEmail f tags = let r = dropWhile (~/= (fst $ sAuthorTag s)) tags rest = if null r then dropWhile (~/= (snd $ sAuthorTag s)) tags else r (author, email, newRest) = if null rest then (Nothing, Nothing, tags) else let tag = head $ tail rest isHasEmail = tag ~== "" email = if isHasEmail then Just $ fromAttrib "href" tag else Nothing authorTag = if isHasEmail then last $ take 3 rest else tag author = if isTagText authorTag then Just $ fromTagText authorTag else Nothing in (author, email, rest) in (f author email, newRest) ----------------------------------------------- parseTrip f tags = let rest = dropWhile (~/= sTripTag s) tags (trip, newRest) = if null rest then (Nothing, tags) else let tag = head $ tail rest in if isTagText tag then (Just $ fromTagText tag, rest) else (Nothing, rest) in (f trip, newRest) ----------------------------------------------- -- required parseTime f tags = let (t, rest) = break (~== sTimeCloseTag s) tags time = fromTagText $ last t in (f time, rest) ----------------------------------------------- parseImg f t = let r = dropWhile (~/= sPropertiesTag s) t rest = if null r then dropWhile (~/= sPropertiesTag s) tags else r (img, newRest) = if null rest then (Nothing, t) else let (properties, rest') = break (~== sPropertiesCloseTag s) rest imgU = fromAttrib "href" $ head $ dropWhile (~/= sImgTag s) rest' thumbU = fromAttrib "src" $ head $ dropWhile (~/= sThumbTag s) rest' in (Just $ Img { imgProperties = innerText $ properties , imgURL = imgU , thumbURL = thumbU }, rest') in (f img, newRest) ----------------------------------------------- -- required parseBody f tags = let bodyTags = if sBodyWalkAll s then reverse $ dropWhile (~/= sBodyCloseTag s) $ reverse $ dropWhile (~/= sBodyTag s) tags else takeWhile (~/= sBodyCloseTag s) $ dropWhile (~/= sBodyTag s) tags body = innerText $ map parseWakabaMark bodyTags in (f body, []) where -- FIXME: parse it correctly!!! parseWakabaMark tag | tag ~== "
" = TagText "\n" parseWakabaMark tag | tag ~== "

" = TagText "\n" parseWakabaMark tag | tag ~== "

" = TagText "\n" parseWakabaMark tag = tag -- | Show post. showPost :: Chan -> (Integer, Post) -> String showPost chan (n, p) = let title = maybe "" id $ postTitle p author = maybe "" id $ postAuthor p email = maybe "" id $ postEmail p trip = maybe "" id $ postTrip p time = postTime p number = show n (properties, imgU, thumbU) = maybe ("", "", "") (\i -> (imgProperties i ,fixURL (imgURL i) ,fixURL (thumbURL i))) $ postImg p body = postBody p -- formating format = "_<author>_<trip>_<email>_<time>_#<number>|\ \<img>|\ \<body>" formating = [ ("<body>", body) , ("<title>", title) , ("<author>", author) , ("<email>", email) , ("<trip>", trip) , ("<time>", time) , ("<number>", number) , ("<size>", properties) , ("<img>", imgU) , ("<thumb>", thumbU) , ("|", "\n") , ("_", " ") ] in foldr (\(f, r) s -> replace f r s) format formating where fixURL u = if "http://" `isPrefixOf` u then u else (curl chan)++u