-- | Parsers/Thread.hs
-- A module which contains functions for splitting a thread into posts and
-- parsing them all together.
module Parsers.Thread
    ( ThreadSettings(..)
    , parseThread
    , parsePosts
    ) where

import Parsers.Post
import Data.Char (isDigit)
import Data.Maybe
import Text.Read (readMaybe)
import Text.Regex.Posix
import Text.HTML.TagSoup
import Codec.Binary.UTF8.String
import Data.List.Utils

-- | Settings for the thread parser.
data ThreadSettings = ThreadSettings
    { sThreadSplitter :: String
      -- ^ Delimiter handed to 'split' to cut the page body into pieces.
    , sAfterSplit     :: Bool
      -- ^ 'True': parse the piece right after the first delimiter;
      --   'False': parse the piece before it.
    }

-- | Post with number.
type NPost = (Integer, Post)

-- | Parse thread on main page.
--
-- The body is cut on 'sThreadSplitter'; if that yields fewer than two
-- pieces the result is empty.  Otherwise the piece chosen by 'sAfterSplit'
-- is parsed, and a post is returned only when that piece consists of
-- exactly one chunk whose number is greater than @lastPost@.
parseThread :: ThreadSettings -> PostSettings -> Integer -> String -> [NPost]
parseThread tSets pSets lastPost body =
    case split (sThreadSplitter tSets) body of
        (before : after : _) ->
            universalParser select pSets
                (if sAfterSplit tSets then after else before)
        -- Fewer than two pieces: the splitter was not found.
        _ -> []
  where
    -- Exactly one chunk, with a readable number newer than lastPost.
    select [Just number] | lastPost < number = [number]
    select _ = []

-- | Parse full thread.
--
-- With @lastPost == 0@ only the last numbered post is returned; otherwise
-- the leading posts with numbers @<= lastPost@ are skipped and at most the
-- final ten of the remaining ones are returned.  Chunks without a readable
-- post number are ignored.
parsePosts :: PostSettings -> Integer -> String -> [NPost]
parsePosts pSets lastPost body = universalParser (select . catMaybes) pSets body
  where
    select [] = []
    select numbers
        -- 'last' is safe here: the empty list is handled above.
        | lastPost == 0 = [last numbers]
        -- Keep the final ten entries of what follows lastPost.
        | otherwise =
            reverse $ take 10 $ reverse $ dropWhile (<= lastPost) numbers

-- | Universal parser.
--
-- Splits @body@ on 'sPostSplitter', extracts a post number from every chunk
-- (the digits of the text matched by 'sPostNumberTag'), lets @select@ pick
-- the wanted numbers, and parses only the chunks whose number was picked.
--
-- @select@ receives one entry per chunk, in order; 'Nothing' marks a chunk
-- in which no number could be read.  Such chunks are never parsed.
universalParser :: ([Maybe Integer] -> [Integer])
                -> PostSettings
                -> String
                -> [NPost]
universalParser select pSets body = mapMaybe parseNPost numbered
  where
    chunks   = split (sPostSplitter pSets) body
    numbered = [ (parseNumber chunk, chunk) | chunk <- chunks ]
    needed   = select (map fst numbered)

    -- '=~' yields "" when the tag does not match, so 'readMaybe' gives
    -- 'Nothing' instead of the "no parse" error a bare 'read' would raise.
    parseNumber :: String -> Maybe Integer
    parseNumber chunk =
        readMaybe (filter isDigit (chunk =~ sPostNumberTag pSets))

    parseNPost (Just number, chunk)
        | number `elem` needed =
            Just (number, parsePost pSets $ parseTags $ decodeString chunk)
    parseNPost _ = Nothing