{-# LANGUAGE ScopedTypeVariables #-} -- Programmatic interface to epguides.com information. module Epguide ( fetchTitles , fetchTitlesFromFile , fetchTitles' ) where import Utils import Types import Config import Data.Char import Data.List import Data.Maybe import Text.XML.HaXml import Text.XML.HaXml.Xtract.Parse import Control.Exception import Control.Monad import qualified Data.ByteString.Char8 as BS import Text.ParserCombinators.ReadP fetchTitles :: Config -> String -- ^Series -> IO [(LocalEpisodeIndex,String)] fetchTitles cfg series = do let url = mkGoogleUrl "epguides.com" series mbDoc <- try $ downloadAsXML cfg url case mbDoc of Left (err::SomeException) -> return [] Right doc -> fetchTitles' doc fetchTitlesFromFile :: Config -> FilePath -> IO [(LocalEpisodeIndex,String)] fetchTitlesFromFile cfg inp = do html <- BS.readFile inp xml <- htmlToXml cfg html fetchTitles' (xmlParse inp (BS.unpack xml)) fetchTitles' :: Document -- ^ Input -> IO [(LocalEpisodeIndex,String)] fetchTitles' (Document _ _ elt _) = do let Elem _ _ cs = xmlUnEscape stdXmlEscaper elt cs' = concatMap (xtract "//pre") $ cs titles = [ (index,title) | CElem (Elem "pre" _ cs'') <- cs' , [CString _ l, CElem (Elem "a" attrs titleParts)] <- split 2 cs'' -- FIXME: support escape codes like á , let title = concat [ t | CString _ t <- titleParts ] , index <- parseEpNum l , Just (AttValue [Left str]) <- [lookup "href" attrs] , "summary.html" `isSuffixOf` str , not ("http://www.tv.com/show/" `isPrefixOf` str) , title `notElem` ["TBD","TBA","TBW"]] return titles parseEpNum :: String -> [LocalEpisodeIndex] parseEpNum inp = fromMaybe [] $ flip fromReadP inp $ do many anyChar satisfy (not.isDigit) s <- fmap read $ many1 digit char '-' skipSpaces e <- fmap read $ many1 digit satisfy (not.isDigit) guard (s > 0 && e > 0) many anyChar date <- option [] $ do satisfy (not.isDigit) day <- fmap read $ many1 digit skipSpaces month <- getMonth skipSpaces year <- fmap read $ many1 digit return [DateIdx (year+2000) month day] return $ [EpIdx s e] ++ date getMonth = choice $ map (\(n,str) -> string str >> return n) $ zip [1..] [ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" ] digit :: ReadP Char digit = satisfy isDigit anyChar :: ReadP Char anyChar = satisfy (const True) fromReadP :: ReadP a -> String -> Maybe a fromReadP p inp = case map fst $ readP_to_S p inp of [] -> Nothing lst -> Just (last lst) split n lst = worker n [] lst where worker 0 acc lst = reverse acc:worker n [] lst worker _ acc [] = [reverse acc] worker n acc (x:xs) = worker (n-1) (x:acc) xs