----------------------------------------------------------------------------- -- | -- Module : Data.Classify.Television -- Copyright : PublicDomain -- -- Maintainer : lemmih@gmail.com -- Portability : portable -- -- This module defines a parser for television media files commonly found in -- filesharing communities. The internals of the parser are non-deterministic -- and heuristics are used to select the interpretation most likely to be correct. -- module Data.Classify.Television ( -- * Types Name, Version(..), Title, -- * Classify tryClassify, classify ) where import Data.Classify.DataTypes ( Element(..), Name, Version(..), Title ) import Data.Classify.Parser ( run, parseElement ) import Data.Classify.Rank ( sortByRank ) import qualified Data.Map as Map import Data.Char ( isAlphaNum, isSpace, toLower ) import Data.List ( inits, tails, isPrefixOf ) tryClassify :: [Name] -> String -> Maybe (Name, Version, Title) tryClassify knownNames inp = case sortByRank possibleParses of [] -> Nothing sorted -> let elt = last sorted in Just (name elt, version elt, title elt) where idx = Map.fromList [ (src, name) | name <- knownNames, src <- stripName name ] possibleParses = run (parseElement idx) inp classify :: [Name] -> String -> (Name, Version, Title) classify knownNames inp = case tryClassify knownNames inp of Nothing -> error $ "Data.Classify.Television.classify: Failed to parse: " ++ show inp Just result -> result stripName :: String -> [String] stripName = worker . filter isAlphaNum . filter (not . isSpace) . map toLower where alts = [ ("the", "") , ("newyork", "ny") , ("los angeles", "la") ] doesMatch key [] = Nothing doesMatch key (x:xs) | fst x `isPrefixOf` key = Just x | otherwise = doesMatch key xs expand lst = lst : concatMap expand [ before ++ new ++ drop (length old) after | (before, after) <- split lst , Just (old, new) <- [doesMatch after alts] ] worker lst = expand lst split :: String -> [(String,String)] split str = zip (inits str) (tails str)