-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Classify.Television
-- Copyright   :  PublicDomain
--
-- Maintainer  :  lemmih@gmail.com
-- Portability :  portable
--
-- This module defines a parser for television media files commonly found in
-- filesharing communities. The internals of the parser are non-deterministic
-- and heuristics are used to select the interpretation most likely to be correct.
--
module Data.Classify.Television
  ( -- * Types
    Name,
    Version(..),
    Title,
    -- * Classify
    tryClassify,
    classify
  ) where

import Data.Classify.DataTypes        ( Element(..), Name, Version(..), Title )
import Data.Classify.Parser           ( run, parseElement )
import Data.Classify.Rank             ( sortByRank )

import qualified Data.Map as Map
import Data.Char                      ( isAlphaNum, isSpace, toLower )
import Data.List                      ( inits, tails, isPrefixOf )

tryClassify :: [Name] -> String -> Maybe (Name, Version, Title)
tryClassify knownNames inp =
  case sortByRank possibleParses of
    []     -> Nothing
    sorted ->
      let elt = last sorted
      in Just (name elt, version elt, title elt)
  where
    idx = Map.fromList [ (src, name) | name <- knownNames, src <- stripName name ]
    possibleParses = run (parseElement idx) inp

classify :: [Name] -> String -> (Name, Version, Title)
classify knownNames inp =
  case tryClassify knownNames inp of
    Nothing     -> error $ "Data.Classify.Television.classify: Failed to parse: " ++ show inp
    Just result -> result

stripName :: String -> [String]
stripName = worker . filter isAlphaNum . filter (not . isSpace) . map toLower
  where
    alts = [ ("the", "")
           , ("newyork", "ny")
           , ("los angeles", "la") ]
    doesMatch key [] = Nothing
    doesMatch key (x:xs)
      | fst x `isPrefixOf` key = Just x
      | otherwise              = doesMatch key xs
    expand lst = lst : concatMap expand [ before ++ new ++ drop (length old) after
                 | (before, after) <- split lst
                 , Just (old, new) <- [doesMatch after alts] ]
    worker lst = expand lst

split :: String -> [(String,String)]
split str = zip (inits str) (tails str)