module Data.Classify.Parser
where
import Data.Classify.DataTypes
import Text.ParserCombinators.ReadP
import Data.Char
import Data.List
import Data.Ord
import Control.Monad
import System.IO.Unsafe
import qualified Data.Map as Map
import qualified Data.Set as Set
run readp inp
= let rets = map fst $ readP_to_S readp inp
in nub rets
separator
= choice
[ do skipSpaces
skipMany (char '-')
skipSpaces
, do skipSpaces
skipMany (char '.')
skipSpaces
, do skipSpaces ]
makeVersion s e = Version (read s) (read e)
digit :: ReadP Char
digit = satisfy isDigit
oneOf :: String -> ReadP Char
oneOf str = choice (map char str)
anyChar :: ReadP Char
anyChar = satisfy (const True)
putback :: ReadP a -> ReadP a
putback readp
= do inp <- look
choice
$ flip map (readP_to_S readp inp) $ \(a,str)
-> return a
parseYear = choice
[ choice
[ do string (show year)
return (year, 2)
, do string (drop 2 (show year))
return (year, 0)
]
| year <- [2001..2015]]
getDigits n
= do ds <- many1 digit
satisfy (not.isDigit)
guard (length ds <= 2)
return (read ds)
mkDate year month day n
= do guard (month >= 1 && month <= 12)
guard (day >= 1 && day <= 31)
return (DateVersion year month day, n)
parseDateVersion
= choice
[ do (year, n) <- parseYear
separator
month <- getDigits 2
separator
day <- getDigits 2
mkDate year month day (n+4)
, do month <- getDigits 2
separator
day <- getDigits 2
separator
(year, n) <- parseYear
mkDate year month day (n+5) +++ mkDate year day month (n+3)
]
parseVersion
= choice
[ parseDateVersion
, do s <- many1 digit
oneOf "XxEe."
e <- many1 digit
return (makeVersion s e, 0)
, do s <- count 1 digit +++ count 2 digit
e <- count 2 digit
return (makeVersion s e, 0)
, do oneOf "sS"
s <- many1 digit
oneOf "Ee"
e <- many1 digit
return (makeVersion s e, 2)
, between (char '(') (char ')') parseVersion
, between (char '{') (char '}') parseVersion
, between (char '[') (char ']') parseVersion
]
trunc xs = [ toLower x | x <- xs, isAlphaNum x ]
parseSeries known
= do many anyChar
nPre <- many1 anyChar
n <- Map.lookup (trunc nPre) known
s <- look
guard (take 2 s /= "th")
many anyChar
separator
(v,s) <- parseVersion
separator
(t,l) <- choice
[do { t <- manyTill anyChar (do char '.'
rest <- look
if '.' `elem` rest
then pfail
else return ())
; l <- look
; return (t,l) }
,do { t <- look
; return ("",t) }]
guard (trunc l `Set.member` knownExtensions)
return $ Series n v (pp t) (pp l) s
where pp = trim . removeGarbage
knownExtensions = Set.fromList $ map trunc ["avi","mpg", "mpeg", "mp4", "mov","wmv","mkw","mkv"]
removeGarbage [] = []
removeGarbage ('.':xs) = ' ':removeGarbage xs
removeGarbage ('_':xs) = ' ':removeGarbage xs
removeGarbage ('-':xs) = ' ':removeGarbage xs
removeGarbage (x:xs) | isAlphaNum x || isSpace x = x:removeGarbage xs
| otherwise = ' ':removeGarbage xs
trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace
parseElement known
= do choice [parseSeries known]
parseMarkupElement
= choice
[parseNumeric,
parseSymbol,
parseLiterate]
where parseLiterate
= do lit <- anyChar
return $ Literate lit
parseSymbol
= do char '%'
sym <- anyChar
return $ Symbol sym
parseNumeric
= do char '%'
n <- many1 digit
sym <- anyChar
return $ Numeric sym (read n)
parseMarkup = many1 parseMarkupElement
generateMarkup s
= case readP_to_S parseMarkup s of
rets -> fst (head (sortBy (comparing (length.snd)) rets))