module Data.Classify.Parser --(parseElement,generateMarkup,module Parser) where import Data.Classify.DataTypes -- import Text.ParserCombinators.Parsec import Text.ParserCombinators.ReadP import Data.Char import Data.List import Data.Ord import Control.Monad --import Utils --import System.Time import System.IO.Unsafe import qualified Data.Map as Map import qualified Data.Set as Set run readp inp = let rets = map fst $ readP_to_S readp inp in nub rets separator = choice [ do skipSpaces skipMany (char '-') skipSpaces , do skipSpaces skipMany (char '.') skipSpaces , do skipSpaces ] makeVersion s e = Version (read s) (read e) digit :: ReadP Char digit = satisfy isDigit oneOf :: String -> ReadP Char oneOf str = choice (map char str) anyChar :: ReadP Char anyChar = satisfy (const True) putback :: ReadP a -> ReadP a putback readp = do inp <- look choice $ flip map (readP_to_S readp inp) $ \(a,str) -> return a parseYear = choice [ choice [ do string (show year) return (year, 2) , do string (drop 2 (show year)) return (year, 0) ] | year <- [2001..2015]] -- Try years from 2001 to 2015 getDigits n = do ds <- many1 digit satisfy (not.isDigit) guard (length ds <= 2) return (read ds) mkDate year month day n = do guard (month >= 1 && month <= 12) guard (day >= 1 && day <= 31) return (DateVersion year month day, n) parseDateVersion = choice [ do (year, n) <- parseYear separator month <- getDigits 2 separator day <- getDigits 2 mkDate year month day n , do satisfy (not.isDigit) month <- getDigits 2 separator day <- getDigits 2 separator (year, n) <- parseYear mkDate year month day (n+5) +++ mkDate year day month (n+3) ] parseVersion = choice [ parseDateVersion , do s <- many1 digit oneOf "XxEe" e <- many1 digit return (makeVersion s e, 3) , do s <- many1 digit oneOf "." e <- many1 digit return (makeVersion s e, 0) , do s <- count 2 digit +++ count 1 digit e <- count 2 digit return (makeVersion s e, negate 2) , do oneOf "sS" s <- many1 digit oneOf "Ee" e <- many1 digit return (makeVersion s e, 2) , between (char '(') (char ')') parseVersion , between (char '{') (char '}') parseVersion , between (char '[') (char ']') parseVersion ] trunc xs = [ toLower x | x <- xs, isAlphaNum x ] parseSeries known = do many anyChar nPre <- many1 anyChar n <- case Map.lookup (trunc nPre) known of Nothing -> fail "not known" Just v -> return v -- guard (trunc n `Map.member` known) s <- look guard (take 2 s /= "th") -- Fix for '24th 2007.avi'. It shouldn't parse 24th as a series. many anyChar separator (v,s) <- parseVersion satisfy (not.isDigit) separator (t,l) <- choice [do { t <- manyTill anyChar (do char '.' rest <- look if '.' `elem` rest then pfail else return ()) ; l <- look ; return (t,l) } ,do { t <- look ; choice [return ("",t), return (t,"")] }] guard (trunc l `Set.member` knownExtensions) return $ Series n v (pp t) (pp l) s where pp = trim . removeGarbage knownExtensions = Set.fromList $ map trunc ["avi","mpg", "mpeg", "mp4", "mov","wmv","mkw","mkv",""] removeGarbage [] = [] removeGarbage ('.':xs) = ' ':removeGarbage xs removeGarbage ('_':xs) = ' ':removeGarbage xs removeGarbage ('-':xs) = ' ':removeGarbage xs removeGarbage (x:xs) | isAlphaNum x || isSpace x = x:removeGarbage xs | otherwise = ' ':removeGarbage xs trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace parseElement known = do choice [parseSeries known] {- The simpsons - 0x00 - Null Simpsons - S00E00 - Null Simpsons S0x0 Null S0E0 Null -} {- putback p = do tok <- getInput r <- p setInput tok return r separator1 = do spaces oneOf "-" spaces separator2 = do many space return () separator3 = do many1 $ oneOf "-./,[]{}_ " return () separator = choice $ map try [separator3, separator2, separator1] -- return ()] makeVersion s e = Version (read s) (read e) parseVersion1 = do s <- many1 digit oneOf "XxEe." e <- many1 digit return $ makeVersion s e parseVersion2 = do oneOf "sS" s <- many1 digit oneOf "Ee" e <- many1 digit return $ makeVersion s e parseVersion3 = do (s,e) <- choice $ map try [count 2 digit >>= \s -> count 2 digit >>= \e -> return (s,e), count 1 digit >>= \s -> count 2 digit >>= \e -> return (s,e)] return $ makeVersion s e parseVersion4 = between (char '(') (char ')') parseVersion parseVersion = choice $ map try [parseVersion1, parseVersion2, parseVersion3, parseVersion4] parseName :: Parser Name parseName = manyTill anyChar (putback $ try $ (separator >> parseVersion>>return ())) {-parseSeries1 :: Parser Element parseSeries1 = do v <- parseVersion separator n <- parseName separator t <- many (noneOf ".") l <- many anyChar return $ Series n v t l-} parseSeries2 :: Parser Element parseSeries2 = do n <- parseName separator v <- parseVersion separator (t,l) <- choice $ map try [do { t <- manyTill anyChar (char '.') ; l <- getInput ; return (t,l) } ,do { t <- getInput ; return ("",t) }] return $ Series n v t l parseElement :: Parser Element parseElement = do choice $ map try [parseSeries2] -} ----------------------------- -- Markup Parser ----------------------------- parseMarkupElement = choice [parseNumeric, parseSymbol, parseLiterate] where parseLiterate = do lit <- anyChar return $ Literate lit parseSymbol = do char '%' sym <- anyChar return $ Symbol sym parseNumeric = do char '%' n <- many1 digit sym <- anyChar return $ Numeric sym (read n) parseMarkup = many1 parseMarkupElement generateMarkup s = case readP_to_S parseMarkup s of rets -> fst (head (sortBy (comparing (length.snd)) rets))