module Web.RBB.Crawler.MetaParser
where
import Control.Applicative (pure, (*>), (<$>), (<*), (<*>))
import Control.Monad
import Data.Char
import Data.Text (Text, pack)
import Text.Parsec
data Meta =
Tags [(TagQuantifier, Text)]
| Title Text
| Context FilePath
| None
deriving (Eq, Show)
data TagQuantifier
= TagAdd
| TagRemove
| TagReplace
deriving (Eq, Show, Read, Ord, Enum, Bounded)
ciChar :: Char -> Parsec String u Char
ciChar c = char (toLower c) <|> char (toUpper c)
ciString :: String -> Parsec String u String
ciString = mapM ciChar
parseMeta :: String -> Either ParseError [Meta]
parseMeta inp =
parse (many pSplice) "slice" (inp++"\n\n")
>>= mapM (parse pMeta "meta parser") . filter (not . all isSpace)
pSplice :: Parsec String u String
pSplice = blankLines
<|> unwords . words . unlines <$> many1 (notFollowedBy blankLine *> anyLine)
skipSpaces :: Parsec String u ()
skipSpaces = skipMany $ char ' ' <|> char '\t'
blankLine :: Parsec String u Char
blankLine = skipSpaces *> newline
blankLines :: Parsec String u String
blankLines = many1 blankLine
anyLine :: Parsec String u String
anyLine = skipSpaces *> (anyChar `manyTill` newline)
pMeta :: Parsec String u Meta
pMeta = try pTags <|> try pTitle <|> try pContext <|> pNone
pNone :: Parsec String u Meta
pNone = const None <$> many1 anyChar
pTags :: Parsec String u Meta
pTags = Tags
<$> (try (ciString "tag" *> optional (ciChar 's') *> try spaces *> char ':')
*> spaces *> pSpaceElements <* spaces)
pQuantifierPrefix :: Parsec String u TagQuantifier
pQuantifierPrefix = quantify <$> (try (char '+') <|> try (char '-') <|> pure '=')
where
quantify c
| c == '+' = TagAdd
| c == '-' = TagRemove
| otherwise = TagReplace
pSpaceElements :: Parsec String u [(TagQuantifier, Text)]
pSpaceElements = many pSpaceDelimitedElement
pSpaceDelimitedElement :: Parsec String u (TagQuantifier, Text)
pSpaceDelimitedElement = (\q t -> (q, pack t))
<$> pQuantifierPrefix
<*> (char '"' *> anyChar `manyTill` char '"' <* tryEndOfTag
<|> char '\'' *> anyChar `manyTill` try (char '\'' *> tryEndOfTag)
<|> ((:) <$> satisfy (not . isSpace)
<*> anyChar `manyTill` tryEndOfTag))
tryEndOfTag :: Parsec String u ()
tryEndOfTag = (try (void space) *> spaces) <|> eof
pTitle :: Parsec String u Meta
pTitle = Title . pack . unwords
<$> (try (ciString "title") *> spaces *> char ':' *> spaces
*> many ((:) <$> anyChar <*> anyChar `manyTill` tryEndOfTag))
pContext :: Parsec String u Meta
pContext = Context . unwords
<$> (try (ciString "context") *> spaces *> char ':' *> spaces
*> many ((:) <$> anyChar <*> anyChar `manyTill` tryEndOfTag))