module Text.BibTeX.Parse (
file,
comment,
entry,
assignment,
value,
texSequence,
texBlock,
identifier,
bibIdentifier,
skippingSpace,
skippingLeadingSpace,
splitCommaSepList,
splitAuthorList,
splitSepList,
) where
import qualified Text.BibTeX.Entry as Entry

import qualified Text.ParserCombinators.Parsec.Token as T
import qualified Text.ParserCombinators.Parsec.Language as L
import qualified Text.ParserCombinators.Parsec as Parsec
import Text.ParserCombinators.Parsec
   (CharParser, Parser,
    (<|>), alphaNum, digit, letter, char, noneOf, oneOf,
    between, many, many1, sepEndBy, )

import Control.Monad (liftM, liftM2, liftM3, )
import Data.Char (isSpace, )
import Data.List.HT (chop, )
-- | Token parser shared by the lexeme-level helpers of this module.
--
-- It is built from Parsec's 'L.emptyDef' with two adjustments:
-- @%@ starts a comment that runs to the end of the line, and identifiers
-- consist solely of alphanumeric characters (a leading digit is allowed).
lexer :: T.TokenParser st
lexer = T.makeTokenParser bibTeXDef
  where
    bibTeXDef =
      L.emptyDef
        { L.commentLine = "%"
        , L.identStart = alphaNum
        , L.identLetter = alphaNum
        }
-- | An alphanumeric identifier token as defined by 'lexer';
-- trailing whitespace and @%@ comments are skipped.
identifier :: CharParser st String
identifier = T.identifier lexer

-- | The token @,@ followed by optional whitespace and comments.
comma :: CharParser st String
comma = T.comma lexer

-- | The token @=@ followed by optional whitespace and comments.
equals :: CharParser st String
equals = lexer `T.symbol` "="
-- | Run a parser between the tokens @{@ and @}@.
-- Both brace tokens skip the whitespace and comments that follow them.
braces :: CharParser st a -> CharParser st a
braces = T.braces lexer

-- | Run a parser and afterwards skip trailing whitespace and comments.
lexeme :: CharParser st a -> CharParser st a
lexeme = T.lexeme lexer
-- | Parse all entries of a BibTeX source.
--
-- Everything up to the first @\@@ is discarded as 'comment'; after that,
-- entries are read with 'entry', and any text between or after them
-- (again, everything up to the next @\@@) is treated as 'comment' as well.
file :: Parser [Entry.T]
file = do
  _ <- comment
  entry `sepEndBy` comment
-- | Consume and return the (possibly empty) run of characters
-- up to, but not including, the next @\@@ or the end of input.
comment :: Parser String
comment = many notAtSign
  where
    notAtSign = noneOf "@"
-- | Parse a single BibTeX entry of the shape
--
-- > @type{key, field = value, field = value, ...}
--
-- The entry type is read with 'identifier', the key with 'bibIdentifier'
-- and the fields with 'assignment'. Fields are separated by commas and a
-- trailing comma is accepted. The comma after the key is mandatory.
entry :: Parser Entry.T
entry = do
  _ <- char '@'
  entryType <- identifier
  braces $ do
    -- 'try' makes a failed key parse backtrack without consuming input.
    key <- Parsec.try bibIdentifier
    _ <- comma
    fields <- assignment `sepEndBy` comma
    return (Entry.Cons entryType key fields)
-- | Parse one field assignment such as
--
-- > author = {Firstname Surname}
--
-- and return the field name paired with its value.
assignment :: Parser (String, String)
assignment = liftM2 (,) fieldName fieldValue
  where
    fieldName = bibIdentifier
    fieldValue = equals >> value
-- | Parse the right-hand side of a field assignment. Accepted forms:
--
-- * a run of letters, e.g. @jul@
-- * a run of digits, e.g. @2010@
-- * a brace-delimited TeX sequence, e.g. @{Firstname Surname}@
-- * a quote-delimited TeX sequence, e.g. @\"Firstname Surname\"@
--
-- The delimiters are not part of the result. Whitespace and @%@ comments
-- following the value are skipped.
value :: Parser String
value =
  lexeme (many1 letter)
    <|> lexeme (many1 digit)
    <|> lexeme (between openBrace (char '}') (texSequence '}'))
    <|> lexeme (between (char '"') (char '"') (texSequence '"'))
  where
    -- The token-level 'braces' helper is deliberately not used here: its
    -- opening brace also skips @%@ line comments, so a value whose first
    -- visible character is @%@ would lose its whole first line (possibly
    -- including the closing brace). Only plain whitespace after the
    -- opening brace is dropped; a @%@ is kept as literal text, exactly as
    -- it is everywhere else inside a value.
    openBrace = char '{' >> Parsec.skipMany Parsec.space
-- | Parse zero or more 'texBlock's and concatenate their results.
-- Parsing stops in front of the first unnested occurrence of @closeChar@,
-- which is neither consumed nor part of the result.
texSequence :: Char -> Parser String
texSequence closeChar =
  liftM concat (many (texBlock closeChar))

-- | Parse one unit of TeX text. In order of preference this is
--
-- * a group in curly braces, returned including its braces
--   (the content of the group is a 'texSequence' closed by @}@),
-- * a backslash together with the character following it,
--   e.g. @\\\"@, @\\&@, @\\\\@ or the first letter of a macro name,
-- * any single character other than @closeChar@.
texBlock :: Char -> Parser String
texBlock closeChar =
  liftM3 (\open body close -> open : body ++ [close])
    (char '{') (texSequence '}') (char '}')
    -- Any character may follow the backslash. Restricting it to a fixed
    -- set would make sequences such as @\\\\@, @\\-@ or @\\/@ fail after
    -- the backslash has been consumed, which aborts the whole parse
    -- because '<|>' does not backtrack over consumed input.
    <|> sequence [char '\\', Parsec.anyChar]
    <|> fmap (: []) (noneOf [closeChar])
-- | Parse a BibTeX identifier, as used for entry keys and field names.
--
-- The first character is a letter or an underscore; the following
-- characters are alphanumeric or one of @&;:-_.?+/@.
-- Trailing whitespace and @%@ comments are skipped.
bibIdentifier :: Parser String
bibIdentifier = lexeme $ do
  firstChar <- letter <|> char '_'
  restChars <- many (alphaNum <|> oneOf "&;:-_.?+/")
  return (firstChar : restChars)
-- | Run the given parser, then discard any whitespace that follows
-- and return the parser's result.
skippingSpace :: Parser a -> Parser a
skippingSpace p =
  p >>= \result ->
    Parsec.skipMany Parsec.space >> return result
-- | Discard any leading whitespace, then run the given parser.
skippingLeadingSpace :: Parser a -> Parser a
skippingLeadingSpace p = do
  Parsec.skipMany Parsec.space
  p
-- | Split a string at every comma; shorthand for 'splitSepList' with
-- @','@ as the separator.
splitCommaSepList :: String -> [String]
splitCommaSepList str = splitSepList ',' str
-- | Split an author field into the individual names.
--
-- The input is broken into whitespace-separated words, the word list is
-- cut at every word that is exactly @and@, and the words of each part
-- are re-joined with single spaces. Runs of whitespace inside a name
-- therefore collapse into one space.
splitAuthorList :: String -> [String]
splitAuthorList str =
  [unwords nameWords | nameWords <- chop ("and" ==) (words str)]
-- | Split a string at every occurrence of the separator character and
-- strip leading whitespace from each resulting item.
--
-- All leading whitespace is removed, not only blanks, so that items of
-- a field value that continues on a new or tab-indented line do not
-- start with a newline or tab. Trailing whitespace of an item is kept.
splitSepList :: Char -> String -> [String]
splitSepList sep =
  map (dropWhile isSpace) . chop (sep ==)