{-# LANGUAGE OverloadedStrings, TupleSections #-} ----------------------------------------------------------------------------- -- | -- Module : Text.CSL.Parser -- Copyright : (C) 2014 John MacFarlane -- License : BSD-style (see LICENSE) -- -- Maintainer : John MacFarlane -- Stability : unstable -- Portability : unportable -- -- Parser for CSL XML files. ----------------------------------------------------------------------------- module Text.CSL.Parser (readCSLFile, parseCSL, parseCSL', parseLocale, localizeCSL) where import qualified Data.Text as T import qualified Data.ByteString.Lazy as L import qualified Data.Map as M import qualified Control.Exception as E import Control.Monad (when) import Data.Either (lefts, rights) import Data.Text (Text, unpack) import Text.CSL.Style hiding (parseNames) import Text.CSL.Util (toRead, findFile) import System.Directory (getAppUserDataDirectory) import Control.Applicative hiding (many, Const) import qualified Text.XML as X import Data.Default import Text.Pandoc.Shared (safeRead) import Text.XML.Cursor import Data.Maybe (listToMaybe, fromMaybe) import Text.Pandoc.UTF8 (fromStringLazy) import Text.Pandoc.Shared (fetchItem) import Text.CSL.Data (getLocale) -- | Parse a 'String' into a 'Style' (with default locale). parseCSL :: String -> Style parseCSL = parseCSL' . fromStringLazy -- | Parse locale. parseLocale :: String -> IO Locale parseLocale locale = parseLocaleElement . fromDocument . X.parseLBS_ def <$> getLocale locale -- | Merge locale into a CSL style. localizeCSL :: Maybe String -> Style -> IO Style localizeCSL mbLocale s = do let locale = fromMaybe (styleDefaultLocale s) mbLocale l <- parseLocale locale return s { styleLocale = mergeLocales locale l (styleLocale s) } -- | Read and parse a CSL style file into a localized sytle. readCSLFile :: Maybe String -> FilePath -> IO Style readCSLFile mbLocale src = do csldir <- getAppUserDataDirectory "csl" mbSrc <- findFile [".", csldir] src fetchRes <- fetchItem Nothing (fromMaybe src mbSrc) f <- case fetchRes of Left err -> E.throwIO err Right (rawbs, _) -> return $ L.fromChunks [rawbs] let cur = fromDocument $ X.parseLBS_ def f -- see if it's a dependent style, and if so, try to fetch its parent: let pickParentCur = get "link" >=> attributeIs (X.Name "rel" Nothing Nothing) "independent-parent" let parentCur = cur $/ get "info" &/ pickParentCur let parent' = concatMap (stringAttr "href") parentCur when (parent' == src) $ do error $ "Dependent CSL style " ++ src ++ " specifies itself as parent." case parent' of "" -> localizeCSL mbLocale $ parseCSLCursor cur y -> do -- note, we insert locale from the dependent style: let mbLocale' = case stringAttr "default-locale" cur of "" -> mbLocale x -> Just x readCSLFile mbLocale' y parseCSL' :: L.ByteString -> Style parseCSL' = parseCSLCursor . fromDocument . X.parseLBS_ def parseCSLCursor :: Cursor -> Style parseCSLCursor cur = Style{ styleVersion = version , styleClass = class_ , styleInfo = Just info , styleDefaultLocale = defaultLocale , styleLocale = locales , styleAbbrevs = Abbreviations M.empty , csOptions = filter (\(k,_) -> k `notElem` ["class", "xmlns", "version", "default-locale"]) $ parseOptions cur , csMacros = macros , citation = fromMaybe (Citation [] [] Layout{ layFormat = emptyFormatting , layDelim = "" , elements = [] }) $ listToMaybe $ cur $/ get "citation" &| parseCitation , biblio = listToMaybe $ cur $/ get "bibliography" &| parseBiblio } where version = unpack . T.concat $ cur $| laxAttribute "version" class_ = unpack . T.concat $ cur $| laxAttribute "class" defaultLocale = case cur $| laxAttribute "default-locale" of (x:_) -> unpack x [] -> "en-US" author = case (cur $// get "info" &/ get "author") of (x:_) -> CSAuthor (x $/ get "name" &/ string) (x $/ get "email" &/ string) (x $/ get "uri" &/ string) _ -> CSAuthor "" "" "" info = CSInfo { csiTitle = cur $/ get "info" &/ get "title" &/ string , csiAuthor = author , csiCategories = [] -- TODO we don't really use this, and the type -- in Style doesn't match current CSL at all , csiId = cur $/ get "info" &/ get "id" &/ string , csiUpdated = cur $/ get "info" &/ get "updated" &/ string } locales = cur $/ get "locale" &| parseLocaleElement macros = cur $/ get "macro" &| parseMacroMap get :: Text -> Axis get name = element (X.Name name (Just "http://purl.org/net/xbiblio/csl") Nothing) string :: Cursor -> String string = unpack . T.concat . content attrWithDefault :: Read a => Text -> a -> Cursor -> a attrWithDefault t d cur = case safeRead (toRead $ stringAttr t cur) of Just x -> x Nothing -> d stringAttr :: Text -> Cursor -> String stringAttr t cur = case node cur of X.NodeElement e -> case M.lookup (X.Name t Nothing Nothing) (X.elementAttributes e) of Just x -> unpack x Nothing -> "" _ -> "" parseCslTerm :: Cursor -> CslTerm parseCslTerm cur = let body = unpack $ T.dropAround (`elem` (" \t\r\n" :: String)) $ T.concat $ cur $/ content in CT { cslTerm = stringAttr "name" cur , termForm = attrWithDefault "form" Long cur , termGender = attrWithDefault "gender" Neuter cur , termGenderForm = attrWithDefault "gender-form" Neuter cur , termSingular = if null body then cur $/ get "single" &/ string else body , termPlural = if null body then cur $/ get "multiple" &/ string else body , termMatch = stringAttr "match" cur } parseLocaleElement :: Cursor -> Locale parseLocaleElement cur = Locale { localeVersion = unpack $ T.concat version , localeLang = unpack $ T.concat lang , localeOptions = concat $ cur $/ get "style-options" &| parseOptions , localeTerms = terms , localeDate = concat $ cur $/ get "date" &| parseElement } where version = cur $| laxAttribute "version" lang = cur $| laxAttribute "lang" terms = cur $/ get "terms" &/ get "term" &| parseCslTerm parseElement :: Cursor -> [Element] parseElement cur = case node cur of X.NodeElement e -> case X.nameLocalName $ X.elementName e of "term" -> parseTerm cur "text" -> parseText cur "choose" -> parseChoose cur "group" -> parseGroup cur "label" -> parseLabel cur "number" -> parseNumber cur "substitute" -> parseSubstitute cur "names" -> parseNames cur "date" -> parseDate cur _ -> [] _ -> [] getFormatting :: Cursor -> Formatting getFormatting cur = emptyFormatting{ prefix = stringAttr "prefix" cur , suffix = stringAttr "suffix" cur , fontFamily = stringAttr "font-family" cur , fontStyle = stringAttr "font-style" cur , fontVariant = stringAttr "font-variant" cur , fontWeight = stringAttr "font-weight" cur , textDecoration = stringAttr "text-decoration" cur , verticalAlign = stringAttr "vertical-align" cur , textCase = stringAttr "text-case" cur , display = stringAttr "display" cur , quotes = if attrWithDefault "quotes" False cur then NativeQuote else NoQuote , stripPeriods = attrWithDefault "strip-periods" False cur , noCase = attrWithDefault "no-case" False cur , noDecor = attrWithDefault "no-decor" False cur } parseDate :: Cursor -> [Element] parseDate cur = [Date (words variable) form format delim parts partsAttr] where variable = stringAttr "variable" cur form = case stringAttr "form" cur of "text" -> TextDate "numeric" -> NumericDate _ -> NoFormDate format = getFormatting cur delim = stringAttr "delimiter" cur parts = cur $/ get "date-part" &| parseDatePart partsAttr = stringAttr "date-parts" cur parseDatePart :: Cursor -> DatePart parseDatePart cur = DatePart { dpName = stringAttr "name" cur , dpForm = case stringAttr "form" cur of "" -> "long" x -> x , dpRangeDelim = case stringAttr "range-delimiter" cur of "" -> "-" x -> x , dpFormatting = getFormatting cur } parseNames :: Cursor -> [Element] parseNames cur = [Names (words variable) names formatting delim others] where variable = stringAttr "variable" cur formatting = getFormatting cur delim = stringAttr "delimiter" cur elts = cur $/ parseName names = case rights elts of [] -> [Name NotSet emptyFormatting [] [] []] xs -> xs others = lefts elts parseName :: Cursor -> [Either Element Name] parseName cur = case node cur of X.NodeElement e -> case X.nameLocalName $ X.elementName e of "name" -> [Right $ Name (attrWithDefault "form" NotSet cur) format (nameAttrs e) delim nameParts] "label" -> [Right $ NameLabel (attrWithDefault "form" Long cur) format plural] "et-al" -> [Right $ EtAl format $ stringAttr "term" cur] _ -> map Left $ parseElement cur _ -> map Left $ parseElement cur where format = getFormatting cur plural = attrWithDefault "plural" Contextual cur delim = stringAttr "delimiter" cur nameParts = cur $/ get "name-part" &| parseNamePart nameAttrs x = [(T.unpack n, T.unpack v) | (X.Name n _ _, v) <- M.toList (X.elementAttributes x), n `elem` nameAttrKeys] nameAttrKeys = [ "et-al-min" , "et-al-use-first" , "et-al-subsequent-min" , "et-al-subsequent-use-first" , "et-al-use-last" , "delimiter-precedes-et-al" , "and" , "delimiter-precedes-last" , "sort-separator" , "initialize" , "initialize-with" , "name-as-sort-order" ] parseNamePart :: Cursor -> NamePart parseNamePart cur = NamePart s format where format = getFormatting cur s = stringAttr "name" cur parseSubstitute :: Cursor -> [Element] parseSubstitute cur = [Substitute (cur $/ parseElement)] parseTerm :: Cursor -> [Element] parseTerm cur = let termForm' = attrWithDefault "form" Long cur formatting = getFormatting cur plural = attrWithDefault "plural" True cur name = stringAttr "name" cur in [Term name termForm' formatting plural] parseText :: Cursor -> [Element] parseText cur = let term = stringAttr "term" cur variable = stringAttr "variable" cur macro = stringAttr "macro" cur value = stringAttr "value" cur delim = stringAttr "delimiter" cur formatting = getFormatting cur plural = attrWithDefault "plural" True cur textForm = attrWithDefault "form" Long cur in if not (null term) then [Term term textForm formatting plural] else if not (null macro) then [Macro macro formatting] else if not (null variable) then [Variable (words variable) textForm formatting delim] else if not (null value) then [Const value formatting] else [] parseChoose :: Cursor -> [Element] parseChoose cur = let ifPart = cur $/ get "if" &| parseIf elseIfPart = cur $/ get "else-if" &| parseIf elsePart = cur $/ get "else" &/ parseElement in [Choose (head ifPart) elseIfPart elsePart] parseIf :: Cursor -> IfThen parseIf cur = IfThen cond mat elts where cond = Condition { isType = go "type" , isSet = go "variable" , isNumeric = go "is-numeric" , isUncertainDate = go "is-uncertain-date" , isPosition = go "position" , disambiguation = go "disambiguate" , isLocator = go "locator" } mat = attrWithDefault "match" All cur elts = cur $/ parseElement go x = words $ stringAttr x cur parseLabel :: Cursor -> [Element] parseLabel cur = [Label variable form formatting plural] where variable = stringAttr "variable" cur form = attrWithDefault "form" Long cur formatting = getFormatting cur plural = attrWithDefault "plural" Contextual cur parseNumber :: Cursor -> [Element] parseNumber cur = [Number variable numForm formatting] where variable = stringAttr "variable" cur numForm = attrWithDefault "form" Numeric cur formatting = getFormatting cur parseGroup :: Cursor -> [Element] parseGroup cur = let elts = cur $/ parseElement delim = stringAttr "delimiter" cur formatting = getFormatting cur in [Group formatting delim elts] parseMacroMap :: Cursor -> MacroMap parseMacroMap cur = (name, elts) where name = cur $| stringAttr "name" elts = cur $/ parseElement parseCitation :: Cursor -> Citation parseCitation cur = Citation{ citOptions = parseOptions cur , citSort = concat $ cur $/ get "sort" &| parseSort , citLayout = case cur $/ get "layout" &| parseLayout of (x:_) -> x [] -> Layout { layFormat = emptyFormatting , layDelim = "" , elements = [] } } parseSort :: Cursor -> [Sort] parseSort cur = concat $ cur $/ get "key" &| parseKey parseKey :: Cursor -> [Sort] parseKey cur = case stringAttr "variable" cur of "" -> case stringAttr "macro" cur of "" -> [] x -> [SortMacro x sorting (attrWithDefault "names-min" 0 cur) (attrWithDefault "names-use-first" 0 cur) (stringAttr "names-use-last" cur)] x -> [SortVariable x sorting] where sorting = case stringAttr "sort" cur of "descending" -> Descending "" _ -> Ascending "" parseBiblio :: Cursor -> Bibliography parseBiblio cur = Bibliography{ bibOptions = parseOptions cur, bibSort = concat $ cur $/ get "sort" &| parseSort, bibLayout = case cur $/ get "layout" &| parseLayout of (x:_) -> x [] -> Layout { layFormat = emptyFormatting , layDelim = "" , elements = [] } } parseOptions :: Cursor -> [Option] parseOptions cur = case node cur of X.NodeElement e -> [(T.unpack n, T.unpack v) | (X.Name n _ _, v) <- M.toList (X.elementAttributes e)] _ -> [] parseLayout :: Cursor -> Layout parseLayout cur = Layout { layFormat = getFormatting cur , layDelim = stringAttr "delimiter" cur , elements = cur $/ parseElement }