{-# LANGUAGE ScopedTypeVariables, PatternGuards, FlexibleContexts #-} module Text.CSL.Util ( safeRead , readNum , (<^>) , capitalize , camelize , uncamelize , isPunct , last' , init' , words' , trim , triml , trimr , parseBool , parseString , parseInt , mb , (.#?) , (.#:) , onBlocks , titlecase , unTitlecase , protectCase , splitStrWhen , proc , proc' , procM , query , betterThan , toRead , inlinesToString , headInline , lastInline , tailInline , initInline , tailFirstInlineStr , toCapital , mapHeadInline , tr' , findFile , (&=) , mapping' , parseRomanNumeral , isRange ) where import Data.Aeson import Data.Aeson.Types (Parser) import Data.Text (Text) import qualified Data.Text as T import Data.Char (toLower, toUpper, isLower, isUpper, isPunctuation, isAscii) import qualified Data.Traversable import Text.Pandoc.Shared (safeRead, stringify) import Text.Pandoc.Walk (walk) import Text.Pandoc import Data.List.Split (wordsBy) import Control.Monad.State import Data.Generics ( Typeable, Data, everywhere, everywhereM, mkM, everywhere', everything, mkT, mkQ ) import System.FilePath import System.Directory (doesFileExist) import qualified Data.Yaml.Builder as Y import Data.Yaml.Builder (ToYaml(..), YamlBuilder) import qualified Text.Parsec as P #ifdef TRACE import qualified Debug.Trace import Text.Show.Pretty (ppShow) #endif #ifdef TRACE tr' :: Show a => String -> a -> a tr' note' x = Debug.Trace.trace ("=== " ++ note' ++ "\n" ++ ppShow x ++ "\n") x #else tr' :: String -> a -> a tr' _ x = x #endif readNum :: String -> Int readNum s = case reads s of [(x,"")] -> x _ -> 0 -- | Conjoin strings, avoiding repeated punctuation. (<^>) :: String -> String -> String [] <^> sb = sb sa <^> [] = sa sa <^> (s:xs) | s `elem` puncts && last sa `elem` puncts = sa ++ xs where puncts = ";:,. " sa <^> sb = sa ++ sb capitalize :: String -> String capitalize [] = [] capitalize (c:cs) = toUpper c : cs isPunct :: Char -> Bool isPunct c = c `elem` ".;?!" camelize :: String -> String camelize ('-':y:ys) = toUpper y : camelize ys camelize ('_':y:ys) = toUpper y : camelize ys camelize (y:ys) = y : camelize ys camelize _ = [] uncamelize :: String -> String uncamelize = foldr g [] . f where g x xs = if isUpper x then '-' : toLower x : xs else x : xs f ( x:xs) = toLower x : xs f [] = [] last' :: [a] -> [a] last' [] = [] last' xs = [last xs] init' :: [a] -> [a] init' [] = [] init' xs = init xs -- | Like words, but doesn't break on nonbreaking spaces etc. words' :: String -> [String] words' = wordsBy (\c -> c == ' ' || c == '\t' || c == '\r' || c == '\n') -- | Remove leading and trailing space (including newlines) from string. trim :: String -> String trim = triml . trimr triml :: String -> String triml = dropWhile (`elem` " \r\n\t") trimr :: String -> String trimr = reverse . triml . reverse -- | Parse JSON Boolean or Number as Bool. parseBool :: Value -> Parser Bool parseBool (Bool b) = return b parseBool (Number n) = case fromJSON (Number n) of Success (0 :: Int) -> return False Success _ -> return True Error e -> fail $ "Could not read boolean: " ++ e parseBool _ = fail "Could not read boolean" -- | Parse JSON value as String. parseString :: Value -> Parser String parseString (String s) = return $ T.unpack s parseString (Number n) = case fromJSON (Number n) of Success (x :: Int) -> return $ show x Error _ -> case fromJSON (Number n) of Success (x :: Double) -> return $ show x Error e -> fail $ "Could not read string: " ++ e parseString (Bool b) = return $ map toLower $ show b parseString v@(Array _)= inlinesToString `fmap` parseJSON v parseString v = fail $ "Could not read as string: " ++ show v -- | Parse JSON value as Int. parseInt :: Value -> Parser Int parseInt (String s) = case safeRead (T.unpack s) of Just n -> return n Nothing -> fail "Could not read Int" parseInt (Number n) = case fromJSON (Number n) of Success (x :: Int) -> return x Error e -> fail $ "Could not read string: " ++ e parseInt _ = fail "Could not read string" mb :: Monad m => (b -> m a) -> (Maybe b -> m (Maybe a)) mb = Data.Traversable.mapM -- | Parse as a string (even if the value is a number). (.#?) :: Object -> Text -> Parser (Maybe String) x .#? y = (x .:? y) >>= mb parseString (.#:) :: Object -> Text -> Parser String x .#: y = (x .: y) >>= parseString onBlocks :: ([Inline] -> [Inline]) -> [Block] -> [Block] onBlocks f bs = walk f' bs where f' (Para ils) = Para (f ils) f' (Plain ils) = Plain (f ils) f' x = x hasLowercaseWord :: [Inline] -> Bool hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation where startsWithLowercase (Str (x:_)) = isLower x startsWithLowercase _ = False splitUpStr :: [Inline] -> [Inline] splitUpStr ils = case reverse (combineInternalPeriods (splitStrWhen (\c -> isPunctuation c || c == '\160') ils)) of [] -> [] (x:xs) -> reverse $ Span ("",["lastword"],[]) [x] : xs -- We want to make sure that the periods in www.example.com, for -- example, are not interpreted as sentence-ending punctuation. combineInternalPeriods :: [Inline] -> [Inline] combineInternalPeriods [] = [] combineInternalPeriods (Str xs:Str ".":Str ys:zs) = combineInternalPeriods $ Str (xs ++ "." ++ ys) : zs combineInternalPeriods (x:xs) = x : combineInternalPeriods xs unTitlecase :: [Inline] -> [Inline] unTitlecase zs = evalState (caseTransform untc zs) SentenceBoundary where untc w = do st <- get case (w, st) of (y, NoBoundary) -> return y (Str (x:xs), LastWordBoundary) | isUpper x -> return $ Str (map toLower (x:xs)) (Str (x:xs), WordBoundary) | isUpper x -> return $ Str (map toLower (x:xs)) (Str (x:xs), SentenceBoundary) | isLower x -> return $ Str (toUpper x : xs) (Span ("",[],[]) xs, _) | hasLowercaseWord xs -> return $ Span ("",["nocase"],[]) xs _ -> return w protectCase :: [Inline] -> [Inline] protectCase zs = evalState (caseTransform protect zs) SentenceBoundary where protect (Span ("",[],[]) xs) | hasLowercaseWord xs = do st <- get case st of NoBoundary -> return $ Span ("",[],[]) xs _ -> return $ Span ("",["nocase"],[]) xs protect x = return x -- From CSL docs: -- "Title case conversion (with text-case set to “title”) for English-language -- items is performed by: -- -- For uppercase strings, the first character of each word remains capitalized. -- All other letters are lowercased. -- For lower or mixed case strings, the first character of each lowercase word -- is capitalized. The case of words in mixed or uppercase stays the same. -- In both cases, stop words are lowercased, unless they are the first or last -- word in the string, or follow a colon. The stop words are “a”, “an”, “and”, -- “as”, “at”, “but”, “by”, “down”, “for”, “from”, “in”, “into”, “nor”, “of”, -- “on”, “onto”, “or”, “over”, “so”, “the”, “till”, “to”, “up”, “via”, “with”, -- and “yet”. titlecase :: [Inline] -> [Inline] titlecase zs = evalState (caseTransform tc zs) SentenceBoundary where tc (Str (x:xs)) = do st <- get return $ case st of LastWordBoundary -> case (x:xs) of s | not (isAscii x) -> Str s | isShortWord s -> Str s | all isUpper s -> Str s | isMixedCase s -> Str s | otherwise -> Str (toUpper x:map toLower xs) WordBoundary -> case (x:xs) of s | not (isAscii x) -> Str s | all isUpper s -> Str s | isShortWord s -> Str (map toLower s) | isMixedCase s -> Str s | otherwise -> Str (toUpper x:map toLower xs) SentenceBoundary -> if isMixedCase (x:xs) || (all isUpper (x:xs)) then Str (x:xs) else Str (toUpper x : xs) _ -> Str (x:xs) tc (Span ("",["nocase"],[]) xs) = return $ Span ("",["nocase"],[]) xs tc x = return x isShortWord s = map toLower s `elem` ["a","an","and","as","at","but","by","c","ca","d","de" ,"down","et","for","from" ,"in","into","nor","of","on","onto","or","over","so" ,"the","till","to","up","van","von","via","with","yet"] isMixedCase :: String -> Bool isMixedCase xs = any isUpper xs && any isLower xs data CaseTransformState = WordBoundary | LastWordBoundary | SentenceBoundary | NoBoundary caseTransform :: (Inline -> State CaseTransformState Inline) -> [Inline] -> State CaseTransformState [Inline] caseTransform xform = fmap reverse . foldM go [] . splitUpStr where go acc s | s == Space || s == SoftBreak = do modify (\st -> case st of SentenceBoundary -> SentenceBoundary _ -> case acc of (Str [x]:_) | x `elem` "?!:" -> SentenceBoundary _ -> WordBoundary) return $ Space : acc go acc LineBreak = do put WordBoundary return $ Space : acc go acc (Str [c]) | c `elem` "-/\x2013\x2014\160" = do put WordBoundary return $ Str [c] : acc | isPunctuation c = do -- leave state unchanged return $ Str [c] : acc go acc (Str []) = return acc go acc (Str xs) = do res <- xform (Str xs) put NoBoundary return $ res : acc go acc (Span ("",["lastword"],[]) [x]) = do b <- get case b of WordBoundary -> put LastWordBoundary _ -> return () go acc x go acc (Span ("",classes,[]) xs) | null classes || classes == ["nocase"] = do res <- xform (Span ("",classes,[]) xs) put NoBoundary return $ res : acc go acc (Quoted qt xs) = (:acc) <$> (Quoted qt <$> caseTransform xform xs) go acc (Emph xs) = (:acc) <$> (Emph <$> caseTransform xform xs) go acc (Strong xs) = (:acc) <$> (Strong <$> caseTransform xform xs) go acc (Link at xs t) = (:acc) <$> (Link at <$> caseTransform xform xs <*> pure t) go acc (Image at xs t) = (:acc) <$> (Link at <$> caseTransform xform xs <*> pure t) go acc (Span attr xs) = (:acc) <$> (Span attr <$> caseTransform xform xs) go acc x = return $ x : acc splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline] splitStrWhen _ [] = [] splitStrWhen p (Str xs : ys) = go xs ++ splitStrWhen p ys where go [] = [] go s = case break p s of ([],[]) -> [] (zs,[]) -> [Str zs] ([],(w:ws)) -> Str [w] : go ws (zs,(w:ws)) -> Str zs : Str [w] : go ws splitStrWhen p (x : ys) = x : splitStrWhen p ys -- | A generic processing function. proc :: (Typeable a, Data b) => (a -> a) -> b -> b proc f = everywhere (mkT f) -- | A generic processing function: process a data structure in -- top-down manner. proc' :: (Typeable a, Data b) => (a -> a) -> b -> b proc' f = everywhere' (mkT f) -- | A generic monadic processing function. procM :: (Monad m, Typeable a, Data b) => (a -> m a) -> b -> m b procM f = everywhereM (mkM f) -- | A generic query function. query :: (Typeable a, Data b, Monoid m) => (a -> m) -> b -> m query f = everything mappend (mempty `mkQ` f) betterThan :: [a] -> [a] -> [a] betterThan [] b = b betterThan a _ = a toRead :: String -> String toRead [] = [] toRead (s:ss) = toUpper s : camel ss where camel x | '-':y:ys <- x = toUpper y : camel ys | '_':y:ys <- x = toUpper y : camel ys | y:ys <- x = y : camel ys | otherwise = [] inlinesToString :: [Inline] -> String inlinesToString = stringify headInline :: [Inline] -> String headInline = take 1 . stringify lastInline :: [Inline] -> String lastInline xs = case stringify xs of [] -> [] ys -> [last ys] initInline :: [Inline] -> [Inline] initInline [] = [] initInline (i:[]) | Str s <- i = return $ Str (init' s) | Emph is <- i = return $ Emph (initInline is) | Strong is <- i = return $ Strong (initInline is) | Superscript is <- i = return $ Superscript (initInline is) | Subscript is <- i = return $ Subscript (initInline is) | Quoted q is <- i = return $ Quoted q (initInline is) | SmallCaps is <- i = return $ SmallCaps (initInline is) | Strikeout is <- i = return $ Strikeout (initInline is) | Link at is t <- i = return $ Link at (initInline is) t | Span at is <- i = return $ Span at (initInline is) | otherwise = [] initInline (i:xs) = i : initInline xs tailInline :: [Inline] -> [Inline] tailInline (Space:xs) = xs tailInline (SoftBreak:xs) = xs tailInline xs = removeEmpty $ tailFirstInlineStr xs where removeEmpty = dropWhile (== Str "") tailFirstInlineStr :: [Inline] -> [Inline] tailFirstInlineStr = mapHeadInline (drop 1) toCapital :: [Inline] -> [Inline] toCapital ils@(Span (_,["nocase"],_) _:_) = ils toCapital ils = mapHeadInline capitalize ils mapHeadInline :: (String -> String) -> [Inline] -> [Inline] mapHeadInline _ [] = [] mapHeadInline f (i:xs) | Str [] <- i = mapHeadInline f xs | Str s <- i = Str (f s) : xs | Emph is <- i = Emph (mapHeadInline f is) : xs | Strong is <- i = Strong (mapHeadInline f is) : xs | Superscript is <- i = Superscript (mapHeadInline f is) : xs | Subscript is <- i = Subscript (mapHeadInline f is) : xs | Quoted q is <- i = Quoted q (mapHeadInline f is) : xs | SmallCaps is <- i = SmallCaps (mapHeadInline f is) : xs | Strikeout is <- i = Strikeout (mapHeadInline f is) : xs | Link at is t <- i = Link at (mapHeadInline f is) t : xs | Span at is <- i = Span at (mapHeadInline f is) : xs | otherwise = i : xs findFile :: [FilePath] -> FilePath -> IO (Maybe FilePath) findFile [] _ = return Nothing findFile (p:ps) f | isAbsolute f = do exists <- doesFileExist f if exists then return (Just f) else return Nothing | otherwise = do exists <- doesFileExist (p f) if exists then return $ Just (p f) else findFile ps f (&=) :: (ToYaml a, Monoid a, Eq a) => Text -> a -> [(Text, YamlBuilder)] -> [(Text, YamlBuilder)] x &= y = \acc -> if y == mempty then acc else (x Y..= y) : acc mapping' :: [[(Text, YamlBuilder)] -> [(Text, YamlBuilder)]] -> YamlBuilder mapping' = Y.mapping . foldr ($) [] -- TODO: romanNumeral is defined in Text.Pandoc.Parsing, but it's -- not exported there. Eventually we should remove this code duplication -- by exporting something from pandoc. parseRomanNumeral :: String -> Maybe Int parseRomanNumeral s = case P.parse (pRomanNumeral <* P.eof) "" s of Left _ -> Nothing Right x -> Just x -- | Parses a roman numeral (uppercase or lowercase), returns number. pRomanNumeral :: P.Stream s m Char => P.ParsecT s st m Int pRomanNumeral = do let lowercaseRomanDigits = ['i','v','x','l','c','d','m'] let uppercaseRomanDigits = ['I','V','X','L','C','D','M'] c <- P.lookAhead $ P.oneOf (lowercaseRomanDigits ++ uppercaseRomanDigits) let romanDigits = if isUpper c then uppercaseRomanDigits else lowercaseRomanDigits let [one, five, ten, fifty, hundred, fivehundred, thousand] = map P.char romanDigits thousands <- P.many thousand >>= (return . (1000 *) . length) ninehundreds <- P.option 0 $ P.try $ hundred >> thousand >> return 900 fivehundreds <- P.many fivehundred >>= (return . (500 *) . length) fourhundreds <- P.option 0 $ P.try $ hundred >> fivehundred >> return 400 hundreds <- P.many hundred >>= (return . (100 *) . length) nineties <- P.option 0 $ P.try $ ten >> hundred >> return 90 fifties <- P.many fifty >>= (return . (50 *) . length) forties <- P.option 0 $ P.try $ ten >> fifty >> return 40 tens <- P.many ten >>= (return . (10 *) . length) nines <- P.option 0 $ P.try $ one >> ten >> return 9 fives <- P.many five >>= (return . (5 *) . length) fours <- P.option 0 $ P.try $ one >> five >> return 4 ones <- P.many one >>= (return . length) let total = thousands + ninehundreds + fivehundreds + fourhundreds + hundreds + nineties + fifties + forties + tens + nines + fives + fours + ones if total == 0 then fail "not a roman numeral" else return total isRange :: String -> Bool isRange s = ',' `elem` s || '-' `elem` s || '\x2013' `elem` s