src/Text/CSL/Util.hs

{-# LANGUAGE ScopedTypeVariables, PatternGuards #-}
module Text.CSL.Util
  ( safeRead
  , readNum
  , (<^>)
  , capitalize
  , camelize
  , isPunct
  , last'
  , init'
  , words'
  , trim
  , triml
  , trimr
  , parseBool
  , parseString
  , parseInt
  , mb
  , (.#?)
  , (.#:)
  , onBlocks
  , titlecase
  , unTitlecase
  , protectCase
  , splitStrWhen
  , proc
  , proc'
  , procM
  , query
  , betterThan
  , readable
  , toShow
  , toRead
  , inlinesToString
  , headInline
  , lastInline
  , tailInline
  , initInline
  , tailFirstInlineStr
  , toCapital
  , mapHeadInline
  , tr'
  ) where
import Data.Aeson
import Data.Aeson.Types (Parser)
import Data.Text (Text)
import qualified Data.Text as T
import Control.Applicative ((<$>), (<$), (<*>), pure)
import Data.Char (toLower, toUpper, isLower, isUpper, isPunctuation)
import qualified Data.Traversable
import Text.Pandoc.Shared (safeRead, stringify)
import Text.Pandoc.Walk (walk)
import Text.Pandoc
import Data.List.Split (wordsBy, whenElt, dropBlanks, split )
import Control.Monad.State
import Data.Monoid (Monoid, mappend, mempty)
import Data.Generics ( Typeable, Data, everywhere, everywhereM, mkM,
                       everywhere', everything, mkT, mkQ )
import qualified Debug.Trace

readNum :: String -> Int
readNum s = case reads s of
              [(x,"")] -> x
              _        -> 0

-- | Conjoin strings, avoiding repeated punctuation.
(<^>) :: String -> String -> String
[] <^> sb         = sb
sa <^> []         = sa
sa <^> (s:xs)
  | s `elem` puncts && last sa `elem` puncts = sa ++ xs
  where puncts = ";:,. "
sa <^> sb         = sa ++ sb

capitalize :: String -> String
capitalize [] = []
capitalize (c:cs) = toUpper c : cs

isPunct :: Char -> Bool
isPunct c = c `elem` ".;?!"

camelize :: String -> String
camelize ('-':y:ys) = toUpper y : camelize ys
camelize ('_':y:ys) = toUpper y : camelize ys
camelize     (y:ys) =         y : camelize ys
camelize      _     = []

last' :: [a] -> [a]
last' [] = []
last' xs = [last xs]

init' :: [a] -> [a]
init' [] = []
init' xs = init xs

-- | Like words, but doesn't break on nonbreaking spaces etc.
words' :: String -> [String]
words' = wordsBy (\c -> c == ' ' || c == '\t' || c == '\r' || c == '\n')

-- | Remove leading and trailing space (including newlines) from string.
trim :: String -> String
trim = triml . trimr

triml :: String -> String
triml = dropWhile (`elem` " \r\n\t")

trimr :: String -> String
trimr = reverse . triml . reverse

-- | Parse JSON Boolean or Number as Bool.
parseBool :: Value -> Parser Bool
parseBool (Bool b)   = return b
parseBool (Number n) = case fromJSON (Number n) of
                            Success (0 :: Int) -> return False
                            Success _          -> return True
                            Error e            -> fail $ "Could not read boolean: " ++ e
parseBool _          = fail "Could not read boolean"

-- | Parse JSON value as String.
parseString :: Value -> Parser String
parseString (String s) = return $ T.unpack s
parseString (Number n) = case fromJSON (Number n) of
                            Success (x :: Int) -> return $ show x
                            Error _ -> case fromJSON (Number n) of
                                            Success (x :: Double) -> return $ show x
                                            Error e -> fail $ "Could not read string: " ++ e
parseString (Bool b)   = return $ map toLower $ show b
parseString v@(Array _)= inlinesToString `fmap` parseJSON v
parseString _          = fail "Could not read string"

-- | Parse JSON value as Int.
parseInt :: Value -> Parser Int
parseInt (String s) = case safeRead (T.unpack s) of
                            Just n  -> return n
                            Nothing -> fail "Could not read Int"
parseInt (Number n) = case fromJSON (Number n) of
                            Success (x :: Int) -> return x
                            Error e -> fail $ "Could not read string: " ++ e
parseInt _          = fail "Could not read string"

mb :: Monad m => (b -> m a) -> (Maybe b -> m (Maybe a))
mb  = Data.Traversable.mapM

-- | Parse as a string (even if the value is a number).
(.#?) :: Object -> Text -> Parser (Maybe String)
x .#? y = (x .:? y) >>= mb parseString

(.#:) :: Object -> Text -> Parser String
x .#: y = (x .: y) >>= parseString

onBlocks :: ([Inline] -> [Inline]) -> [Block] -> [Block]
onBlocks f bs = walk f' bs
  where f' (Para ils)  = Para (f ils)
        f' (Plain ils) = Plain (f ils)
        f' x           = x

hasLowercaseWord :: [Inline] -> Bool
hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation
  where startsWithLowercase (Str (x:_)) = isLower x
        startsWithLowercase _           = False

splitUpStr :: [Inline] -> [Inline]
splitUpStr = splitStrWhen (\c -> isPunctuation c || c == '\160')

unTitlecase :: [Inline] -> [Inline]
unTitlecase zs = evalState (caseTransform untc $ splitUpStr zs) SentenceBoundary
  where untc (Str (x:xs))
          | isUpper x = Str (toLower x : xs)
        untc (Span ("",[],[]) xs)
          | hasLowercaseWord xs = Span ("",["nocase"],[]) xs
        untc x = x

protectCase :: [Inline] -> [Inline]
protectCase zs = evalState (caseTransform protect $ splitUpStr zs) SentenceBoundary
  where protect (Span ("",[],[]) xs)
          | hasLowercaseWord xs = Span ("",["nocase"],[]) xs
        protect x = x

titlecase :: [Inline] -> [Inline]
titlecase zs = evalState (caseTransform tc $ splitUpStr zs) SentenceBoundary
  where tc (Str (x:xs))
          | isLower x && not (isShortWord (x:xs)) = Str (toUpper x : xs)
          where isShortWord  s = s `elem`
                      ["a","an","and","as","at","but","by","d","de"
                      ,"down","for","from"
                      ,"in","into","nor","of","on","onto","or","over","so"
                      ,"the","till","to","up","van","von","via","with","yet"]
        tc (Span ("",["nocase"],[]) xs) = Span ("",["nocase"],[]) xs
        tc x = x

data CaseTransformState = WordBoundary | SentenceBoundary | NoBoundary

caseTransform :: (Inline -> Inline) -> [Inline]
              -> State CaseTransformState [Inline]
caseTransform xform = mapM go
  where go Space            = Space <$ modify (\st ->
                                case st of
                                     SentenceBoundary -> SentenceBoundary
                                     _                -> WordBoundary)
        go LineBreak        = Space <$ put WordBoundary
        go (Str "’")        = return $ Str "’"
        go (Str [x])
          | x `elem` "?!:"  = (Str [x]) <$ put SentenceBoundary
          | isPunctuation x || x == '\160' = (Str [x]) <$ put WordBoundary
        go (Str [])         = return $ Str []
        go (Str (x:xs)) = do
               st <- get
               put NoBoundary
               return $ case st of
                  WordBoundary -> xform $ Str (x:xs)
                  _            -> Str (x:xs)
        go (Span ("",classes,[]) xs) | null classes || classes == ["nocase"] =
            do st <- get
               put NoBoundary
               return $ case st of
                  WordBoundary -> xform (Span ("",classes,[]) xs)
                  _            -> (Span ("",classes,[]) xs)
        go (Quoted qt xs)   = Quoted qt <$> caseTransform xform xs
        go (Emph xs)        = Emph <$> caseTransform xform xs
        go (Strong xs)      = Strong <$> caseTransform xform xs
        go (Link xs t)      = Link <$> caseTransform xform xs <*> pure t
        go (Image xs t)     = Link <$> caseTransform xform xs <*> pure t
        go (Span attr xs)   = Span attr <$> caseTransform xform xs
        go x = return x

splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
splitStrWhen _ [] = []
splitStrWhen p (Str xs : ys)
  | any p xs = map Str ((split . dropBlanks) (whenElt p) xs) ++ splitStrWhen p ys
splitStrWhen p (x : ys) = x : splitStrWhen p ys

-- | A generic processing function.
proc :: (Typeable a, Data b) => (a -> a) -> b -> b
proc f = everywhere (mkT f)

-- | A generic processing function: process a data structure in
-- top-down manner.
proc' :: (Typeable a, Data b) => (a -> a) -> b -> b
proc' f = everywhere' (mkT f)

-- | A generic monadic processing function.
procM :: (Monad m, Typeable a, Data b) => (a -> m a) -> b -> m b
procM f = everywhereM (mkM f)

-- | A generic query function.
query :: (Typeable a, Data b, Monoid m) => (a -> m) -> b -> m
query f = everything mappend (mempty `mkQ` f)

betterThan :: [a] -> [a] -> [a]
betterThan [] b = b
betterThan a  _ = a

readable :: (Read a, Show b) => (String -> a, b -> String)
readable =  (read . toRead, toShow . show)

toShow :: String -> String
toShow = foldr g [] . f
    where g    x xs  = if isUpper x then '-' : toLower x : xs else x : xs
          f (  x:xs) = toLower x : xs
          f       [] = []

toRead :: String -> String
toRead    []  = []
toRead (s:ss) = toUpper s : camel ss
    where
      camel x
          | '-':y:ys <- x = toUpper y : camel ys
          | '_':y:ys <- x = toUpper y : camel ys
          |     y:ys <- x =         y : camel ys
          | otherwise     = []

inlinesToString :: [Inline] -> String
inlinesToString = stringify

headInline :: [Inline] -> String
headInline = take 1 . stringify

lastInline :: [Inline] -> String
lastInline xs = case stringify xs of
                      [] -> []
                      ys -> [last ys]

initInline :: [Inline] -> [Inline]
initInline [] = []
initInline (i:[])
    | Str          s <- i = return $ Str         (init'       s)
    | Emph        is <- i = return $ Emph        (initInline is)
    | Strong      is <- i = return $ Strong      (initInline is)
    | Superscript is <- i = return $ Superscript (initInline is)
    | Subscript   is <- i = return $ Subscript   (initInline is)
    | Quoted q    is <- i = return $ Quoted q    (initInline is)
    | SmallCaps   is <- i = return $ SmallCaps   (initInline is)
    | Strikeout   is <- i = return $ Strikeout   (initInline is)
    | Link      is t <- i = return $ Link        (initInline is) t
    | Span at     is <- i = return $ Span at     (initInline is)
    | otherwise           = []
initInline (i:xs) = i : initInline xs

tailInline :: [Inline] -> [Inline]
tailInline (Space:xs) = xs
tailInline xs         = removeEmpty $ tailFirstInlineStr xs
  where removeEmpty   = dropWhile (== Str "")

tailFirstInlineStr :: [Inline] -> [Inline]
tailFirstInlineStr = mapHeadInline (drop 1)

toCapital :: [Inline] -> [Inline]
toCapital = mapHeadInline capitalize

mapHeadInline :: (String -> String) -> [Inline] -> [Inline]
mapHeadInline _ [] = []
mapHeadInline f (i:xs)
    | Str         [] <- i =                      mapHeadInline f xs
    | Str          s <- i = Str         (f                s)   : xs
    | Emph        is <- i = Emph        (mapHeadInline f is)   : xs
    | Strong      is <- i = Strong      (mapHeadInline f is)   : xs
    | Superscript is <- i = Superscript (mapHeadInline f is)   : xs
    | Subscript   is <- i = Subscript   (mapHeadInline f is)   : xs
    | Quoted q    is <- i = Quoted q    (mapHeadInline f is)   : xs
    | SmallCaps   is <- i = SmallCaps   (mapHeadInline f is)   : xs
    | Strikeout   is <- i = Strikeout   (mapHeadInline f is)   : xs
    | Link      is t <- i = Link        (mapHeadInline f is) t : xs
    | Span     at is <- i = Span at     (mapHeadInline f is)   : xs
    | otherwise           = i : xs

tr' :: Show a => String -> a -> a
tr' note' x = Debug.Trace.trace (note' ++ ": " ++ show x) x