{-# LANGUAGE StrictData #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} -- | Provides functions that facilitate defining textcase transformations. -- To see how these can be used used, see the definitions of @addTextCase@ -- in "Citeproc.Pandoc" and "Citproc.CslJson". module Citeproc.CaseTransform ( CaseTransformState(..) , CaseTransformer(..) , withUppercaseAll , withLowercaseAll , withCapitalizeWords , withCapitalizeFirst , withSentenceCase , withTitleCase ) where import Data.Ord () import Data.Semigroup import Data.Char (isUpper, isLower, isAscii) import Data.Text (Text) import qualified Data.Text as T import Citeproc.Types (Lang(..)) -- | Wraps a function used to define textcase transformations. newtype CaseTransformer = CaseTransformer { unCaseTransformer :: Maybe Lang -> CaseTransformState -> Text -> Text } -- | Tracks context in textcase transformations. data CaseTransformState = Start | StartSentence | AfterWordEnd | AfterWordChar | AfterSentenceEndingPunctuation | AfterOtherPunctuation | BeforeLastWord deriving (Show, Eq) toUpper' :: Maybe Lang -> Text -> Text toUpper' mblang = T.toUpper . case mblang of Just (Lang "tr" _) -> T.map (\c -> case c of 'i' -> 'İ' 'ı' -> 'I' _ -> c) _ -> id toLower' :: Maybe Lang -> Text -> Text toLower' mblang = T.toLower . case mblang of Just (Lang "tr" _) -> T.map (\c -> case c of 'İ' -> 'i' 'I' -> 'ı' _ -> c) _ -> id -- | Uppercase everything. withUppercaseAll :: CaseTransformer withUppercaseAll = CaseTransformer (\mblang _ -> toUpper' mblang) -- | Lowercase everything. withLowercaseAll :: CaseTransformer withLowercaseAll = CaseTransformer (\mblang _ -> toLower' mblang) -- | Capitalize all words. withCapitalizeWords :: CaseTransformer withCapitalizeWords = CaseTransformer go where go mblang st chunk | isMixedCase chunk = chunk | st == Start || st == StartSentence || st == AfterWordEnd || st == BeforeLastWord = if T.all isLower chunk then capitalizeText mblang chunk else chunk | otherwise = chunk -- | Capitalize first letter. withCapitalizeFirst :: CaseTransformer withCapitalizeFirst = CaseTransformer go where go mblang st chunk | isMixedCase chunk = chunk | st == Start = if T.all isLower chunk then capitalizeText mblang chunk else chunk | otherwise = chunk -- | Capitalize first letter of each sentence. withSentenceCase :: CaseTransformer withSentenceCase = CaseTransformer go where go mblang st chunk | isCapitalized chunk , not (st == Start || st == StartSentence) = T.toLower chunk | isCapitalized chunk || T.all isLower chunk , st == Start || st == StartSentence = capitalizeText mblang $ T.toLower chunk | otherwise = chunk -- | Use title case. withTitleCase :: CaseTransformer withTitleCase = CaseTransformer go where go mblang st chunk | isMixedCase chunk = chunk | T.all isUpper chunk = chunk -- spec doesn't say this but tests do -- textcase_TitleCapitalization.txt | T.any (not . isAscii) chunk = chunk | st == StartSentence || st == Start = capitalizeText mblang $ T.toLower chunk | st == AfterWordEnd , not (isStopWord chunk) , T.compareLength chunk 1 == GT = capitalizeText mblang $ T.toLower chunk | st == BeforeLastWord , T.compareLength chunk 1 == GT = capitalizeText mblang $ T.toLower chunk | otherwise = chunk isCapitalized :: Text -> Bool isCapitalized t = case T.uncons t of Just (c, t') -> isUpper c && T.all isLower t' _ -> False isMixedCase :: Text -> Bool isMixedCase t = T.any isUpper t && T.any isLower t capitalizeText :: Maybe Lang -> Text -> Text capitalizeText mblang x = case T.uncons x of Just (c,x') -> toUpper' mblang (T.singleton c) <> x' Nothing -> x isStopWord :: Text -> Bool isStopWord "a" = True isStopWord "an" = True isStopWord "and" = True isStopWord "as" = True isStopWord "at" = True isStopWord "but" = True isStopWord "by" = True isStopWord "down" = True isStopWord "for" = True isStopWord "from" = True isStopWord "in" = True isStopWord "into" = True isStopWord "nor" = True isStopWord "of" = True isStopWord "on" = True isStopWord "onto" = True isStopWord "or" = True isStopWord "over" = True isStopWord "so" = True isStopWord "the" = True isStopWord "till" = True isStopWord "to" = True isStopWord "up" = True isStopWord "via" = True isStopWord "with" = True isStopWord "yet" = True -- not in original list but seems required by test flipflop_Apostrophes -- and textcase_SkipNameParticlesInTitleCase isStopWord "about" = True isStopWord "van" = True isStopWord "von" = True isStopWord "de" = True isStopWord "d" = True isStopWord "l" = True isStopWord _ = False