{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleInstances #-} ----------------------------------------------------------------------------- -- | -- Module : Hasmin.Types.String -- Copyright : (c) 2017 Cristian Adrián Ontivero -- License : BSD3 -- Stability : experimental -- Portability : unknown -- ----------------------------------------------------------------------------- module Hasmin.Types.String ( removeQuotes , unquoteUrl , unquoteFontFamily , convertEscaped , mapString , StringType(..) ) where import Control.Monad.Reader (ask, Reader) import Control.Monad (mzero) import Control.Applicative ((<|>), many) import Data.Attoparsec.Text (Parser, parse, IResult(Done, Partial, Fail), maybeResult, feed) import Data.Monoid ((<>)) import Data.Text (Text) import qualified Data.Attoparsec.Text as A import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Text.Lazy.Builder (Builder) import qualified Data.Text.Lazy.Builder as B import qualified Data.Char as C import Data.Bits ((.|.), shiftL) import Data.Foldable (foldl') import Hasmin.Config import Hasmin.Class import Hasmin.Parser.Primitives import Hasmin.Parser.Utils -- | The > data type. -- It represents a string, formed by Unicode characters, delimited by either -- double (\") or single (\') quotes. data StringType = DoubleQuotes Text | SingleQuotes Text deriving (Show, Eq) instance ToText StringType where toBuilder (DoubleQuotes t) = B.singleton '\"' <> B.fromText t <> B.singleton '\"' toBuilder (SingleQuotes t) = B.singleton '\'' <> B.fromText t <> B.singleton '\'' instance Minifiable StringType where minify (DoubleQuotes t) = do conf <- ask convertedText <- convertEscapedText t pure $ if T.any ('\"' ==) convertedText then DoubleQuotes t else if shouldNormalizeQuotes conf -- TODO make it so that the best quotes are used for the file then DoubleQuotes convertedText else DoubleQuotes convertedText minify (SingleQuotes t) = do conf <- ask convertedText <- convertEscapedText t pure $ if T.any ('\'' ==) convertedText then SingleQuotes t else if shouldNormalizeQuotes conf && T.all ('\"' /=) convertedText then DoubleQuotes convertedText else SingleQuotes convertedText convertEscapedText :: Text -> Reader Config Text convertEscapedText t = do conf <- ask pure $ if shouldConvertEscaped conf then either (const t) id (A.parseOnly convertEscaped t) else t mapString :: (Text -> Reader Config Text) -> StringType -> Reader Config StringType mapString f (DoubleQuotes t) = DoubleQuotes <$> f t mapString f (SingleQuotes t) = SingleQuotes <$> f t unquoteStringType :: (Text -> Maybe Text) -> StringType -> Either Text StringType unquoteStringType g x@(DoubleQuotes s) = maybe (Right x) Left (g s) unquoteStringType g x@(SingleQuotes s) = maybe (Right x) Left (g s) removeQuotes :: StringType -> Either Text StringType removeQuotes = unquoteStringType (unquote ident) unquoteUrl :: StringType -> Either Text StringType unquoteUrl = unquoteStringType (unquote unquotedURL) unquoteFontFamily :: StringType -> Either Text StringType unquoteFontFamily = unquoteStringType (unquote fontfamilyname) unquote :: Parser Text -> Text -> Maybe Text unquote p s = case parse p s of Done i r -> if T.null i then Just r else Nothing par@Partial{} -> maybeResult (feed par mempty) Fail{} -> Nothing -- TODO can the Parser be avoided by a fold, or one of the provided library -- functions? Apart from being cleaner, doing so would simplify other functions. -- | Parse and convert any escaped unicode to its underlying Char. convertEscaped :: Parser Text convertEscaped = (TL.toStrict . B.toLazyText) <$> go where go = do nonescapedText <- B.fromText <$> A.takeWhile (/= '\\') cont nonescapedText <|> pure nonescapedText cont b = do _ <- A.char '\\' c <- A.peekChar case c of Just _ -> parseEscapedAndContinue b Nothing -> pure (b <> B.singleton '\\') parseEscapedAndContinue :: Builder -> Parser Builder parseEscapedAndContinue b = do u8 <- utf8 ((b <> u8) <>) <$> go utf8 :: Parser Builder utf8 = do mch <- atMost 6 hexadecimal pure $ maybe ("\\" <> B.fromString mch) B.singleton (hexToChar mch) -- Interpret a hexadecimal string as a decimal Int, and convert it into the -- corresponding Char. hexToChar :: [Char] -> Maybe Char hexToChar xs | i > maxChar = Nothing | otherwise = Just (C.chr i) where i = foldl' step 0 xs maxChar = fromEnum (maxBound :: Char) step a c | w - 48 < 10 = (a `shiftL` 4) .|. fromIntegral (w - 48) | w >= 97 = (a `shiftL` 4) .|. fromIntegral (w - 87) | otherwise = (a `shiftL` 4) .|. fromIntegral (w - 55) where w = C.ord c fontfamilyname :: Parser Text fontfamilyname = do i <- ident is <- many (skipComments *> ident) if T.toLower i `elem` invalidNames then mzero else pure $ i <> foldMap (" "<>) is where invalidNames = ["serif", "sans-serif", "monospace", "cursive", "fantasy", "inherit", "initial", "unset", "default"] -- | Parse a URL without enclosing quotes. -- See unquotedURL :: Parser Text unquotedURL = do t <- many (escape <|> (B.singleton <$> A.satisfy validChar)) pure $ TL.toStrict (B.toLazyText (mconcat t)) where validChar x = x /= '\"' && x /= '\'' && x /= '(' && x /= ')' && x /= '\\' && notWhitespace x && notNonprintable x notWhitespace x = x /= '\n' && x /= '\t' && x /= ' ' notNonprintable x = not (C.chr 0 <= x && x <= C.chr 8) && x /= '\t' && not ('\SO' <= x && x <= C.chr 31) && x /= '\DEL'