module Web.CSS.Escaping
       ( escapeIdentifier
       , escapeString
       ) where

import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy.Builder.Int as TLBI

-- | Format a character as a CSS hex escape: a backslash, the code
-- point as six zero-padded hexadecimal digits, and a single trailing
-- space.
--
-- The trailing space terminates the escape. CSS tokenizers consume
-- one whitespace character directly after a hex escape (even when all
-- six digits are present), so without the terminator a literal space
-- that follows the escape in the output would be swallowed by the
-- parser instead of being preserved.
escapeChar :: Char -> TL.Text
escapeChar c =
  TL.cons '\\' (TL.snoc hexDigits ' ')
  where
    -- Code point rendered in hex, left-padded with zeros to six digits.
    hexDigits =
      TL.justifyRight 6 '0' .
      TLB.toLazyText .
      TLBI.hexadecimal $
      fromEnum c

-- | Escape a CSS identifier. This function is slightly more
-- permissive than the CSS standard. For example, it does not reject
-- identifiers that begin with two hyphens. The function always
-- escapes 'special' characters such as the tilde (@~@) or left
-- bracket (@[@). As such, they will never be interpreted as special
-- characters.
--
-- ASCII letters, digits, @_@ and @-@ are passed through unchanged,
-- with one exception: a digit in first position, or directly after a
-- single leading hyphen, is emitted as a hex escape. Left as-is it
-- would make a CSS tokenizer read the output as a number rather than
-- an identifier. Other printable US-ASCII characters are prefixed
-- with a backslash; everything else is emitted via 'escapeChar'.
escapeIdentifier :: TL.Text -> TL.Text
escapeIdentifier ident =
  case TL.uncons ident of
    -- Leading digit: must be hex-escaped to keep the token an identifier.
    Just (c, rest)
      | isAsciiDigit c ->
          escapeChar c `TL.append` escapeRest rest
    -- Hyphen followed by a digit: same problem one position later.
    Just ('-', rest)
      | Just (d, rest') <- TL.uncons rest
      , isAsciiDigit d ->
          TL.cons '-' (escapeChar d `TL.append` escapeRest rest')
    _ -> escapeRest ident
  where
    escapeRest = TL.concatMap escape

    isAsciiDigit c = c >= '0' && c <= '9'
    isAsciiLetter c = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')

    escape c
      | isAsciiLetter c || isAsciiDigit c = TL.singleton c     -- No escaping
      | c == '_' || c == '-'              = TL.singleton c     -- No escaping
      | c >= ' ' && c <= '~'              = TL.pack ['\\', c]  -- Simple escapes
      | otherwise                         = escapeChar c       -- General escape

-- | Escape a CSS string value. This function is conservative and
-- produces only output characters in the US-ASCII range. This comes
-- at the cost of space usage, so this function should not be used to
-- encode strings expected to contain a disproportionate amount of
-- non-US-ASCII characters.
--
-- The result is wrapped in double quotes. Inside the quotes, control
-- characters and everything above @~@ are emitted via 'escapeChar';
-- the double quote and the backslash are prefixed with a backslash;
-- all remaining printable US-ASCII characters are copied verbatim.
-- Escaping the backslash is essential: left as-is, an input backslash
-- would start an escape sequence in the output and could swallow the
-- character after it, including the closing quote.
escapeString :: TL.Text -> TL.Text
escapeString s =
  TL.concat [ TL.pack "\""
            , TL.concatMap escape s
            , TL.pack "\""
            ]
    where
      escape c
        | c < ' ' || c > '~'    = escapeChar c        -- Control / non-US-ASCII
        | c == '"' || c == '\\' = TL.pack ['\\', c]   -- Quote and backslash
        | otherwise             = TL.singleton c      -- Plain US-ASCII