{-# LANGUAGE FlexibleInstances #-} module Web.Encodings.StringLike ( StringLike (..) ) where import Prelude (Char, Bool (..), String, Int, Eq (..), Show, ($), (.), (<=), (-), otherwise, Maybe (..), (&&), not, elem, (+), toEnum, fromEnum, map, (<), (||), (>=), fmap) import qualified Prelude as P import qualified Data.List as L import qualified Web.Encodings.ListHelper as LH import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.Monoid as M import qualified Data.Text as TS import qualified Data.Text.Lazy as TL import Data.Maybe (fromMaybe) import Data.Bits ((.|.),(.&.),shiftL,shiftR) import Data.Word (Word8) import qualified Data.ByteString class (Eq a, Show a) => StringLike a where span :: (Char -> Bool) -> a -> (a, a) null :: a -> Bool concatMap :: (Char -> String) -> a -> a dropWhile :: (Char -> Bool) -> a -> a break :: (Char -> Bool) -> a -> (a, a) cons :: Char -> a -> a uncons :: a -> Maybe (Char, a) append :: a -> a -> a intercalate :: a -> [a] -> a isPrefixOf :: a -> a -> Bool take :: Int -> a -> a head :: a -> Char tail :: a -> a init :: a -> a last :: a -> Char empty :: a pack :: String -> a unpack :: a -> String packUtf8 :: String -> a unpackUtf8 :: BS.ByteString -> Maybe a dropPrefix :: a -> a -> Maybe a dropPrefix porig sorig = helper porig sorig where helper p s | null p && null s = Just empty | null p = Just s | null s = Nothing | head p == head s = helper (tail p) (tail s) | otherwise = Nothing dropPrefix' :: a -> a -> a dropPrefix' p c = case dropPrefix p c of Just x -> x Nothing -> c dropQuotes :: a -> a dropQuotes s | lengthGE 2 s && head s == '"' && last s == '"' = tail $ init s | otherwise = s chomp :: a -> a chomp s | null s = s chomp s = case last s of '\n' -> chomp $ init s '\r' -> chomp $ init s _ -> s split :: Char -> a -> [a] split c s = let (next, rest) = breakChar c s in if null next then (if null rest then [] else [rest]) else next : split c rest splitOneOf :: [Char] -> a -> [a] splitOneOf cs s = let (next, rest) = breakChars cs s in if null next then (if null rest then [] else [rest]) else next : splitOneOf cs rest breakCharsMaybe :: [Char] -> a -> Maybe (a, a) breakCharsMaybe c s | null s = Nothing | head s `elem` c = Just (empty, tail s) | otherwise = do (next, rest) <- breakCharsMaybe c (tail s) Just (cons (head s) next, rest) breakCharMaybe :: Char -> a -> Maybe (a, a) breakCharMaybe c s | null s = Nothing | c == head s = Just (empty, tail s) | otherwise = do (next, rest) <- breakCharMaybe c (tail s) Just (cons (head s) next, rest) breakChar :: Char -> a -> (a, a) breakChar c s = fromMaybe (s, empty) $ breakCharMaybe c s breakChars :: [Char] -> a -> (a, a) breakChars c s = fromMaybe (s, empty) $ breakCharsMaybe c s breakString :: a -> a -> (a, a) breakString _ c | null c = (empty, empty) breakString p c = case dropPrefix p c of Just x -> (empty, x) Nothing -> let x = head c xs = tail c (next, rest) = breakString p xs in (cons x next, rest) takeLine :: a -> (a, a) takeLine a = let (x, y) = breakChar '\n' a x' = chomp x in (x', y) takeUntilBlank :: a -> ([a], a) takeUntilBlank a = let (next, rest) = takeLine a in if null next then ([], rest) else let (nexts, rest') = takeUntilBlank rest in (next : nexts, rest') lengthLT :: Int -> a -> Bool lengthLT i _ | i <= 0 = False lengthLT i a | null a = True | otherwise = lengthLT (i - 1) $ tail a lengthGE :: Int -> a -> Bool lengthGE i = not . lengthLT i -- | UTF8 encode each character before passing to concatMap, if -- appropriate. concatMapUtf8 :: (Char -> String) -> a -> a instance StringLike [Char] where intercalate = L.intercalate null = P.null concatMap = P.concatMap tail = P.tail head = P.head cons = LH.cons uncons [] = Nothing uncons (x:xs) = Just (x, xs) span = P.span dropWhile = P.dropWhile break = P.break append = M.mappend isPrefixOf = L.isPrefixOf take = P.take empty = M.mempty pack = P.id unpack = P.id packUtf8 = pack unpackUtf8 = utf8Decode . Data.ByteString.unpack init = P.init last = P.last concatMapUtf8 f = concatMap (concatMap f . utf8EncodeChar) instance StringLike BS.ByteString where span = BS.span null = BS.null concatMap f = BS.concatMap $ pack . f dropWhile = BS.dropWhile break = BS.break cons = BS.cons uncons = BS.uncons append = BS.append intercalate = BS.intercalate isPrefixOf = BS.isPrefixOf take = BS.take head = BS.head tail = BS.tail empty = BS.empty pack = BS.pack unpack = BS.unpack packUtf8 = pack . concatMap utf8EncodeChar unpackUtf8 = Just init = BS.init last = BS.last concatMapUtf8 = concatMap instance StringLike BL.ByteString where span = BL.span null = BL.null concatMap f = BL.concatMap $ pack . f dropWhile = BL.dropWhile break = BL.break cons = BL.cons uncons = BL.uncons append = BL.append intercalate = BL.intercalate isPrefixOf = BL.isPrefixOf take i = BL.take $ P.fromIntegral i head = BL.head tail = BL.tail empty = BL.empty pack = BL.pack unpack = BL.unpack packUtf8 = pack . concatMap utf8EncodeChar unpackUtf8 bs = Just $ BL.fromChunks [bs] init = BL.init last = BL.last concatMapUtf8 = concatMap instance StringLike TS.Text where span = TS.spanBy null = TS.null concatMap f = TS.concatMap $ pack . f dropWhile = TS.dropWhile break = TS.breakBy cons = TS.cons uncons = TS.uncons append = TS.append intercalate = TS.intercalate isPrefixOf = TS.isPrefixOf take i = TS.take $ P.fromIntegral i head = TS.head tail = TS.tail empty = TS.empty pack = TS.pack unpack = TS.unpack packUtf8 = pack unpackUtf8 = fmap pack . unpackUtf8 init = TS.init last = TS.last concatMapUtf8 f = concatMap (concatMap f . utf8EncodeChar) instance StringLike TL.Text where span = TL.spanBy null = TL.null concatMap f = TL.concatMap $ pack . f dropWhile = TL.dropWhile break = TL.breakBy cons = TL.cons uncons = TL.uncons append = TL.append intercalate = TL.intercalate isPrefixOf = TL.isPrefixOf take i = TL.take $ P.fromIntegral i head = TL.head tail = TL.tail empty = TL.empty pack = TL.pack unpack = TL.unpack packUtf8 = pack unpackUtf8 = fmap pack . unpackUtf8 init = TL.init last = TL.last concatMapUtf8 f = concatMap (concatMap f . utf8EncodeChar) -- | Code taken from Codec.Binary.UTF8.String.decode utf8Decode :: [Word8] -> Maybe String utf8Decode [] = Just "" utf8Decode (c:cs) | c < 0x80 = do cs' <- utf8Decode cs Just $ toEnum (fromEnum c) : cs' | c < 0xc0 = Nothing | c < 0xe0 = multi1 | c < 0xf0 = multi_byte 2 0xf 0x800 | c < 0xf8 = multi_byte 3 0x7 0x10000 | c < 0xfc = multi_byte 4 0x3 0x200000 | c < 0xfe = multi_byte 5 0x1 0x4000000 | otherwise = Nothing where multi1 = case cs of c1 : ds | c1 .&. 0xc0 == 0x80 -> let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f) in if d >= 0x000080 then (do ds' <- utf8Decode ds Just $ toEnum d : ds') else Nothing _ -> Nothing multi_byte :: Int -> Word8 -> Int -> Maybe [Char] multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask)) where aux 0 rs acc | overlong <= acc && acc <= 0x10ffff && (acc < 0xd800 || 0xdfff < acc) && (acc < 0xfffe || 0xffff < acc) = do rs' <- utf8Decode rs Just $ toEnum acc : rs' | otherwise = Nothing aux n (r:rs) acc | r .&. 0xc0 == 0x80 = aux (n-1) rs $ shiftL acc 6 .|. fromEnum (r .&. 0x3f) aux _ _rs _ = Nothing utf8EncodeChar :: Char -> String utf8EncodeChar = map toEnum . utf8EncodeInt . fromEnum -- | Code taken from Codec.Binary.UTF8.String.encode utf8EncodeInt :: Int -> [Int] utf8EncodeInt c | c <= 0x7f = [c] | c <= 0x7ff = [ 0xc0 + (c `shiftR` 6) , 0x80 + c .&. 0x3f ] | c <= 0xffff = [ 0xe0 + (c `shiftR` 12) , 0x80 + ((c `shiftR` 6) .&. 0x3f) , 0x80 + c .&. 0x3f ] | otherwise = [ 0xf0 + (c `shiftR` 18) , 0x80 + ((c `shiftR` 12) .&. 0x3f) , 0x80 + ((c `shiftR` 6) .&. 0x3f) , 0x80 + c .&. 0x3f ]