module Web.Encodings.StringLike
( StringLike (..)
) where
import Prelude (Char, Bool (..), String, Int, Eq (..), Show, ($), (.),
(<=), (), otherwise, Maybe (..), (&&), not, elem, (+),
toEnum, fromEnum, map, (<), (||), (>=), fmap)
import qualified Prelude as P
import qualified Data.List as L
import qualified Web.Encodings.ListHelper as LH
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Monoid as M
import qualified Data.Text as TS
import qualified Data.Text.Lazy as TL
import Data.Maybe (fromMaybe)
import Data.Bits ((.|.),(.&.),shiftL,shiftR)
import Data.Word (Word8)
import qualified Data.ByteString
class (Eq a, Show a) => StringLike a where
span :: (Char -> Bool) -> a -> (a, a)
null :: a -> Bool
concatMap :: (Char -> String) -> a -> a
dropWhile :: (Char -> Bool) -> a -> a
break :: (Char -> Bool) -> a -> (a, a)
cons :: Char -> a -> a
uncons :: a -> Maybe (Char, a)
append :: a -> a -> a
intercalate :: a -> [a] -> a
isPrefixOf :: a -> a -> Bool
take :: Int -> a -> a
head :: a -> Char
tail :: a -> a
init :: a -> a
last :: a -> Char
empty :: a
pack :: String -> a
unpack :: a -> String
packUtf8 :: String -> a
unpackUtf8 :: BS.ByteString -> Maybe a
dropPrefix :: a -> a -> Maybe a
dropPrefix porig sorig = helper porig sorig where
helper p s
| null p && null s = Just empty
| null p = Just s
| null s = Nothing
| head p == head s = helper (tail p) (tail s)
| otherwise = Nothing
dropPrefix' :: a -> a -> a
dropPrefix' p c = case dropPrefix p c of
Just x -> x
Nothing -> c
dropQuotes :: a -> a
dropQuotes s
| lengthGE 2 s && head s == '"' && last s == '"' = tail $ init s
| otherwise = s
chomp :: a -> a
chomp s | null s = s
chomp s = case last s of
'\n' -> chomp $ init s
'\r' -> chomp $ init s
_ -> s
split :: Char -> a -> [a]
split c s =
let (next, rest) = breakChar c s
in if null next
then (if null rest then [] else [rest])
else next : split c rest
splitOneOf :: [Char] -> a -> [a]
splitOneOf cs s =
let (next, rest) = breakChars cs s
in if null next
then (if null rest then [] else [rest])
else next : splitOneOf cs rest
breakCharsMaybe :: [Char] -> a -> Maybe (a, a)
breakCharsMaybe c s
| null s = Nothing
| head s `elem` c = Just (empty, tail s)
| otherwise = do
(next, rest) <- breakCharsMaybe c (tail s)
Just (cons (head s) next, rest)
breakCharMaybe :: Char -> a -> Maybe (a, a)
breakCharMaybe c s
| null s = Nothing
| c == head s = Just (empty, tail s)
| otherwise = do
(next, rest) <- breakCharMaybe c (tail s)
Just (cons (head s) next, rest)
breakChar :: Char -> a -> (a, a)
breakChar c s = fromMaybe (s, empty) $ breakCharMaybe c s
breakChars :: [Char] -> a -> (a, a)
breakChars c s = fromMaybe (s, empty) $ breakCharsMaybe c s
breakString :: a -> a -> (a, a)
breakString _ c | null c = (empty, empty)
breakString p c = case dropPrefix p c of
Just x -> (empty, x)
Nothing ->
let x = head c
xs = tail c
(next, rest) = breakString p xs
in (cons x next, rest)
takeLine :: a -> (a, a)
takeLine a =
let (x, y) = breakChar '\n' a
x' = chomp x
in (x', y)
takeUntilBlank :: a -> ([a], a)
takeUntilBlank a =
let (next, rest) = takeLine a
in if null next
then ([], rest)
else let (nexts, rest') = takeUntilBlank rest
in (next : nexts, rest')
lengthLT :: Int -> a -> Bool
lengthLT i _ | i <= 0 = False
lengthLT i a
| null a = True
| otherwise = lengthLT (i 1) $ tail a
lengthGE :: Int -> a -> Bool
lengthGE i = not . lengthLT i
concatMapUtf8 :: (Char -> String) -> a -> a
instance StringLike [Char] where
intercalate = L.intercalate
null = P.null
concatMap = P.concatMap
tail = P.tail
head = P.head
cons = LH.cons
uncons [] = Nothing
uncons (x:xs) = Just (x, xs)
span = P.span
dropWhile = P.dropWhile
break = P.break
append = M.mappend
isPrefixOf = L.isPrefixOf
take = P.take
empty = M.mempty
pack = P.id
unpack = P.id
packUtf8 = pack
unpackUtf8 = utf8Decode . Data.ByteString.unpack
init = P.init
last = P.last
concatMapUtf8 f = concatMap (concatMap f . utf8EncodeChar)
instance StringLike BS.ByteString where
span = BS.span
null = BS.null
concatMap f = BS.concatMap $ pack . f
dropWhile = BS.dropWhile
break = BS.break
cons = BS.cons
uncons = BS.uncons
append = BS.append
intercalate = BS.intercalate
isPrefixOf = BS.isPrefixOf
take = BS.take
head = BS.head
tail = BS.tail
empty = BS.empty
pack = BS.pack
unpack = BS.unpack
packUtf8 = pack . concatMap utf8EncodeChar
unpackUtf8 = Just
init = BS.init
last = BS.last
concatMapUtf8 = concatMap
instance StringLike BL.ByteString where
span = BL.span
null = BL.null
concatMap f = BL.concatMap $ pack . f
dropWhile = BL.dropWhile
break = BL.break
cons = BL.cons
uncons = BL.uncons
append = BL.append
intercalate = BL.intercalate
isPrefixOf = BL.isPrefixOf
take i = BL.take $ P.fromIntegral i
head = BL.head
tail = BL.tail
empty = BL.empty
pack = BL.pack
unpack = BL.unpack
packUtf8 = pack . concatMap utf8EncodeChar
unpackUtf8 bs = Just $ BL.fromChunks [bs]
init = BL.init
last = BL.last
concatMapUtf8 = concatMap
instance StringLike TS.Text where
span = TS.spanBy
null = TS.null
concatMap f = TS.concatMap $ pack . f
dropWhile = TS.dropWhile
break = TS.breakBy
cons = TS.cons
uncons = TS.uncons
append = TS.append
intercalate = TS.intercalate
isPrefixOf = TS.isPrefixOf
take i = TS.take $ P.fromIntegral i
head = TS.head
tail = TS.tail
empty = TS.empty
pack = TS.pack
unpack = TS.unpack
packUtf8 = pack
unpackUtf8 = fmap pack . unpackUtf8
init = TS.init
last = TS.last
concatMapUtf8 f = concatMap (concatMap f . utf8EncodeChar)
instance StringLike TL.Text where
span = TL.spanBy
null = TL.null
concatMap f = TL.concatMap $ pack . f
dropWhile = TL.dropWhile
break = TL.breakBy
cons = TL.cons
uncons = TL.uncons
append = TL.append
intercalate = TL.intercalate
isPrefixOf = TL.isPrefixOf
take i = TL.take $ P.fromIntegral i
head = TL.head
tail = TL.tail
empty = TL.empty
pack = TL.pack
unpack = TL.unpack
packUtf8 = pack
unpackUtf8 = fmap pack . unpackUtf8
init = TL.init
last = TL.last
concatMapUtf8 f = concatMap (concatMap f . utf8EncodeChar)
utf8Decode :: [Word8] -> Maybe String
utf8Decode [] = Just ""
utf8Decode (c:cs)
| c < 0x80 = do cs' <- utf8Decode cs
Just $ toEnum (fromEnum c) : cs'
| c < 0xc0 = Nothing
| c < 0xe0 = multi1
| c < 0xf0 = multi_byte 2 0xf 0x800
| c < 0xf8 = multi_byte 3 0x7 0x10000
| c < 0xfc = multi_byte 4 0x3 0x200000
| c < 0xfe = multi_byte 5 0x1 0x4000000
| otherwise = Nothing
where
multi1 = case cs of
c1 : ds | c1 .&. 0xc0 == 0x80 ->
let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
in if d >= 0x000080 then (do ds' <- utf8Decode ds
Just $ toEnum d : ds')
else Nothing
_ -> Nothing
multi_byte :: Int -> Word8 -> Int -> Maybe [Char]
multi_byte i mask overlong = aux i cs (fromEnum (c .&. mask))
where
aux 0 rs acc
| overlong <= acc && acc <= 0x10ffff &&
(acc < 0xd800 || 0xdfff < acc) &&
(acc < 0xfffe || 0xffff < acc) = do
rs' <- utf8Decode rs
Just $ toEnum acc : rs'
| otherwise = Nothing
aux n (r:rs) acc
| r .&. 0xc0 == 0x80 = aux (n1) rs
$ shiftL acc 6 .|. fromEnum (r .&. 0x3f)
aux _ _rs _ = Nothing
utf8EncodeChar :: Char -> String
utf8EncodeChar = map toEnum . utf8EncodeInt . fromEnum
utf8EncodeInt :: Int -> [Int]
utf8EncodeInt c
| c <= 0x7f = [c]
| c <= 0x7ff = [ 0xc0 + (c `shiftR` 6)
, 0x80 + c .&. 0x3f
]
| c <= 0xffff = [ 0xe0 + (c `shiftR` 12)
, 0x80 + ((c `shiftR` 6) .&. 0x3f)
, 0x80 + c .&. 0x3f
]
| otherwise = [ 0xf0 + (c `shiftR` 18)
, 0x80 + ((c `shiftR` 12) .&. 0x3f)
, 0x80 + ((c `shiftR` 6) .&. 0x3f)
, 0x80 + c .&. 0x3f
]