-- "src/Dao/String.hs" provides the fundamental string data type -- called "UStr" which is used throughout the Dao System. -- -- Copyright (C) 2008-2014 Ramin Honary. -- This file is part of the Dao System. -- -- The Dao System is free software: you can redistribute it and/or -- modify it under the terms of the GNU General Public License as -- published by the Free Software Foundation, either version 3 of the -- License, or (at your option) any later version. -- -- The Dao System is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program (see the file called "LICENSE"). If not, see -- . {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleInstances #-} -- | This module has two purposes. Firstly, this module depends on no other module in the Dao -- program, so it may be imported by any other module, and as such it provides the classes and -- functions that must be available to every module. Most of these essential functions are related -- to strings, which is why this module is named so. -- -- Therefore secondly, this module provides the /universal string/ data type 'UStr', and a type -- class 'UStrType' that allows you to declare arbitrary data types to be convertible to and from -- universal strings. Universal strings are built upon the "Data.ByteString.Lazy.UTF8" module in -- the @utf8-string@ package of the Haskell platform. All strings used in the Dao runtime are stored -- as this data type. -- -- /NOTE:/ though this module is absolutely essential to every other module in the Dao system, not -- all data structures should need to instantiate 'UStrType'. By contrst, the -- 'Dao.Interpreter.Structured' (not defined in this module) should be instantiated by nearly all -- data structures, especially if it is necessary to manipulate these structures within the Dao -- programming language. -- -- Dao is a high-level language, like a macro language or a scripting language. One thing -- scripting/meta languages all have in common is the use of strings as a way to store and transmit -- data in a human-readable but structured format; strings are often the universal intermediate code -- of the runtime environment. Data structures can be converted to a string, stored in memory, -- transmitted over a socket or pipe, saved to disk. Data from the disk, the socket, or in memory -- can be parsed to reconstruct the data structures. -- -- However it is my opinion that use of strings as intermediate data structures is very poor design -- in any programming language; it is an anti-pattern. I believe the universal data type should the -- tree rather than the string. Therefore I have provided the "Dao.Tree" and "Dao.Struct" modules, -- and the 'Dao.Interpreter.Structured' type class which expand on the ideas of 'Prelude.Show' and -- 'Prelude.Read' by using a 'Dao.Tree.Tree' as the intermediate data structure, rather than a -- 'Dao.String.UStr'. module Dao.String where import Control.Monad import Control.Monad.State import Control.DeepSeq import Control.Exception (assert) import Data.String import Data.Monoid import Data.Typeable import qualified Data.Binary as B import Data.Bits import Data.Char import Data.List (partition, stripPrefix) import Data.Word import Data.Array.Unboxed import qualified Data.ByteString.Lazy.UTF8 as U import qualified Data.ByteString.Lazy as B import qualified Codec.Binary.UTF8.String as UTF8 import Numeric -- Necessary for the HasNullValue instances import Data.Int import Data.Ratio import Data.Complex import Data.Time.Clock import qualified Data.IntMap as IM import qualified Data.Map as M import qualified Data.Set as S -- | Objects which can be used as a predicate testing whether or not the object is null, or of a -- default value, should instantiate this class. class HasNullValue a where { nullValue :: a; testNull :: a -> Bool; } instance HasNullValue () where { nullValue = (); testNull () = True; } instance HasNullValue UStr where { nullValue = mempty; testNull = (==mempty); } instance HasNullValue [a] where { nullValue = []; testNull = null; } instance HasNullValue Char where { nullValue = '\0'; testNull = (==nullValue); } instance HasNullValue Int where { nullValue = 0; testNull = (==nullValue); } instance HasNullValue Int64 where { nullValue = 0; testNull = (==nullValue); } instance HasNullValue Word where { nullValue = 0; testNull = (==nullValue); } instance HasNullValue Word64 where { nullValue = 0; testNull = (==nullValue); } instance HasNullValue Double where { nullValue = 0; testNull = (==nullValue); } instance HasNullValue Integer where { nullValue = 0; testNull = (==nullValue); } instance HasNullValue (Ratio Integer) where { nullValue = 0%1; testNull = (==nullValue); } instance HasNullValue (Complex Double) where { nullValue = 0:+0; testNull = (==nullValue); } instance HasNullValue NominalDiffTime where { nullValue = fromRational 0; testNull = (==nullValue); } instance HasNullValue (IM.IntMap a) where { nullValue = IM.empty; testNull = IM.null } instance HasNullValue (M.Map k a) where { nullValue = M.empty; testNull = M.null } instance HasNullValue (S.Set a) where { nullValue = S.empty; testNull = S.null } instance HasNullValue U.ByteString where { nullValue = mempty; testNull = (==mempty); } -- | This is the /universal string/ type. It is a @newtype@ wrapper around -- 'Data.ByteString.Lazy.UTF8.ByteString', but has an API that is used throughout the Dao system. -- There is serious consideration to replace this module with "Data.Text", but even if that happens, -- this module will be kept to provide a stable API to the string package upon which it is built. newtype UStr = UStr { toUTF8ByteString :: U.ByteString } deriving (Eq, Ord, Typeable) instance Monoid UStr where { mempty = toUStr ""; mappend a b = toUStr (uchars a ++ uchars b); } -- | To provide intermediate string representations of data structures is one of the purposes of -- 'Prelude.Show' and 'Prelude.Read' in the Haskell language. In Haskell, 'Prelude.Read' and -- 'Prelude.Show' must, by convention, output a string that can be converted back to an exactly -- equivalent data structure to the structure that produced the output when the output string is -- parsed by 'Prelude.Read'. In other words @read (show a) == a && show (read a) == a@ should -- evaluate to 'Prelude.True' for any type of @a@. -- -- This is not merely a convention for the 'UStrType', it is a requirement. The minimal complete -- definition is 'toUStr' and one or both of 'fromUStr' and 'maybeFromUStr'. The 'nil' function is -- part of the minimal complete definition, except when your data type is also an instance of -- 'Data.Monoid.Monoid'. If your data type is also a 'Data.Monoid.Monoid', then the default instance -- of 'nil' is 'Data.monoid.mempty'. -- -- Another big difference between 'UStrType' and 'Prelude.Show'/'Prelude.Read' is that 'UStrType' is -- not intented to be used to construct parsers, it is used as an abstract interface to a parser. -- 'Prelude.Read' provides 'Prelude.lex' for taking a lexeme from the head of the input, -- 'Prelude.readParen' for parsing items from within parentheses, and 'readsPrec' which -- parameterizes the current precedence value and allows you to backtrack if a lexeme has a lower -- prescedence. All of this functionality (and more) is provided in the "Dao.Parser" -- module, it is not provided here in the 'UStrType'. -- -- When instantiating this class, you will may find the 'uchars' and 'ustr' to be useful if parsing -- strings is necessary. If you want to use 'Prelude.Show' to instantiate 'toUStr', you can simply -- use 'derive_toUStr' and 'derive_fromUStr'. The 'uchars' function is used to convert any -- 'UStrType' to a 'Prelude.String' by first converting the 'UStrType' to a 'UStr', and 'ustr' is -- does the inverse, however 'UStr' also instantiates 'UStrType' /so the way to convert a 'UStr' to -- a 'Prelude.String' is to use 'uchars', the way to convert a 'Prelude.String' to a 'UStr' is to -- use 'ustr'/. class UStrType a where -- | Like 'Prelude.Show.show', converts your data type to a universal string. toUStr :: a -> UStr -- | Like 'Prelude.read', constructs your data type from a universal string. fromUStr :: UStr -> a fromUStr str = maybe (error ("cannot construct data from UStr "++show str)) id (maybeFromUStr str) -- | Like 'Prelude.reads' except the entire string must be consumed, and the return type is a -- 'Prelude.Maybe' instead of a list. The return type here is not similar to -- 'Prelude.ReadS' which is a synonym for @'Prelude.String' -> [(a, 'Prelude.String')]@ a pair -- containing the read object and the remainder. maybeFromUStr :: UStr -> Maybe a maybeFromUStr = Just . fromUStr nil :: a nil = fromUStr mempty instance UStrType UStr where { toUStr = id; fromUStr = id; } instance UStrType String where toUStr = UStr . U.fromString fromUStr = U.toString . toUTF8ByteString instance UStrType U.ByteString where toUStr = UStr fromUStr = toUTF8ByteString maybeFromUStr = Just . fromUStr -- | This function lets you use the instantiation of 'Prelude.Show' to instantiate 'toUStr', -- typically used when your data type uses Haskell's @deriving@ keyword to derive 'Prelude.Show'. -- Note that this function also requires you to instantiate 'Prelude.Read' (also, perhaps, by the -- @deriving@ keyword), because although this function does not use any of the 'Prelude.Read' -- functions, this requirement emphasizes the importance of 'UStr' being a data structure that is -- used to store an intermediate representation of structured data. derive_ustr :: (Enum a, Read a, Show a) => a -> UStr derive_ustr = toUStr . show -- | This function lets you use the instantiation of 'Prelude.Read' to instantiate 'toUStr', -- typically used when your data type uses Haskell's @deriving@ keyword to derive 'Prelude.Read'. -- Note that this function also requires you to instantiate 'Prelude.Show' (also, perhaps, by the -- @deriving@ keyword), because although this function does not use any of the 'Prelude.Show' -- functions, this requirement emphasizes the importance of 'UStr' being a data structure that is -- used to store an intermediate representation of structured data. derive_fromUStr :: (Enum a, Read a, Show a) => UStr -> a derive_fromUStr = read . uchars -- | This function lets you use the instantiation of 'Prelude.Read' to instantiate 'toUStr', -- typically used when your data type uses Haskell's @deriving@ keyword to derive 'Prelude.Read'. -- Note that this function also requires you to instantiate 'Prelude.Show' (also, perhaps, by the -- @deriving@ keyword), because although this function does not use any of the 'Prelude.Show' -- functions, this requirement emphasizes the importance of 'UStr' being a data structure that is -- used to store an intermediate representation of structured data. derive_maybeFromUStr :: (Enum a, Read a, Show a) => UStr -> Maybe a derive_maybeFromUStr u = case reads (uchars u) of [(a, "")] -> Just a _ -> Nothing -- | Convert a 'Prelude.String' to an object classed as a 'UStrType' by first converting it to a -- 'UStr' using 'toUStr'. /NOTE:/ this is the function you use to convert a 'Prelude.String' to a -- 'UStr', and for the 'UStr' type, this function never fails (never evaluates to the "bottom" -- value). ustr :: UStrType str => String -> str ustr = fromUStr . toUStr -- | Convert a 'Prelude.String' to an object classed as a 'UStrType' by first converting it to a -- 'UStr' using 'toUStr', but uses 'maybeFromUStr' to convert from the 'UStrType' object. /NOTE:/ -- this is the function you use to convert a 'Prelude.String' to a 'UStr' (this is possible because -- 'UStr' instantiates 'UStrType'), and for the 'UStr' type, this function never evaluates to -- 'Prelude.Nothing'. maybeUStr :: UStrType str => String -> Maybe str maybeUStr = maybeFromUStr . toUStr -- | Convert an object classed as a 'UStrType' to a 'Prelude.String'. /NOTE:/ this is the function -- you should use to convert a 'UStr' to a 'Prelude.String' (this is possible because 'UStr' -- instantiates 'UStrType'). uchars :: UStrType str => str -> String uchars = U.toString . toUTF8ByteString . toUStr -- | Convert an object classed as a 'UStrType' to a @['Data.Word.Word8']@ list. Since 'UStr's store -- data as UTF-8 encoded strings, this function simply returns the UTF-8 formatted octet stream from -- that the 'Data.ByteString.Lazy.UTF8.ByteString' data structure. Of course, unless your 'UStrType' -- is simply a @newtype@ of 'UStr' a conversion to a 'UStr' is done behind the scenes, which will -- transparently encode a UTF8 string. utf8bytes :: UStrType str => str -> [Word8] utf8bytes = UTF8.encode . uchars . toUStr -- | The inverse of 'utf8bytes', tries to decode a stream of octets into a properly formatted UTF-8 -- 'Data.ByteString.Lazy.UTF8.ByteString'. If encoding fails, this function evaluates to -- 'Prelude.error' (evaluates to the "bottom" value). upack :: [Word8] -> UStr upack ax = toUStr (UTF8.decode ax) -- | Modify a 'UStr' with the APIs provided in the "Data.ByteString.UTF8.Lazy" module. fmapUTF8String :: (U.ByteString -> U.ByteString) -> UStr -> UStr fmapUTF8String f = UStr . f . toUTF8ByteString -- | Split a longer string up by the shorter string, for example: -- > splitString (ustr "--") (one-one -- two-two -----> three-three -- four-four") -- will be split to -- > ["one-one ", " two-two ", "", "-> three-three ", " four-four"] splitString :: UStr -> UStr -> [UStr] splitString a b = case compare la lb of EQ -> return $ if a==b then nil else a LT -> delstr a b GT -> delstr b a where la = ulength a lb = ulength b len = min la lb loop ox str i ax bx = if i>0 then case stripPrefix ax bx of Nothing -> loop ox (head bx : str) (i-1) ax (tail bx) Just bx -> loop (ustr (reverse str) : ox) "" (i-len) ax bx else reverse $ ustr (reverse str ++ bx) : ox delstr a b = loop [] "" (abs $ lb-la) (uchars a) (uchars b) ---------------------------------------------------------------------------------------------------- -- | A Variable-Length Integer (VLI) encoder. The bits of a variable-length integer will have a -- format like so: -- > bit column number: 7 6543210 -- > --------- -- > 1st highest order byte: 1 XXXXXXX -- > 2nd highest order byte: 1 XXXXXXX -- > 3rd highest order byte: 1 XXXXXXX -- > ... -- > lowest order byte : 0 XXXXXXX -- If the highest-order bit is a one, it indicates there are more bytes to follow. If the highest -- order bit is 0, then there are no more bytes. The 7 lower-order bits will be concatenated in -- /big-endian order/ to form the length value for the string. By this method, most all strings -- will have a length prefix of only one or two bytes. vlIntegralToWord8s :: (Integral a, Bits a) => a -> [Word8] vlIntegralToWord8s = reverse . (\ (a:ax) -> (a .&. 0x7F) : ax) . fix (\loop w -> let v = 0x80 .|. fromIntegral (w .&. 0x7F) in case shiftR w 7 of{ 0 -> [v]; w -> v : loop w; }) -- | Inverse operation of 'bitsToVLI' vlWord8sToIntegral :: (Integral a, Bits a) => [Word8] -> (a, [Word8]) vlWord8sToIntegral = loop 0 where fn a w = shiftL a 7 .|. fromIntegral (w .&. 0x7F) loop a wx = case wx of [] -> (a, []) w:wx -> if w .&. 0x80 == 0 then (fn a w, wx) else loop (fn a w) wx -- | Since a negative number expressed in a 'Prelude.Integer' type translates to an infinite -- sequence of 0xFF bytes when converting it to a VLI, it needs to be encoded specially with a -- negation bit in the very first position. vlIntegerToWord8s :: Integer -> [Word8] vlIntegerToWord8s w = reverse $ (\ (b:bx) -> (if w<0 then b .|. 0x40 else b):bx) $ loop (abs w) where loop w = fromInteger (w .&. 0x3F) : fix (\loop w -> case w of 0 -> [] w -> (0x80 .|. fromInteger (w .&. 0x7F)) : loop (shiftR w 7) ) (shiftR w 6) vlWord8sToInteger :: [Word8] -> (Integer, [Word8]) vlWord8sToInteger = loop 0 where fn s m a w = shiftL a s .|. fromIntegral (w .&. m) loop a wx = case wx of [] -> (a, []) w:wx -> if w .&. 0x80 == 0 then ((if w .&. 0x40 == 0 then id else negate) $ fn 6 0x3F a w, wx) else loop (fn 7 0x7F a w) wx -- | When reading from a binary file, gather the bits of a Variable-Length Integer. vlGatherWord8s :: B.Get [Word8] vlGatherWord8s = loop [] where loop wx = B.getWord8 >>= \w -> if w .&. 0x80 == 0 then return (wx++[w]) else loop (wx++[w]) -- | Encode only positive 'Prelude.Integer's. This differs from 'vlPutInteger' in that the sign of -- the integer is not stored in the byte stream, saving a single bit of space. This can actually -- simplify some equations that expect an VLInteger to be encoded as a multiple-of-7 length string -- of bits as you don't need to make additional rules for the final byte which would only have -- 6-bits if the sign is stored with it. vlPutPosInteger :: Integer -> B.Put vlPutPosInteger i = assert (i>=0) $ mapM_ B.putWord8 $ vlIntegralToWord8s $ i -- | Decode only positive 'Prelude.Integer's. This differs from 'vlPutInteger' in that the sign of -- the integer is not stored in the byte stream, saving a single bit of space. This can actually -- simplify some equations that expect an VLInteger to be encoded as a multiple-of-7 length string -- of bits as you don't need to make additional rules for the final byte which only have 6-bits if -- the sign is stored with it. vlGetPosInteger :: B.Get Integer vlGetPosInteger = fmap (fst . vlWord8sToIntegral) vlGatherWord8s -- | Encode a positive or negative 'Prelude.Integer' using 'vlWord8sToInteger'. The sign of the integer -- is stored in the final byte in the list of encoded bytes, so the final encoded byte only has 6 -- bits of information, rather than 7 in the case of positive integers. vlPutInteger :: Integer -> B.Put vlPutInteger = mapM_ B.putWord8 . vlIntegerToWord8s -- | Decode a positive or negative 'Prelude.Integer' using 'vlWord8sToInteger'. The sign of the integer -- is stored in the final byte in the list of encoded bytes, so the final encoded byte only has 6 -- bits of information, rather than 7 in the case of positive integers. vlGetInteger :: B.Get Integer vlGetInteger = fmap (fst . vlWord8sToInteger) vlGatherWord8s -- | Return the length of the 'UStr'. ulength :: UStr -> Int ulength = U.length . toUTF8ByteString -- | Length of a list, but unlike 'Data.List.length', allows a polymorphic length type. iLength :: Num len => [a] -> len iLength = foldl (+) 0 . map (const 1) -- | Used to encode a 'UStr' data type without any prefix at all. The instantiation of 'UStr' into -- the 'Data.Binary.Binary' class places a prefix before every 'UStr' as it is serialized, allowing -- it to be used more safely in more complex data types. encodeUStr :: UStr -> B.Put encodeUStr u = mapM_ B.putWord8 $ vlIntegralToWord8s (U.length (toUTF8ByteString u)) ++ (UTF8.encode (uchars u)) -- | Used to decode a 'UStr' data type without any prefix. The instantiation of 'UStr' into the -- 'Data.Binary.Binary' class places a prefix before every 'UStr' as it is serialized, allowing it -- to be used more safely in more complex data types. decodeUStr :: B.Get UStr decodeUStr = do (strlen, undecoded) <- fmap vlWord8sToIntegral vlGatherWord8s if null undecoded then fmap (toUStr . (UTF8.decode)) (replicateM strlen B.getWord8) else fail "binary data decoder failed on UStr" ---------------------------------------------------------------------------------------------------- bytesBitArith :: (Word8 -> Word8 -> Word8) -> B.ByteString -> B.ByteString -> B.ByteString bytesBitArith f a b = B.pack $ map (uncurry f) $ B.zip a b bytesShift :: B.ByteString -> Int64 -> B.ByteString bytesShift str i = let (len, r) = fmap fromIntegral (divMod (abs i) 8) in case compare i 0 of EQ -> str -- identity LT -> mappend (B.replicate len 0) $ if r==0 then str else -- shift right snd $ B.mapAccumL (\prev b -> (b, shiftR b r .|. rotateR ((2^r-1) .&. prev) r)) 0 str GT -> -- shift left (if r==0 then id else snd . B.mapAccumR (\prev b -> (b, shiftL b r .|. ((2^r-1) .&. prev))) 0 ) (B.drop len str) bytesRotate :: B.ByteString -> Int64 -> B.ByteString bytesRotate str i = let { len = B.length str; (cur8, r) = fmap fromIntegral (divMod i (len*8)); cur = div cur8 8; } in if r==0 then str else let { (a, b) = B.splitAt cur str; str = b<>a; } in snd $ B.mapAccumL (\prev b -> (b, shiftL b r .|. ((2^r-1) .&. prev))) (B.last str) str bytesBit :: Int64 -> B.ByteString bytesBit i = let (len, r) = fmap fromIntegral (divMod i 8) in if r==0 then B.snoc (B.replicate len 0) 1 else B.snoc (B.replicate (len-1) 0) (bit r) bytesTestBit :: B.ByteString -> Int64 -> Bool bytesTestBit str i = let (len, r) = fmap fromIntegral (divMod i 8) in testBit (B.index str len) r bytesBitSize :: B.ByteString -> Int64 bytesBitSize = (8*) . B.length bytesPopCount :: B.ByteString -> Int64 bytesPopCount = B.foldl (\count b -> count + fromIntegral (popCount b)) 0 ---------------------------------------------------------------------------------------------------- instance IsString UStr where { fromString = ustr } instance Read UStr where { readsPrec n str = map (\ (s, rem) -> (toUStr (s::String), rem)) $ readsPrec n str } instance Show UStr where { show u = show (uchars u) } instance B.Binary UStr where put u = encodeUStr u get = decodeUStr instance NFData UStr where { rnf (UStr a) = deepseq a () } -- | A type synonym for 'UStr' used where a string is used as some kind of identifier. newtype Name = Name { nameToUStr :: UStr } deriving (Eq, Ord, Typeable) instance Monoid Name where { mempty = nil; mappend (Name a) (Name b) = Name (a<>b); } instance Show Name where { show = show . nameToUStr } instance UStrType Name where toUStr = nameToUStr maybeFromUStr nm = let str = uchars nm ck f c = c=='_' || f c in case str of c:cx | ck isAlpha c || and (fmap (ck isAlphaNum) cx) -> Just (Name nm) _ -> Nothing fromUStr str = maybe (error msg) id $ maybeFromUStr str where msg = "'Dao.String.Name' object must be constructed from alpha-numeric and underscore characters only:\n" ++ take 256 (uchars str) instance IsString Name where { fromString = ustr } instance B.Binary Name where put (Name u) = encodeUStr u get = decodeUStr >>= \u -> case maybeFromUStr u of Just nm -> return nm Nothing -> fail "binary data contains invalid 'Dao.String.Name' object" instance NFData Name where { rnf (Name a) = deepseq a () } -- | A type synonym for 'UStr' used where a string is storing a file path or URL. type UPath = UStr ---------------------------------------------------------------------------------------------------- -- | Breaks a long list into a list of lists no longer than the specified length. breakInto :: Int -> [a] -> [[a]] breakInto i bx = if null bx then [] else let (grp, bx') = splitAt i bx in grp : breakInto i bx' -- | An array mapping 6-bit values to base-64 character symbols base64Symbols :: UArray Word Char base64Symbols = listArray (0,63) (['A'..'Z']++['a'..'z']++['0'..'9']++"+/") -- | Encoding arbitrary bytes in a 'Data.ByteString.Lazy.ByteString' to base-64 character symbols -- according to RFC 3548. b64Encode :: B.ByteString -> [[Char]] b64Encode = breakInto 76 . concatMap enc . breakInto 3 . B.unpack where windows = [(0xFC0000, 18), (0x03F000, 12), (0x000FC0, 6), (0x00003F, 0)] enc [] = [] enc bx = let len = length bx buf = foldl (\buf b -> shiftL buf 8 .|. fromIntegral b) 0 (take 3 (bx++replicate (3-len) 0)) in take 4 $ (++"==") $ take (len+1) $ flip map windows $ \ (mask, shft) -> base64Symbols ! shiftR (mask.&.buf) shft -- | An array mapping base-64 character symbols to their 6-bit values. base64Values :: UArray Char Int base64Values = array ('+', 'z') $ concat $ [ zip ['+', 'z'] (repeat 0xAAAAAAA) -- 0xAAAAAAA is the undefined value , zip ['A'..'Z'] [0..25] , zip ['a'..'z'] [26..51] , zip ['0'..'9'] [52..61] , [('+', 62), ('/', 63), ('=', 0xFFFFFFF)] -- 0xFFFFFFF is the end-of-input value ] -- | Decoding base-64 character symbols according to RFC 3548 into a string of bytes stored in a -- 'Data.ByteString.Lazy.ByteString'. If decoding fails, the invalid character and it's position in -- the input string are returned as a pair in a 'Data.Either.Left' value, otherwise the -- 'Data.ByteString.Lazy.ByteString' is returned as the 'Data.Either.Right' value. b64Decode :: [Char] -> Either (Char, Word64) B.ByteString b64Decode = loop 0 [] . breakInto 4 . filter (flip notElem " \t\r\n\v\f\0") where loop i bx cxx = case cxx of [] -> Right (B.pack bx) cx:cxx -> case sum 0 0 i cx of Left (c, i) -> Left (c, i) Right (i, bx') -> loop i (bx++bx') cxx sum tk b i cx = case cx of [] -> Right (i, take (3-tk) (splitup b)) c:cx -> if inRange (bounds base64Values) c then case base64Values!c of 0xAAAAAAA -> Left (c, i) 0xFFFFFFF -> sum (tk+1) (shiftL b 6) (i+1) cx c -> sum tk (shiftL b 6 .|. c) (i+1) cx else Left (c, i) splitup b = map fromIntegral [shiftR (b.&.0xFF0000) 16, shiftR (b.&.0xFF00) 8, b.&.0xFF] newtype Base64String = Base64String B.ByteString deriving Typeable instance Show Base64String where { show (Base64String s) = unlines (b64Encode s) } instance Read Base64String where readsPrec _ str = case partition (\c -> isSpace c || (inRange (bounds base64Values) c && base64Values!c /= 0xAAAAAAA)) str of ("" , _ ) -> [] (str, rem) -> case b64Decode $ filter (not . isSpace) str of Left (ch, pos) -> error ("invalid charcter "++show ch++" at index "++show pos++" in base64-encoded string") Right u -> [(Base64String u, rem)] newtype Base16String = Base16String B.ByteString deriving Typeable instance Show Base16String where show (Base16String u) = unlines $ fmap (unwords . fmap hex) $ breakInto 32 (B.unpack u) where hex b = [arr ! (shiftR (b.&.0xF0) 4), arr ! (b.&.0x0F)] arr :: Array Word8 Char arr = array (0,15) (zip [0..15] "0123456789ABCDEF") showHex :: (Show i, Integral i) => i -> String showHex = ("0x"++) . map toUpper . flip Numeric.showHex "" showOrdinal :: (Show n, Integral n) => n -> String showOrdinal n = show n ++ case mod n 100 of n | 11<=n||n<=13 -> "th" n -> case mod n 10 of { 1 -> "st"; 2 -> "nd"; 3 -> "rd"; _ -> "th"; } ---------------------------------------------------------------------------------------------------- -- | This is a simlpe string tokenizer for breaking up strings into tokens that can be easily used -- in rules in Doa scripts. simpleTokenizer :: String -> [String] simpleTokenizer = fix $ \loop cx -> case cx of "" -> [] c:cx -> maybe ([c] : loop cx) (\ (cx, rem) -> cx : loop rem) $ foldl (\f split -> mplus f (split (c:cx))) Nothing $ concat $ [ fmap (\predicate (c:cx) -> guard (predicate c) >> Just (span predicate (c:cx))) $ [isSpace, isNumber, isAlpha] , [\ (c:cx) -> guard (elem c "([{<>}])") >> Just ([c], cx)] , [\ (c:cx) -> Just $ span (==c) (c:cx)] ]