module Data.Repa.Convert.Format.String ( -- * Haskell Strings FixChars (..) , VarChars (..) , VarCharString (..) , ExactChars (..) , unpackCharList) where import Data.Repa.Convert.Internal.Format import Data.Repa.Convert.Internal.Packable import Data.Repa.Convert.Internal.Packer import Data.Repa.Convert.Format.Binary import Data.Monoid import Data.Word import Data.Char import GHC.Exts import qualified Foreign.Storable as S import qualified Foreign.Ptr as S import Prelude hiding (fail) #include "repa-convert.h" --------------------------------------------------------------------------------------------------- -- | Fixed length sequence of characters, represented as a (hated) Haskell `String`. -- -- * The runtime performance of the Haskell `String` is atrocious. -- You really shouldn't be using them for large data sets. -- -- * When packing, the length of the provided string must match the width -- of the format, else packing will fail. -- -- * When unpacking, the length of the result will be the width of the format. -- data FixChars = FixChars Int deriving (Eq, Show) instance Format FixChars where type Value (FixChars) = String fieldCount _ = 1 minSize (FixChars len) = len fixedSize (FixChars len) = Just len packedSize (FixChars len) _ = Just len {-# INLINE minSize #-} {-# INLINE fieldCount #-} {-# INLINE fixedSize #-} {-# INLINE packedSize #-} instance Packable FixChars where pack (FixChars len) xs | length xs == len = Packer $ \dst _fails eat -> do mapM_ (\(o, x) -> S.pokeByteOff (Ptr dst) o (w8 $ ord x)) $ zip [0 .. len - 1] xs let !(Ptr dst') = S.plusPtr (Ptr dst) len eat dst' | otherwise = Packer $ \_ fails _ -> fails {-# NOINLINE pack #-} packer f v = fromPacker (pack f v) {-# INLINE packer #-} instance Unpackable FixChars where unpacker (FixChars len@(I# len')) start end _stop fail eat = do let lenBuf = I# (minusAddr# end start) if lenBuf < len then fail else do let load_unpackChar o = do x :: Word8 <- S.peekByteOff (pw8 start) o return $ chr $ fromIntegral x {-# INLINE load_unpackChar #-} xs <- mapM load_unpackChar [0 .. len - 1] eat (plusAddr# start len') xs {-# INLINE unpacker #-} --------------------------------------------------------------------------------------------------- -- | Like `FixChars`, but with a variable length. data VarChars = VarChars deriving (Eq, Show) instance Format VarChars where type Value VarChars = String fieldCount _ = 1 {-# INLINE fieldCount #-} minSize _ = 0 {-# INLINE minSize #-} fixedSize VarChars = Nothing {-# INLINE fixedSize #-} packedSize VarChars xs = Just $ length xs {-# NOINLINE packedSize #-} instance Packable VarChars where pack VarChars xx = case xx of [] -> mempty (x : xs) -> pack Word8be (w8 $ ord x) <> pack VarChars xs {-# NOINLINE pack #-} packer f v = fromPacker (pack f v) {-# INLINE packer #-} instance Unpackable VarChars where unpacker VarChars start end stop _fail eat = do (Ptr ptr, str) <- unpackCharList (pw8 start) (pw8 end) stop eat ptr str {-# INLINE unpacker #-} -- | Unpack a ascii text from the given buffer. unpackCharList :: S.Ptr Word8 -- ^ First byte in buffer. -> S.Ptr Word8 -- ^ First byte after buffer. -> (Word8 -> Bool) -- ^ Detect field deliminator. -> IO (S.Ptr Word8, [Char]) unpackCharList start end stop = go start [] where go !ptr !acc | ptr >= end = return (ptr, reverse acc) | otherwise = do w :: Word8 <- S.peek ptr if stop w then do return (ptr, reverse acc) else do let !ptr' = S.plusPtr ptr 1 go ptr' ((chr $ fromIntegral w) : acc) {-# NOINLINE unpackCharList #-} --------------------------------------------------------------------------------------------------- -- | Variable length string in double quotes, -- and standard backslash encoding of non-printable characters. data VarCharString = VarCharString deriving (Eq, Show) instance Format VarCharString where type Value VarCharString = String fieldCount _ = 1 {-# INLINE fieldCount #-} minSize _ = 2 {-# INLINE minSize #-} fixedSize _ = Nothing {-# INLINE fixedSize #-} packedSize VarCharString xs = Just $ length $ show xs {-# NOINLINE packedSize #-} instance Packable VarCharString where -- ISSUE #43: Avoid intermediate lists when packing Ints and Strings. packer VarCharString xx start k = packer VarChars (show xx) start k {-# INLINE packer #-} instance Unpackable VarCharString where unpacker VarCharString start end _stop fail eat = unpackString (pw8 start) (pw8 end) fail eat {-# INLINE unpacker #-} -- | Unpack a string from the given buffer. --- -- We only handle the most common special character encodings. -- Is there a standard for which ones these are? -- unpackString :: S.Ptr Word8 -- ^ First byte in buffer. -> S.Ptr Word8 -- ^ First byte after buffer. -> IO () -- ^ Signal failure. -> (Addr# -> [Char] -> IO ()) -- ^ Eat an unpacked value. -> IO () unpackString start end fail eat = open start where -- Accept the open quotes. open !ptr | ptr >= end = fail | otherwise = do w :: Word8 <- S.peek ptr let !ptr' = S.plusPtr ptr 1 case chr $ fromIntegral w of '"' -> go_body ptr' [] _ -> fail -- Handle the next character in the string. go_body !ptr@(Ptr addr) !acc | ptr >= end = eat addr (reverse acc) | otherwise = do w :: Word8 <- S.peek ptr let !ptr'@(Ptr addr') = S.plusPtr ptr 1 case chr $ fromIntegral w of '"' -> eat addr' (reverse acc) '\\' -> go_escape ptr' acc c -> go_body ptr' (c : acc) -- Handle escaped character. -- The previous character was a '\\' go_escape !ptr !acc | ptr >= end = fail | otherwise = do w :: Word8 <- S.peek ptr let ptr' = S.plusPtr ptr 1 case chr $ fromIntegral w of 'a' -> go_body ptr' ('\a' : acc) 'b' -> go_body ptr' ('\b' : acc) 'f' -> go_body ptr' ('\f' : acc) 'n' -> go_body ptr' ('\n' : acc) 'r' -> go_body ptr' ('\r' : acc) 't' -> go_body ptr' ('\t' : acc) 'v' -> go_body ptr' ('\v' : acc) '\\' -> go_body ptr' ('\\' : acc) '"' -> go_body ptr' ('"' : acc) _ -> fail {-# NOINLINE unpackString #-} --------------------------------------------------------------------------------------------------- -- | Match an exact sequence of characters. data ExactChars = ExactChars String deriving Show instance Format ExactChars where type Value ExactChars = () fieldCount (ExactChars _) = 0 {-# INLINE fieldCount #-} minSize (ExactChars str) = length str {-# NOINLINE minSize #-} fixedSize (ExactChars str) = return (length str) {-# NOINLINE fixedSize #-} packedSize (ExactChars str) () = return (length str) {-# NOINLINE packedSize #-} instance Packable ExactChars where packer (ExactChars str) _ dst _fails k = do let !len = length str mapM_ (\(o, x) -> S.pokeByteOff (Ptr dst) o (w8 $ ord x)) $ zip [0 .. len - 1] str let !(Ptr dst') = S.plusPtr (Ptr dst) len k dst' {-# NOINLINE packer #-} instance Unpackable ExactChars where unpacker (ExactChars str) start end _stop fails eat = do let !len@(I# len') = length str let !lenBuf = I# (minusAddr# end start) if lenBuf < len then fails else do let load_unpackChar o = do x :: Word8 <- S.peekByteOff (pw8 start) o return $ chr $ fromIntegral x {-# INLINE load_unpackChar #-} xs <- mapM load_unpackChar [0 .. len - 1] if (xs == str) then eat (plusAddr# start len') () else fails {-# NOINLINE unpacker #-} --------------------------------------------------------------------------------------------------- w8 :: Integral a => a -> Word8 w8 = fromIntegral {-# INLINE w8 #-} pw8 :: Addr# -> Ptr Word8 pw8 addr = Ptr addr {-# INLINE pw8 #-}