{-# LANGUAGE BangPatterns #-} {-# LANGUAGE ImportQualifiedPost #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -- | -- Description : Main interface for reading CBF files -- -- Look at 'readCBF' as a starting point. module Data.CBF (CBFImage (..), readCBF, decodePixels) where import Control.Monad (mzero, void, when) import Control.Monad.ST (runST) import Data.Attoparsec.ByteString.Lazy qualified as A import Data.Bifunctor (bimap, first) import Data.Binary.Get (getInt16le, getInt32le, getInt64le, runGet) import Data.Bits import Data.ByteString qualified as BS import Data.ByteString.Char8 qualified as BS8 import Data.ByteString.Lazy qualified as BSL import Data.Int import Data.Text qualified as Text import Data.Text.Encoding (decodeLatin1) import Data.Vector.Unboxed qualified as V import Data.Vector.Unboxed.Mutable qualified as MV import Data.Word import Unsafe.Coerce (unsafeCoerce) -- | Decoded CBF image data, with contents data CBFImage = CBFImage { -- | Raw image properties, after the binary format section header (not the comments at the begining of the file) imageProperties :: ![(Text.Text, Text.Text)], -- | Fastest image dimension (CBF avoids "x" and "y" or "width"/"height" here) imageFastestDimension :: !Int, -- | Second image dimension (CBF avoids "x" and "y" or "width"/"height" here) imageSecondDimension :: !Int, -- | Raw image data, to be decoded/decompressed using 'decodePixels' imageDataRaw :: !BSL.ByteString } breakSubstringWithoutDelimiter :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString) breakSubstringWithoutDelimiter needle haystack = let (prefix, suffix) = BS.breakSubstring needle haystack in (prefix, BS.drop (BS.length needle) suffix) cbfParser :: A.Parser CBFImage cbfParser = do let takeLine = do contents <- A.takeWhile (\x -> x /= 0xd && x /= 0xa) void (A.string "\r\n") pure contents cbfStartingLine = do line <- takeLine when ("--CIF-BINARY-FORMAT-SECTION--" `BS.isPrefixOf` line) mzero A.skipMany cbfStartingLine -- skip the starting line void takeLine let cbfPropertyLine = do line <- takeLine if BS.null line then mzero else do -- Example of a suffix: -- Content-Type: application/octet-stream; -- conversions="x-CBF_BYTE_OFFSET" lineSuffix <- if (";" `BS.isSuffixOf` line) then ((<> " ") . BS8.strip) <$> takeLine else pure "" pure (breakSubstringWithoutDelimiter ": " (line <> lineSuffix)) propertyLines <- A.many1 cbfPropertyLine -- the empty line void takeLine mapM_ A.word8 [0x0c, 0x1a, 0x04, 0xd5] let properties = bimap decodeLatin1 decodeLatin1 <$> propertyLines bsToInt :: BS.ByteString -> Maybe Int bsToInt = (fst <$>) . BS8.readInt case (,) <$> (lookup "X-Binary-Size-Fastest-Dimension" propertyLines >>= bsToInt) <*> (lookup "X-Binary-Size-Second-Dimension" propertyLines >>= bsToInt) of Just (fastestDimension, secondDimension) -> CBFImage properties fastestDimension secondDimension <$> A.takeLazyByteString Nothing -> fail ("couldn't extract dimensions from properties") -- | Read a CBF file, without decoding its contents (see 'decodePixels' for that) readCBF :: FilePath -> IO (Either Text.Text CBFImage) readCBF fn = do c <- BSL.readFile fn pure (first Text.pack (A.parseOnly cbfParser c)) unconsW8 :: BSL.ByteString -> Maybe (Word8, BSL.ByteString) unconsW8 = BSL.uncons unconsW16 :: BSL.ByteString -> Maybe (Word16, BSL.ByteString) unconsW16 bs = do (x, bs') <- unconsW8 bs (y, bs'') <- unconsW8 bs' pure (fromIntegral x .|. (fromIntegral y `shiftL` 8), bs'') unconsW32 :: BSL.ByteString -> Maybe (Word32, BSL.ByteString) unconsW32 bs = do (x, bs') <- unconsW16 bs (y, bs'') <- unconsW16 bs' pure (fromIntegral x .|. (fromIntegral y `shiftL` 16), bs'') unconsW64 :: BSL.ByteString -> Maybe (Word64, BSL.ByteString) unconsW64 bs = do (x, bs') <- unconsW32 bs (y, bs'') <- unconsW32 bs' pure (fromIntegral x .|. (fromIntegral y `shiftL` 32), bs'') unconsI8 :: BSL.ByteString -> Maybe (Int8, BSL.ByteString) unconsI8 bs = do (x, bs') <- unconsW8 bs pure (fromIntegral x, bs') unconsI16 :: BSL.ByteString -> Maybe (Int16, BSL.ByteString) unconsI16 bs = do (x, bs') <- unconsW16 bs pure (fromIntegral x, bs') unconsI32 :: BSL.ByteString -> Maybe (Int32, BSL.ByteString) unconsI32 bs = do (x, bs') <- unconsW32 bs pure (fromIntegral x, bs') unconsI64 :: BSL.ByteString -> Maybe (Int64, BSL.ByteString) unconsI64 bs = do (x, bs') <- unconsW64 bs pure (fromIntegral x, bs') decompressBinary :: Int -> Int64 -> BSL.ByteString -> [Int64] decompressBinary !0 !_ _ = [] decompressBinary !i !x bs = do case unconsI8 bs of Nothing -> [] -- fail Just (delta8, bs1) | -127 <= delta8 && delta8 <= 127 -> let !y = x + fromIntegral delta8 in y : decompressBinary (i - 1) y bs1 | otherwise -> case unconsI16 bs1 of Nothing -> [] -- fail Just (delta16, bs2) | -32767 <= delta16 && delta16 <= 32767 -> let !y = x + fromIntegral delta16 in y : decompressBinary (i - 1) y bs2 | otherwise -> case unconsI32 bs2 of Nothing -> [] -- fail Just (delta32, bs3) | -2147483647 <= delta32 && delta32 <= 2147483647 -> let !y = x + fromIntegral delta32 in y : decompressBinary (i - 1) y bs3 | otherwise -> case unconsI64 bs3 of Nothing -> [] -- fail Just (delta64, bs4) -> let !y = x + fromIntegral delta64 in y : decompressBinary (i - 1) y bs4 decompressBinaryBSL :: Int -> BSL.ByteString -> Either String [Int64] decompressBinaryBSL numberOfElements s = Right $ decompressBinary numberOfElements 0 s decompressST :: (MV.PrimMonad m) => Int -> BSL.ByteString -> m (V.Vector Int64) decompressST numberOfElements s = do mutableVector <- MV.new numberOfElements _ <- decompressSingleChunk (BSL.length s) s mutableVector 0 0 0 V.freeze mutableVector decompress :: Int -> BSL.ByteString -> V.Vector Int64 decompress numberOfElements s = runST (decompressST numberOfElements s) decompressSingleChunk :: (MV.PrimMonad m) => Int64 -> BSL.ByteString -> MV.MVector (MV.PrimState m) Int64 -> Int -> Int64 -> Int64 -> m () decompressSingleChunk slen s mutableVector outPos inPos value = do if inPos >= slen - 1 then pure () else do let readInt8 :: Int64 -> Maybe Int64 readInt8 p = case {-# SCC "read8" #-} BSL.indexMaybe s p of Nothing -> Nothing Just v -> Just (fromIntegral (unsafeCoerce v :: Int8)) readInt16 :: Int64 -> Maybe Int64 readInt16 p | p < slen - 2 = Just $ {-# SCC "read16" #-} fromIntegral (runGet getInt16le (BSL.drop p s)) | otherwise = Nothing readInt32 :: Int64 -> Maybe Int64 readInt32 p | p < slen - 4 = Just $ {-# SCC "read32" #-} fromIntegral (runGet getInt32le (BSL.drop p s)) | otherwise = Nothing readInt64 :: Int64 -> Maybe Int64 readInt64 p | p < slen - 8 = Just $ {-# SCC "read64" #-} fromIntegral (runGet getInt64le (BSL.drop p s)) | otherwise = Nothing recurse _bitDepth newInPos d = do if outPos >= MV.length mutableVector then pure () else {-# SCC "writeV" #-} do MV.write mutableVector outPos (value + d) decompressSingleChunk slen s mutableVector (outPos + 1) newInPos (value + d) case readInt8 inPos of Nothing -> pure () Just delta8 -> if -127 <= delta8 && delta8 <= 127 then recurse (8 :: Int) (inPos + 1) delta8 else case readInt16 (inPos + 1) of Nothing -> pure () Just delta16 -> if -32767 <= delta16 && delta16 <= 32767 then recurse (16 :: Int) (inPos + 3) delta16 else case readInt32 (inPos + 3) of Nothing -> pure () Just delta32 -> if -2147483647 <= delta32 && delta32 <= 2147483647 then recurse (32 :: Int) (inPos + 7) delta32 else case readInt64 (inPos + 7) of Nothing -> pure () Just delta64 -> recurse (64 :: Int) (inPos + 11) delta64 -- | Decode the actual pixel values inside the CBF file, possibly decompressing it. decodePixels :: CBFImage -> Either String [Int64] decodePixels (CBFImage {imageDataRaw, imageFastestDimension, imageSecondDimension}) = let numberOfElements = imageFastestDimension * imageSecondDimension in decompressBinaryBSL numberOfElements imageDataRaw