{-# LANGUAGE BangPatterns #-} -- simple unoptimized implementation of base32(hex) module Codec.Base32.Impl ( decodeBs2Bs , decodeBsL2BsL , encodeBs2Bs , encodeBsL2BsL , Fmt(..) ) where import qualified Data.ByteString as BS import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Char8 as BS.C import qualified Data.ByteString.Lazy as BS.L import Control.Monad import Data.Bits import Data.Word import Internal -- exported primitives data Fmt = Fmt'base32 | Fmt'base32hex {-# INLINE decodeBs2Bs #-} decodeBs2Bs :: Fmt -> BS.ByteString -> Either String BS.ByteString decodeBs2Bs !fmt bs0 = case runDecode fmt bs0 of DR'Final chunk -> pure $! bsToStrict $ BB.toLazyByteString chunk DR'Error ofs err -> Left $ showDecErr ofs err DR'Partial chunk0 cont -> case cont mempty of DR'Final chunk1 -> pure $! bsToStrict $ BB.toLazyByteString (chunk0<>chunk1) -- chunk1 ought to be empty -- TODO: maybe refactor internal API DR'Partial _ _ -> error "decodeBs2Bs: the impossible just happened" -- broken invariant DR'Error ofs err -> Left $ showDecErr ofs err {-# INLINE decodeBsL2BsL #-} decodeBsL2BsL :: Fmt -> BS.L.ByteString -> Either String BS.L.ByteString decodeBsL2BsL !fmt bs0 = go mempty (runDecode fmt) (filter (not . BS.null) $ BS.L.toChunks bs0) where go acc runDec (c:cs) = case runDec c of DR'Final _ -> error "impossible" DR'Error ofs err -> Left $! showDecErr ofs err -- TODO DR'Partial chunk cont -> go (acc <> chunk) cont cs go acc runDec [] = case runDec mempty of DR'Final chunk -> pure $! BB.toLazyByteString (acc <> chunk) DR'Partial _ _ -> error "decodeBsL2BsL: the impossible just happened" -- broken invariant DR'Error ofs err -> Left $ showDecErr ofs err encodeBs2Bs :: Fmt -> BS.ByteString -> BS.ByteString encodeBs2Bs !fmt = BS.C.pack . slowEnc fmt True . BS.unpack encodeBsL2BsL :: Fmt -> BS.L.ByteString -> BS.L.ByteString encodeBsL2BsL !fmt = BS.L.fromChunks . go mempty . BS.L.toChunks where go rest [] = encodeBs2Bs fmt rest : [] go rest (c:cs) | rclen < 5 = go rc cs -- shortcut | otherwise = encodeBs2Bs fmt c' : go rest' cs where rlen = BS.length rest clen = BS.length c rc = rest <> c -- TODO: avoid copying large chunks of data rclen = rlen + clen rlen' = rclen `rem` 5 (c',rest') = BS.splitAt (rclen - rlen') rc ---------------------------------------------------------------------------- -- encoding -- | Relatively slow streaming base32 encoder slowEnc :: Fmt -> Bool -> [Word8] -> [Char] slowEnc !fmt doPad = go0 where go0 [] = [] go0 (x:xs) = sym hi : go1 lo xs where -- 5|3 (hi,lo) = x `quotRem` 0x08 go1 rest [] = sym (rest * 0x04) : pad "======" go1 rest (x:xs) = sym (rest * 0x04 + hi) : sym mid : go2 lo xs where -- 2|5|1 (hi,tmp) = x `quotRem` 0x40 (mid,lo) = tmp `quotRem` 0x02 go2 rest [] = sym (rest * 0x10) : pad "====" go2 rest (x:xs) = sym (rest * 0x10 + hi) : go3 lo xs where -- 4|4 (hi,lo) = x `quotRem` 0x10 go3 rest [] = sym (rest * 0x02) : pad "===" go3 rest (x:xs) = sym (rest * 0x02 + hi) : sym mid : go4 lo xs where -- 1|5|2 (hi,tmp) = x `quotRem` 0x80 (mid,lo) = tmp `quotRem` 0x04 go4 rest [] = sym (rest * 0x08) : pad "=" go4 rest (x:xs) = sym (rest * 0x08 + hi) : sym lo : go0 xs where -- 3|5 (hi,lo) = x `quotRem` 0x20 pad x | doPad = x | otherwise = [] sym = case fmt of Fmt'base32 -> sym0 Fmt'base32hex -> symx -- plain base32 alphabet sym0 :: Word8 -> Char sym0 w | w < 26 = toEnum (fromIntegral w + 0x41) | w < 32 = toEnum (fromIntegral w + 24) | otherwise = undefined -- sym (w `rem` 32) -- base32hex alphabet symx :: Word8 -> Char symx w | w < 10 = toEnum (fromIntegral w + 0x30) | w < 32 = toEnum (fromIntegral w + 55) | otherwise = undefined -- sym (w `rem` 32) ---------------------------------------------------------------------------- -- decoding -- internal abstraction data DecodeRes = DR'Final !BB.Builder -- only emitted on empty EOF-signalling input | DR'Partial !BB.Builder (BS.ByteString -> DecodeRes) | DR'Error !Word DecodeError instance Show DecodeRes where show (DR'Final x) = "DR'Final " ++ show (BB.toLazyByteString x) show (DR'Partial x _) = "DR'Partial " ++ show (BB.toLazyByteString x) ++ " " show (DR'Error ofs err) = "DR'Err " ++ show ofs ++ " " ++ show err data DecodeError = Error'IncompleteInput | Error'InvalidChar | Error'InvalidPad deriving Show showDecErr :: Word -> DecodeError -> String showDecErr ofs Error'InvalidPad = "Base32-encoded data has invalid padding at offset: " ++ show ofs showDecErr ofs Error'IncompleteInput = "Base32-encoded data ended prematurely at offset: " ++ show ofs showDecErr ofs Error'InvalidChar = "Base32-encoded data has invalid character at offset: " ++ show ofs runDecode :: Fmt -> BS.ByteString -> DecodeRes runDecode !fmt = go 0 mempty where -- invariants: -- len(buf0) < 8 go ofs0 buf0 buf1 | doflush, BS.null buf0 = DR'Final mempty | doflush = case decodePaddedChunk fmt buf0 of Left (ofs,err) -> DR'Error (ofs0+ofs) err Right (out,rest) | BS.null rest -> DR'Final out -- actually not possible for padded input | otherwise -> error "runDecode: the impossible just happened" -- because buf0 invariant | buf01len < 8 = DR'Partial mempty (go ofs0 buf01) | otherwise = case decodeChunks fmt buf01 of Left (ofs,err) -> DR'Error (ofs0+ofs) err Right (chunks,rest) -> DR'Partial chunks (go (ofs0 + fromIntegral (buf01len - BS.length rest)) rest) where -- TODO: consider avoiding copying all of buf1 and instead copy only the prefix needed to -- complete buf0 into a full chunk buf01 = buf0 <> buf1 buf01len = BS.length buf01 doflush = BS.null buf1 -- | Iterate over multiple chunks; calls 'decodeChunk' repeatedly -- -- The remaining ByteString is guaranteed to be smaller than 8 octets decodeChunks :: Fmt -> BS.ByteString -> Either (Word,DecodeError) (BB.Builder,BS.ByteString) decodeChunks !fmt = go 0 mempty where go !ofs0 bb0 bs0 | BS.null bs0 = pure (bb0,mempty) | otherwise = case decodePaddedChunk fmt bs0 of Left (_, Error'IncompleteInput) -> pure (bb0,bs0) Left (ofs1,err) -> Left (ofs0+ofs1, err) Right (chunk, rest) -> go (ofs0+8) (bb0 <> chunk) rest -- | Tries do decode a single 40bit base32 chunk (encoded as eight case-insensitive ASCII chars) -- -- NB: An empty input leads to an incomplete input error result, as this function will either decode exactly a single -- chunk or report an error decodePaddedChunk :: Fmt -> BS.ByteString -> Either (Word,DecodeError) (BB.Builder,BS.ByteString) decodePaddedChunk !fmt bs0 | bs0len < 8 = Left (fromIntegral $ bs0len, Error'IncompleteInput) | otherwise = do let (digs,padding) = BS.break (== 0x3d) $ BS.take 8 bs0 let vlen = fromIntegral $ BS.length digs vals <- forM (zip [0..] (BS.C.unpack digs)) $ \(i,c) -> maybe (Left (i,Error'InvalidChar)) pure $ desym c olen <- case vlen of 0 -> Left (0, Error'InvalidPad) 1 -> Left (1, Error'InvalidPad) 2 -> pure 1 3 -> Left (3, Error'InvalidPad) 4 -> pure 2 5 -> pure 3 6 -> Left (6, Error'InvalidPad) 7 -> pure 4 8 -> pure 5 _ -> undefined -- check padding trailer has no garbage forM_ (zip [vlen ..] (BS.unpack padding)) $ \(i,c) -> unless (c == 0x3d) $ Left (i, Error'InvalidPad) -- bit shuffle 8*5bit into 5*8bit let buf64 = sum $ zipWith shiftL (map fromIntegral vals) [35, 30 .. 0] buf8 = map (fromIntegral . shiftR (buf64 :: Word64)) [32, 24 .. 0] -- check last 5bit digit didn't have unused bits set unless (all (== 0) $ drop olen buf8) $ do Left (fromIntegral vlen - 1, Error'InvalidPad) pure $ (BB.byteString $ BS.pack $ take olen buf8, BS.drop 8 bs0) where bs0len = BS.length bs0 desym = case fmt of Fmt'base32 -> desym0 Fmt'base32hex -> desymx -- plain base32 alphabet (case-insens) desym0 :: Char -> Maybe Word8 desym0 c0 = case fromEnum c0 of c | c < 0x32 -> Nothing | c < 0x38 -> Just $! fromIntegral (c - 24) | c < 0x41 -> Nothing | c < 0x5b -> Just $! fromIntegral (c - 0x41) | c < 0x61 -> Nothing | c < 0x7b -> Just $! fromIntegral (c - 0x61) | otherwise -> Nothing -- base32hex alphabet (case-insens) desymx :: Char -> Maybe Word8 desymx c0 = case fromEnum c0 of c | c < 0x30 -> Nothing | c < 0x3a -> Just $! fromIntegral (c - 0x30) | c < 0x41 -> Nothing | c < 0x57 -> Just $! fromIntegral (c - 55) | c < 0x61 -> Nothing | c < 0x77 -> Just $! fromIntegral (c - 87) | otherwise -> Nothing