{-# LANGUAGE DataKinds                  #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TypeApplications           #-}

-- |
-- Copyright   : 2019 Monadic GmbH
-- License     : BSD3
-- Maintainer  : kim@monadic.xyz, alfredo@monadic.xyz, team@monadic.xyz
-- Stability   : provisional
-- Portability : non-portable (GHC extensions)
--
-- Multibase encoding of strict 'ByteString's.
--
module Data.ByteString.Multibase
    ( -- * Supported bases
      BaseN.Base(..)
    , ToCode

    -- * Multibase encoding and decoding
    , Multibase
    , fromMultibase
    , encodedBytes
    , encode
    , decode

    -- * Compact representation
    , CompactMultibase
    , compact
    , expand
    )
where

import           Control.DeepSeq (NFData)
import           Data.ByteString (ByteString)
import           Data.ByteString.BaseN (AtBase, ValidBase)
import qualified Data.ByteString.BaseN as BaseN
import qualified Data.ByteString.Char8 as C8
import           Data.ByteString.Short (ShortByteString, fromShort, toShort)
import           Data.Coerce (coerce)
import           Data.Hashable (Hashable)
import           Data.Typeable

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Test.QuickCheck
-- >>> import qualified Data.ByteString as BS
-- >>> newtype Bytes = Bytes ByteString deriving (Eq, Show)
-- >>> instance Arbitrary Bytes where arbitrary = Bytes . BS.pack <$> arbitrary

-- Bases we don't support yet:
--
--   * Base1  - unary (11111) ----- WTF?
--   * Base2  - binary (01010101)
--   * Base8  - octal
--   * Base10 - decimal

-- | Symbols for which a multibase code is defined and supported by this library
class ValidBase b => ToCode b where
    -- | The code 'Char' that 'encode' prepends to data encoded at base @b@.
    toCode :: proxy b -> Char

instance ToCode "id"       where toCode _ = '\000'
instance ToCode "16"       where toCode _ = 'f'
instance ToCode "16u"      where toCode _ = 'F'
instance ToCode "32x"      where toCode _ = 'v'
instance ToCode "32xu"     where toCode _ = 'V'
instance ToCode "32xp"     where toCode _ = 't'
instance ToCode "32xpu"    where toCode _ = 'T'
instance ToCode "32"       where toCode _ = 'b'
instance ToCode "32z"      where toCode _ = 'h'
instance ToCode "32u"      where toCode _ = 'B'
instance ToCode "32p"      where toCode _ = 'c'
instance ToCode "32pu"     where toCode _ = 'C'
instance ToCode "58flickr" where toCode _ = 'Z'
instance ToCode "58btc"    where toCode _ = 'z'
instance ToCode "64"       where toCode _ = 'm'
instance ToCode "64p"      where toCode _ = 'M'
instance ToCode "64url"    where toCode _ = 'u'
instance ToCode "64urlpad" where toCode _ = 'U'

-- codes we don't support yet
--toCode Base2  = '0'
--toCode Base8  = '7'
--toCode Base10 = '9'

-- | Look up the decoding function for a multibase code 'Char'.
--
-- This is the inverse of the 'ToCode' instances above: every code produced by
-- 'toCode' maps to the decoder for the same base. The identity code @\'\\000\'@
-- maps to a decoder that returns its input unchanged. Any other character
-- yields 'Nothing'.
fromCode :: Char -> Maybe (ByteString -> Either String ByteString)
fromCode code = case code of
    '\000' -> Just Right -- identity: the payload is already the raw bytes
    'f'    -> Just (BaseN.decodeAtBaseEither (Proxy @"16"))
    'F'    -> Just (BaseN.decodeAtBaseEither (Proxy @"16u"))
    'v'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32x"))
    'V'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32xu"))
    't'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32xp"))
    'T'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32xpu"))
    'b'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32"))
    'h'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32z"))
    'B'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32u"))
    'c'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32p"))
    'C'    -> Just (BaseN.decodeAtBaseEither (Proxy @"32pu"))
    'Z'    -> Just (BaseN.decodeAtBaseEither (Proxy @"58flickr"))
    'z'    -> Just (BaseN.decodeAtBaseEither (Proxy @"58btc"))
    'm'    -> Just (BaseN.decodeAtBaseEither (Proxy @"64"))
    'M'    -> Just (BaseN.decodeAtBaseEither (Proxy @"64p"))
    'u'    -> Just (BaseN.decodeAtBaseEither (Proxy @"64url"))
    'U'    -> Just (BaseN.decodeAtBaseEither (Proxy @"64urlpad"))
    _      -> Nothing

-- | A multibase-encoded strict 'ByteString'.
--
-- The constructor is not exported; values are only created by 'encode' (or by
-- 'expand'ing a 'CompactMultibase'), so the wrapped bytes always start with
-- the code 'Char' of the base.
newtype Multibase = Multibase ByteString
    deriving (Eq, Ord, Hashable, NFData)

-- | A 'Multibase' backed by a 'ShortByteString'.
--
-- This is useful when holding many 'Multibase' values in memory, due to lower
-- memory overhead and less heap fragmentation. See the documentation for
-- 'ShortByteString' for details.
newtype CompactMultibase = Compact ShortByteString
    deriving (Eq, Ord, Hashable, NFData)

-- | Extract the encoded bytes, including the code 'Char', from a 'Multibase'.
fromMultibase :: Multibase -> ByteString
fromMultibase = coerce

-- | Extract the encoded bytes from a 'Multibase'.
--
-- The code 'Char' signifying the base is stripped.
encodedBytes :: Multibase -> ByteString
encodedBytes (Multibase bytes) =
    -- 'C8.drop' is total, unlike 'C8.tail'. For every value built by 'encode'
    -- (which always prepends a code 'Char') the result is the same.
    C8.drop 1 bytes

-- | Encode a strict 'ByteString' at 'Base'.
--
-- The result is the base-@b@ encoded bytes prefixed with the code 'Char'
-- given by 'toCode' for @b@.
encode :: ToCode b => AtBase b -> Multibase
encode base = Multibase (C8.cons (toCode base) (BaseN.encodedBytes base))

-- | Decode a strict 'ByteString', assumed to be 'Multibase'-encoded.
--
-- The first character selects the base; the remaining bytes are handed to the
-- decoder for that base. Returns 'Left' with a message if the input is empty,
-- if the leading character is not a supported code, or if the selected
-- decoder itself fails.
--
-- >>> decode ""
-- Left "Empty input"
--
-- >>> decode "?abc"
-- Left "Unknown encoding '?'"
decode :: ByteString -> Either String ByteString
decode bs = do
    (code, payload) <- note "Empty input" (C8.uncons bs)
    case fromCode code of
        Just decodeIt -> decodeIt payload
        Nothing       -> Left ("Unknown encoding " <> show code)

-- | /O(n)/. Convert a 'Multibase' encoding to a compact representation.
--
-- Involves copying the underlying 'ByteString'.
compact :: Multibase -> CompactMultibase
compact = coerce . toShort . coerce
{-# INLINE compact #-}

-- | /O(n)/. Convert from the compact to the regular representation.
--
-- Involves copying the underlying 'ShortByteString'.
expand :: CompactMultibase -> Multibase
expand = coerce . fromShort . coerce
{-# INLINE expand #-}

-- Helpers ---------------------------------------------------------------------

-- | Turn a 'Maybe' into an 'Either', using the given value as the 'Left' when
-- the 'Maybe' is 'Nothing'.
note :: a -> Maybe b -> Either a b
note err = maybe (Left err) Right