{-# LANGUAGE DataKinds #-} {-# LANGUAGE OverloadedStrings #-} -- | -- Copyright : 2018 Monadic GmbH -- License : BSD3 -- Maintainer : kim@monadic.xyz, team@monadic.xyz -- Stability : provisional -- Portability : non-portable (GHC extensions) -- -- -- -- \"CID is a format for referencing content in distributed information systems, -- like IPFS. It leverages content addressing, cryptographic hashing, and -- self-describing formats. It is the core identifier used by IPFS and IPLD.\" module Data.IPLD.CID ( Version (..) , Codec (..) , CID , cidVersion , cidCodec , cidHash , newCidV0 , newCidV1 , buildCid , decodeCid , getCid , cidFromText , cidToText , codecToCode , codecFromCode ) where import Control.DeepSeq (NFData) import Control.Monad ((>=>)) import Crypto.Hash (Digest, SHA256) import Data.Bifunctor (bimap) import Data.Binary.Get as Binary import Data.Binary.VarInt (buildVarInt, getVarInt) import Data.ByteString (ByteString) import qualified Data.ByteString as BS import qualified Data.ByteString.BaseN as BaseN import Data.ByteString.Builder (Builder) import qualified Data.ByteString.Builder as Builder import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Multibase as Multibase import Data.Hashable (Hashable) import Data.Multihash (Multihash, Multihashable) import qualified Data.Multihash as Multihash import Data.Text (Text) import qualified Data.Text as Text import Data.Text.Encoding (decodeUtf8, encodeUtf8) import Data.Word (Word8) import GHC.Generics (Generic) -- | Specification version. data Version = V0 | V1 deriving (Eq, Show, Ord, Enum, Bounded, Generic) instance Hashable Version instance NFData Version -- | The content type or format of the data being addressed, specified as a -- . -- -- Note that we do not currently have a full multicodec implementation, as it is -- overly complicated for our purposes. We also only support 'Codec's on an -- as-needed basis. Future versions may utilise a separate library. data Codec = Raw | DagProtobuf | DagCbor | GitRaw deriving (Eq, Show, Ord, Generic) instance Hashable Codec instance NFData Codec -- | A Content IDentifier. -- -- * 'V0' 'CID's are merely SHA256 hashes, base58-encoded using the bitcoin -- alphabet. The 'Codec' is implicitly 'DagProtobuf'. -- * 'V1' 'CID's may use any 'Multihash', and any of the supported 'Codec's. data CID = CID { cidVersion :: Version , cidCodec :: Codec , cidHash :: Multihash } deriving (Eq, Ord, Generic) instance Hashable CID instance NFData CID instance Show CID where show = Text.unpack . cidToText instance Read CID where readsPrec _ = either (const []) (\cid -> [(cid, "")]) . cidFromText . Text.pack -- CID ------------------------------------------------------------------------- -- | Create a 'V0' 'CID'. newCidV0 :: Digest SHA256 -> CID newCidV0 dig = CID { cidVersion = V0 , cidCodec = DagProtobuf , cidHash = Multihash.fromDigest dig } -- | Create a 'V1' 'CID'. newCidV1 :: Multihashable a => Codec -> Digest a -> CID newCidV1 codec dig = CID { cidVersion = V1 , cidCodec = codec , cidHash = Multihash.fromDigest dig } -- | Serialise a 'CID'. buildCid :: CID -> Builder buildCid CID{..} = case cidVersion of V0 -> Builder.byteString (Multihash.encodedBytes cidHash) V1 -> buildVarInt (fromEnum cidVersion) <> buildVarInt (codecToCode cidCodec) <> Builder.byteString (Multihash.encodedBytes cidHash) -- | Decode a 'CID' from a strict 'ByteString'. -- -- @ -- decodeCid . buildCid ≡ Right -- @ decodeCid :: ByteString -> Either String CID decodeCid bs | isV0 = newCidV0 <$> Multihash.decodeDigest bs | otherwise = bimap _3 _3 . Binary.runGetOrFail getCid $ LBS.fromStrict bs where isV0 = BS.length bs == 34 && magic == BS.take 2 bs magic = BS.pack [18,32] _3 (_,_,x) = x -- | Deserialise a 'CID' in the 'Binary.Get' monad. -- -- Note that this does __/not/__ support 'V0' 'CID's. getCid :: Binary.Get CID getCid = do cidVersion <- do v <- Binary.getWord8 >>= getVarInt if v < minVersion || v > maxVersion then fail $ "CID: Invalid version: " <> show v else pure $ toEnum v case cidVersion of V1 -> do cidCodec <- do c <- Binary.getWord8 >>= getVarInt maybe (fail ("CID: Unknown Codec: " <> show c)) pure $ codecFromCode c cidHash <- Multihash.getMultihash pure CID{..} v -> fail $ "CID: Unsupported version: " <> show v where maxVersion = fromEnum (maxBound :: Version) minVersion = fromEnum (minBound :: Version) -- | Decode a 'CID' from a textual representation. -- -- The 'Text' value is expected to be base58 (bitcoin) encoded (for 'V0' -- 'CID's), or a valid 'Multibase'. -- -- @ -- cidFromText . cidToText ≡ id -- @ cidFromText :: Text -> Either String CID cidFromText t = decodeBase >=> decodeCid $ encodeUtf8 t where isV0 = Text.length t == 46 && "Qm" `Text.isPrefixOf` t decodeBase | isV0 = BaseN.decodeBase58btc | otherwise = Multibase.decode >=> guardReserved -- "If the first decoded byte is 0x12, return an error. CIDv0 CIDs may not -- be multibase encoded and there will be no CIDv18 (0x12 = 18) to prevent -- ambiguity with decoded CIDv0s." guardReserved bs = case BS.uncons bs of Just (x, _) | x == 18 -> Left "CID > V0 starts with reserved byte 0x12" _ -> Right bs -- | Encode a 'CID' to a textual representation. -- -- The result is either a base58 (bitcoin) encoded string of just the 'cidHash' -- value for 'V0' 'CID's, or otherwise a 'Multibase' value at base 'Base58btc' -- of the binary representation of the 'CID' (as produced by 'buildCid'). cidToText :: CID -> Text cidToText cid = decodeUtf8 $ case cidVersion cid of V0 -> BaseN.encodedBytes . BaseN.encodeBase58btc . Multihash.encodedBytes $ cidHash cid V1 -> Multibase.fromMultibase . Multibase.encode . BaseN.encodeBase58btc . LBS.toStrict . Builder.toLazyByteString $ buildCid cid -- Codec ----------------------------------------------------------------------- -- | numerical code of -- the given 'Codec'. codecToCode :: Codec -> Word8 codecToCode Raw = 0x55 codecToCode DagProtobuf = 0x70 codecToCode DagCbor = 0x71 codecToCode GitRaw = 0x78 -- | Attempt to convert from a -- numerical code to a 'Codec'. codecFromCode :: Word8 -> Maybe Codec codecFromCode 0x55 = pure Raw codecFromCode 0x70 = pure DagProtobuf codecFromCode 0x71 = pure DagCbor codecFromCode 0x78 = pure GitRaw codecFromCode _ = Nothing