{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeApplications #-}

-- Glossary
--
-- CHK - An encryption and encoding scheme for storing immutable data.
--
-- Data - The plaintext used to construct a CHK.
--
-- Share - One complete unit of encrypted and FEC-encoded data.
--
-- Segment - One piece of ciphertext used to construct a CHK.  All segments
--   belonging to a CHK are the same size except the last one, which may be
--   short.
--
-- k, required - The number of "primary" erasure-encoding outputs.  Equal to
--   the minimum number of erasure-encoding outputs needed to reconstruct
--   the erasure-encoding input.
--
-- n, total - The total number of erasure-encoding outputs.  Always greater
--   than or equal to required.
--
-- Block - One output resulting from erasure-encoding one segment using
--   required, total.  If necessary, the input segment is NUL-padded so its
--   size is a multiple of required.
--
-- Plaintext Hash Tree - Not actually implemented by Tahoe, so the details
--   here are uncertain.  Probably something like a SHA-256d merkle tree
--   where the leaves are hashes of the plaintext corresponding to each
--   ciphertext segment.  Since all shares for a CHK are derived from the
--   same plaintext, every share has the same plaintext hash tree.
--
-- Crypttext Hash Tree - A SHA-256d merkle tree where the leaves are hashes
--   of the ciphertext segments.  Since all shares for a CHK are derived
--   from the same ciphertext, every share has the same crypttext hash tree.
--
-- Crypttext Root Hash - The hash at the root of the Crypttext Hash Tree.
--
-- Block Hash Tree - A SHA-256d merkle tree where the leaves are hashes of
--   the blocks.  Since the erasure-encoding output is different for each
--   share, every share has a different block hash tree.
--
-- Share Hash Tree - A SHA-256d merkle tree where the leaves are the root
--   hashes of the block hash trees for all shares.
--
-- Share Hashes - A list of hashes from the Share Hash Tree which are
--   required to verify one block hash tree.  Each share includes the Share
--   Hashes required to verify the Block Hash Tree contained within that
--   share.  Since every share contains a different Block Hash Tree, every
--   share contains a different list of Share Hashes.  Each Share Hash in
--   this list is accompanied by information about its position in the
--   Share Hash Tree, though this may not be strictly required (it could be
--   inferred from position in the list).
--
-- URI Extension - A collection of metadata describing the encryption and
--   encoding used to create the CHK, (largely) necessary for either
--   decoding or verifying the integrity of the contained data.
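-- For example (illustrative numbers only): with required = 3 and total =
-- 10, each segment is erasure-encoded into 10 blocks, any 3 of which
-- suffice to reconstruct the segment, and each of the 10 shares receives
-- one block from every segment.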
module Tahoe.CHK (
    zfec,
    zunfec,
    encode,
    decode,
    padCiphertext,
    segmentCiphertext,
) where

import qualified Codec.FEC as ZFEC
import Crypto.Cipher.AES128 (
    AESKey128,
 )
import Data.Int (Int64)
import Data.Word (Word64)

-- import Debug.Trace

import Crypto.Hash (
    Context,
    HashAlgorithm (hashDigestSize),
    SHA256 (SHA256),
    hashFinalize,
    hashInit,
    hashUpdate,
 )
import Data.Bifunctor (first, second)
import qualified Data.ByteArray as BA
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import Data.Foldable (foldlM)
import Data.List (sort, transpose)
import Data.List.Extra (
    snoc,
 )
import Data.Maybe (fromJust)
import qualified Tahoe.CHK.Capability as Cap
import Tahoe.CHK.Crypto (
    blockHash,
    ciphertextSegmentHash,
    ciphertextTag,
    sha256,
    uriExtensionHash,
 )
import Tahoe.CHK.Merkle (
    MerkleTree,
    buildTreeOutOfAllTheNodes,
    leafHashes,
    leafNumberToNodeNumber,
    makeTreePartial,
    neededHashes,
    rootHash,
 )
import Tahoe.CHK.Share (Share (..))
import Tahoe.CHK.Types (
    BlockHash,
    CrypttextHash,
    Parameters (..),
    Required,
    requiredToInt,
    totalToInt,
 )
import Tahoe.CHK.URIExtension (
    URIExtension (..),
 )
import Tahoe.Netstring (
    netstring,
 )
import Tahoe.Util (
    ceilDiv,
    chunkedBy,
    nextMultipleOf,
    nextPowerOf,
 )

-- | Erasure encode some bytes using ZFEC.
zfec ::
    -- | The number of outputs that will be required to reverse the
    -- encoding.  Also known as `k`.
    Int ->
    -- | The total number of outputs to produce.  Also known as `n`.
    Int ->
    -- | Application data to divide into encoding inputs.
    B.ByteString ->
    -- | `n` encoding outputs.
    IO [B.ByteString]
zfec k n segment =
    pure $ chunks ++ ZFEC.encode (ZFEC.fec k n) chunks
  where
    chunks_ = chunkedBy (B.length segment `div` k) segment
    _msg =
        "zfec"
            <> " k="
            <> show k
            <> " n="
            <> show n
            <> ", segment len "
            <> show (B.length segment)
            <> ", chunk lengths "
            <> show (map B.length chunks_)
            <> ", segment "
            <> show segment
            <> " -> chunks "
            <> show chunks_
    chunks = {- trace _msg -} chunks_

-- | Version of `zfec` that operates on lazy ByteStrings.
zfecLazy :: Int -> Int -> LB.ByteString -> IO [LB.ByteString]
zfecLazy k n segment = (LB.fromStrict <$>) <$> zfec k n (LB.toStrict segment)

-- | Erasure decode some bytes using ZFEC.
zunfec ::
    -- | The `k` parameter used when encoding the data to decode.
    Int ->
    -- | The `n` parameter used when encoding the data to decode.
    Int ->
    -- | The encoding outputs annotated with their position (or "share
    -- number").
    [(Int, B.ByteString)] ->
    -- | The bytes which were originally encoded.
    IO B.ByteString
zunfec k n blocks = pure $ B.concat (ZFEC.decode (ZFEC.fec k n) blocks)

-- | Version of `zunfec` that operates on lazy ByteStrings.
zunfecLazy :: Int -> Int -> [(Int, LB.ByteString)] -> IO LB.ByteString
zunfecLazy k n blocks = do
    segment_ <- LB.fromStrict <$> zunfec k n (second LB.toStrict <$> blocks)
    let _msg =
            "zunfec"
                <> " k="
                <> show k
                <> " n="
                <> show n
                <> " blocks="
                <> show blocks
                <> " -> segment "
                <> show segment_
    -- pure (trace _msg segment_)
    pure segment_
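-- A minimal round-trip sketch for `zfec` / `zunfec` (illustrative only, not
-- part of the module's API): with k = 2 and n = 3, `zfec` returns the two
-- primary chunks followed by one secondary block, and `zunfec` can
-- reconstruct the input from any two outputs, provided each is paired with
-- its position.  The segment length is a multiple of k here, as `encode`
-- guarantees via `padCiphertext`.
_zfecRoundTripExample :: IO Bool
_zfecRoundTripExample = do
    let k = 2
        n = 3
        segment = "abcdef" :: B.ByteString
    outputs <- zfec k n segment
    -- Discard the first output and decode from the remaining two.
    recovered <- zunfec k n (drop 1 (zip [0 :: Int ..] outputs))
    pure (recovered == segment)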
{- | Represent progress encoding some ciphertext into a CHK share.  This
 carries along intermediate hash values used at the end to build extra
 self-authenticating fields into the share.
-}
data EncodingState = CPState
    { -- A single hash of all crypttext segments encoded so far.
      cpCrypttextHash :: Crypto.Hash.Context Crypto.Hash.SHA256
    , -- A list of hashes of each ciphertext segment encoded so far.
      cpCrypttextHashes :: [CrypttextHash]
    , -- Hashes of blocks encoded so far.
      cpBlockHashes :: [[BlockHash]]
    , -- Blocks encoded so far.
      cpBlocks :: [[LB.ByteString]]
    }

-- | The initial state for CHK encoding.
initEncodingState :: EncodingState
initEncodingState =
    CPState
        { cpCrypttextHash = hashUpdate (hashInit :: Context SHA256) (netstring ciphertextTag)
        , cpCrypttextHashes = mempty
        , cpBlockHashes = mempty
        , cpBlocks = mempty
        }

{- | Split a full ciphertext string into the separate ciphertext segments
 required by most of CHK encoding.
-}
segmentCiphertext ::
    -- | The encoding parameters which determine how to split the
    -- ciphertext.
    Parameters ->
    -- | The ciphertext.
    LB.ByteString ->
    -- | The segments.
    [LB.ByteString]
segmentCiphertext Parameters{paramSegmentSize} ciphertext =
    result
  where
    result = {- trace ("segmentCiphertext: " <> show ciphertext) -} result_
    result_ = LB.fromStrict <$> chunkedBy (fromIntegral paramSegmentSize) (LB.toStrict ciphertext)

{- | Process ciphertext into blocks, carrying hashes computed along the way
 as state.
-}
processCiphertext :: Parameters -> [LB.ByteString] -> IO EncodingState
processCiphertext Parameters{paramRequiredShares, paramTotalShares} =
    foldlM processSegment initEncodingState
  where
    processSegment CPState{..} segment = do
        -- Produce the FEC blocks for this piece of ciphertext.
        blocks <-
            zfecLazy
                (requiredToInt paramRequiredShares)
                (totalToInt paramTotalShares)
                (padCiphertext paramRequiredShares segment)
        pure $
            CPState
                { cpCrypttextHash = hashUpdate cpCrypttextHash (LB.toStrict segment)
                , cpCrypttextHashes = snoc cpCrypttextHashes (ciphertextSegmentHash (LB.toStrict segment))
                , cpBlockHashes = snoc cpBlockHashes (blockHash . LB.toStrict <$> blocks)
                , cpBlocks = snoc cpBlocks blocks
                }

-- Compute the correctly padded ciphertext.  The only ciphertext which is
-- expected to require padding is the final segment, in case the original
-- ciphertext did not have a length that was a multiple of the `required`
-- parameter.
--
-- allmydata.immutable.encode.Encoder._gather_data NUL-pads up to num_chunks
-- times input_chunk_size.  num_chunks is our requiredShares.
-- input_chunk_size is taken from codec.get_block_size() which returns
-- codec.share_size.  share_size is div_ceil(data_size, required_shares).
-- data_size is our segmentSize and required_shares is our requiredShares.
padCiphertext :: Required -> LB.ByteString -> LB.ByteString
padCiphertext requiredShares bs
    | paddingLength > 0 = bs <> LB.replicate paddingLength 0x00
    | otherwise = bs
  where
    desiredLength = nextMultipleOf requiredShares (LB.length bs)
    paddingLength = desiredLength - LB.length bs
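-- A small sketch of the padding rule above (assuming, as the uses of
-- `Required` elsewhere in this module suggest, that numeric literals work
-- for it): a 10-byte segment with 3 required shares gains two NUL bytes,
-- bringing it to 12 bytes, the next multiple of 3.
_padCiphertextExample :: Bool
_padCiphertextExample =
    padCiphertext 3 input == input <> LB.replicate 2 0x00
  where
    input = LB.replicate 10 0x61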
{- | Encode some application data (typically ciphertext, but this function
 only weakly assumes this is the case) into some CHK shares.

 This replaces much of allmydata.immutable.encode.
-}
encode ::
    -- | The encryption/decryption key.
    AESKey128 ->
    -- | The ZFEC parameters for this encoding.  This determines how many
    -- shares will come out of this function.
    Parameters ->
    -- | The data to encode.  This is typically ciphertext.
    LB.ByteString ->
    -- | An IO which can be evaluated to get the encoded share data and the
    -- read capability.  The number of Shares will equal the `total` value
    -- from the given Parameters.
    IO ([Share], Cap.Reader)
encode readKey initParams@(Parameters maximumSegmentSize total _ required) ciphertext =
    processCiphertext p (segmentCiphertext p ciphertext) >>= \CPState{..} ->
        let -- The number of segments encoded in the share.  There are the
            -- same number of plaintext and ciphertext segments and this is
            -- also the number of blocks in each share (though each share
            -- may have a different _value_ for each block).
            --
            -- allmydata.immutable.encode.Encoder._got_all_encoding_parameters
            numSegments = length cpBlocks

            -- Our merkle trees need a number of leaves equal to a power of
            -- 2.  Compute that here so we can pad as necessary.
            --
            -- allmydata.immutable.layout.WriteBucketProxy
            effectiveSegments = nextPowerOf 2 numSegments

            -- XXX Unused by Tahoe so we don't even try for a sensible value
            -- right now.  Just fill it with zeros.
            --
            -- As long as we calculate a valid number of nodes for a tree,
            -- buildTreeOutOfAllTheNodes won't give us a Nothing back ...
            -- cross your fingers.
            Just plaintextHashTree =
                buildTreeOutOfAllTheNodes
                    -- We have to fill the *whole* tree with NULs, not just
                    -- the leaves.  Compute the total number of nodes in a
                    -- tree that can hold our number of segments.
                    . replicate (2 * effectiveSegments - 1)
                    -- And make every node all NULs.
                    $ B.replicate (hashDigestSize SHA256) 0

            -- The merkle tree of ciphertext segment hashes.
            crypttextHashTree = makeTreePartial cpCrypttextHashes

            -- shareTree is a MerkleTree of MerkleTrees.
            shareTree =
                -- trace ("shareTree: " <> show shareTree')
                shareTree'
              where
                shareTree' = makeShareTree . map makeTreePartial . transpose $ cpBlockHashes

            -- A bag of additional metadata about the share and encoded
            -- object.
            uriExtension =
                URIExtension
                    { uriExtCodecName = "crs"
                    , uriExtCodecParams = p -- trace ("Params: " <> show p) p
                    , uriExtSize = fromIntegral $ LB.length ciphertext
                    , uriExtSegmentSize = segmentSize
                    , uriExtNeededShares = required
                    , uriExtTotalShares = total
                    , uriExtNumSegments = numSegments
                    , uriExtTailCodecParams = tailParams p (LB.length ciphertext)
                    , uriExtCrypttextHash = makeCrypttextHash cpCrypttextHash
                    , uriExtCrypttextRootHash = makeCrypttextRootHash cpCrypttextHashes
                    , uriExtShareRootHash = rootHash shareTree
                    }

            -- The read capability for the encoded object.
            cap =
                Cap.makeReader
                    readKey
                    (uriExtensionHash uriExtension)
                    required
                    total
                    (fromIntegral $ LB.length ciphertext)

            toShare sharenum blocks blockHashes =
                Share
                    { shareBlockSize = shareBlockSize p
                    , shareDataSize = fromIntegral $ LB.length ciphertext `ceilDiv` fromIntegral required
                    , shareBlocks = blocks
                    , sharePlaintextHashTree = plaintextHashTree
                    , shareCrypttextHashTree = crypttextHashTree
                    , shareBlockHashTree = makeTreePartial blockHashes
                    , shareNeededHashes = sort . fmap (first fromIntegral) $ computeNeededShares shareTree sharenum
                    , shareURIExtension = uriExtension
                    }

            -- The size in bytes of one erasure-encoded block of data.
            --
            -- allmydata.immutable.encode.Encoder._got_all_encoding_parameters +
            -- allmydata.codec.CRSEncoder.set_params
            shareBlockSize :: Parameters -> Word64
            shareBlockSize Parameters{paramSegmentSize, paramRequiredShares} =
                fromIntegral paramSegmentSize `ceilDiv` fromIntegral paramRequiredShares
         in pure
                ( zipWith3 toShare [0 ..] (transpose cpBlocks) (transpose cpBlockHashes)
                , cap
                )
  where
    -- If we have little enough ciphertext, the maximum configured segment
    -- size may be greater than the length of the single segment we produce.
    -- Segment size is also required to be a multiple of the number of
    -- required shares so that segments can be evenly divided across the
    -- shares.
    p@(Parameters segmentSize _ _ required') =
        initParams
            { paramSegmentSize =
                nextMultipleOf required' $
                    min maximumSegmentSize (fromIntegral $ LB.length ciphertext)
            }
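-- A sketch of how `encode` and `decode` fit together (a hypothetical
-- helper, not exported; the `Parameters` values are illustrative, and the
-- third parameter is ignored by `encode`, per its pattern match): encode
-- with 3-of-10 ZFEC, then recover the ciphertext from the first three
-- shares.
_encodeDecodeExample :: AESKey128 -> LB.ByteString -> IO Bool
_encodeDecodeExample key ciphertext = do
    (shares, cap) <- encode key (Parameters (128 * 1024) 10 7 3) ciphertext
    -- Any 3 of the 10 shares suffice; pair each with its share number.
    recovered <- decode cap (take 3 (zip [0 :: Int ..] shares))
    pure (recovered == Just ciphertext)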
{- | Decode some CHK shares to recover some application data.  This is
 roughly the inverse of `encode`.
-}
decode ::
    -- | The read capability for the application data.
    Cap.Reader ->
    -- | At least as many shares as are required to erasure decode the
    -- ciphertext.
    [(Int, Share)] ->
    -- | An action that results in the ciphertext contained by the shares if
    -- it is possible to recover it, or Nothing.
    IO (Maybe LB.ByteString)
decode Cap.Reader{verifier = Cap.Verifier{required, total, size}} shares
    | size > fromIntegral @Int64 @Integer maxBound = pure Nothing
    | length shares < fromIntegral required = pure Nothing
    | otherwise = do
        let -- Enough shares to satisfy the ZFEC decoder.
            enoughShares = take (fromIntegral required) shares

            -- A list of erasure-encoded blocks annotated with positional
            -- information.  The outer list gives a share number along with
            -- all of the blocks held in that share.
            blocks :: [(Int, [LB.ByteString])]
            blocks = second shareBlocks <$> enoughShares

            -- The outer list corresponds to erasure-encoded segments.  The
            -- order corresponds to the order of the segments from the
            -- original input.  Each inner list contains enough blocks to be
            -- erasure-decoded back to a segment.
            explodedBlocks :: [[(Int, LB.ByteString)]]
            explodedBlocks = transpose $ fixBlocks <$> blocks

            -- Figure out how many bytes are expected to be in each segment.
            -- Depending on the ZFEC encoding parameters, it is possible
            -- that we will end up with blocks that are not completely
            -- "filled" with real data.  When these are decoded, we will get
            -- _extra_ bytes in the result.  By knowing how many bytes were
            -- originally in our segments, we can recognize and discard
            -- these extra bytes.
            segSize = fromIntegral . paramSegmentSize . uriExtCodecParams . shareURIExtension . snd . head $ enoughShares

            -- A helper that knows the correct parameters to do ZFEC
            -- decoding for us.
            zunfec' = (LB.take segSize <$>) . zunfecLazy (fromIntegral required) (fromIntegral total)

        -- Decode every group of blocks back to the original segments.
        segments <- mapM zunfec' explodedBlocks

        -- Combine the segments and perform one more truncation to get the
        -- complete result.  Above, where we computed segSize, we were not
        -- careful to find the tail segment size for use with the tail
        -- segment, so there might still be some extra bytes in the
        -- `segments` list here.  This additional truncation addresses that.
        pure $ Just . LB.take (fromIntegral size) . LB.concat $ segments
  where
    -- Project the share number out across all of that share's blocks.  The
    -- result is something we can transpose into the correct form for ZFEC
    -- decoding.
    fixBlocks :: (Int, [LB.ByteString]) -> [(Int, LB.ByteString)]
    fixBlocks (sharenum, bs) = zip (repeat sharenum) bs
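-- An illustration (with hypothetical two-share, two-segment values) of the
-- reshaping `decode` performs before ZFEC decoding: tagging each share's
-- blocks with the share number and transposing yields one group of tagged
-- blocks per segment.
_explodedBlocksExample :: Bool
_explodedBlocksExample =
    transpose (fixBlocks' <$> [(0, ["a0", "a1"]), (1, ["b0", "b1"])])
        == [[(0, "a0"), (1, "b0")], [(0, "a1"), (1, "b1")]]
  where
    -- The same projection `decode` uses internally.
    fixBlocks' :: (Int, [LB.ByteString]) -> [(Int, LB.ByteString)]
    fixBlocks' (sharenum, bs) = zip (repeat sharenum) bs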
makeShareTree :: [MerkleTree] -> MerkleTree
makeShareTree = makeTreePartial . map rootHash

makeCrypttextHash :: Context SHA256 -> CrypttextHash
makeCrypttextHash = sha256 . toBytes . hashFinalize
  where
    toBytes = B.pack . BA.unpack

makeCrypttextRootHash :: [CrypttextHash] -> CrypttextHash
makeCrypttextRootHash = rootHash . makeTreePartial

-- Construct the encoding parameters for the final segment, which may be
-- smaller than the earlier segments (if the size of the data to be encoded
-- is not a multiple of the segment size).
--
-- allmydata.immutable.encode.Encoder._got_all_encoding_parameters
tailParams :: Integral a => Parameters -> a -> Parameters
tailParams p@Parameters{paramSegmentSize, paramRequiredShares} dataSize =
    p{paramSegmentSize = nextMultipleOf paramRequiredShares tailSize'}
  where
    tailSize' = if tailSize == 0 then paramSegmentSize else tailSize
    tailSize = fromIntegral dataSize `mod` paramSegmentSize

{- | Determine the node numbers of the share tree which are required to
 verify the indicated share number.  The indicated share number is included
 in the result, as are the corresponding hashes from the given tree.
-}
computeNeededShares :: MerkleTree -> Int -> [(Int, B.ByteString)]
computeNeededShares shareTree sharenum =
    -- In addition to what neededHashes computes, we also need to include
    -- this share's own block hash root in the result.  Shove it on the
    -- front of the result here.  This places it out of order; it is fixed
    -- up (with `sort`) where the Share is constructed in `encode`.  We also
    -- have to translate between zero-indexed share numbers and 1-indexed
    -- leaf numbers.
    --
    -- Is fromJust here safe?  neededHashes returns Nothing when it fails to
    -- compute a merkle proof.  Given the way we're using it, that can
    -- probably only happen if there's a bug inside neededHashes (as opposed
    -- to our passing in some value it doesn't want to provide a result
    -- for).
    (leafNumberToNodeNumber shareTree sharenum - 1, blockHashRoot shareTree sharenum) :
    fromJust (neededHashes shareTree sharenum)

-- | Find the nth leaf hash in the given tree.
blockHashRoot :: MerkleTree -> Int -> B.ByteString
blockHashRoot tree n
    | n < 0 = error "Cannot have a negative leaf number"
    | n >= length leafs = error "Leaf number goes past the end of the tree"
    | otherwise = leafs !! n
  where
    leafs = leafHashes tree
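-- A worked example of `tailParams` (illustrative numbers, assuming numeric
-- literals work for the parameter types as their other uses here suggest):
-- with a 131072-byte segment size and 3 required shares, a 300000-byte
-- input leaves a tail of 300000 `mod` 131072 = 37856 bytes, which is
-- rounded up to 37857, the next multiple of 3.
_tailParamsExample :: Bool
_tailParamsExample =
    paramSegmentSize (tailParams (Parameters 131072 10 7 3) (300000 :: Int)) == 37857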