{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE TypeSynonymInstances #-} {- | Maintainer: simons@cryp.to Stability: provisional Portability: portable This module provides a generic high-level API to the message digest algorithms found in OpenSSL's @crypto@ library. There are two functions of particular interest: 'digestByName' and 'digest'. The former can be used to retrieve an 'Algorithm', i.e. an OpenSSL object that implements a particular algorithm. That type can then be used to compute actual message digests with the latter function: >>> import Data.ByteString.Char8 ( pack ) >>> digest (digestByName "md5") (pack "Hello, world.") "\b\n\239\131\155\149\250\207s\236Y\147u\233-G" Neat pretty-printing can be achieved with 'toHex', which converts the binary representation of a message digest into the common hexadecimal one: >>> toHex $ digest (digestByName "md5") (pack "Hello, world.") "080aef839b95facf73ec599375e92d47" >>> toHex $ digest (digestByName "sha1") (pack "Hello, world.") "2ae01472317d1935a84797ec1983ae243fc6aa28" The precise set of available digest algorithms provided by OpenSSL depends on the version of the library installed into the system, obviously, but it's reasonable to expect the following algorithms to be present: MD5, RIPEMD160, SHA1, SHA224, SHA256, SHA384, and SHA512. If an algorithm is not available, 'digestByName' will throw an 'DigestAlgorithmNotAvailableInOpenSSL' exception. If you don't like exceptions, use the tamer 'digestByName'' variant: >>> digestByName' "i bet this algorithm won't exist" Nothing 'Algorithm' is an instance of 'IsString', so with the proper GHC extensions enabled it's possible to simplify the call to 'digest' even further: >>> :set -XOverloadedStrings >>> toHex $ digest "sha256" (pack "The 'Through the Universe' podcast rules.") "73624694a9435095c8fdaad711273a23c02226196c452f817cfd86f965895614" Last but not least, 'digest' is actually a class method of 'Digestable', which collects things we can compute digests of. The defaults are conservative, i.e. we support all things that correspond roughly to C's construct of "void pointer plus a length". @digest@ can use with any of the following signatures: >>> let shape1 = digest :: Algorithm -> (Ptr (), CSize) -> MessageDigest >>> let shape2 = digest :: Algorithm -> (Ptr Word8, CSize) -> MessageDigest >>> let shape3 = digest :: Algorithm -> (Ptr Word8, CUInt) -> MessageDigest >>> let shape4 = digest :: Algorithm -> (Ptr (), Int) -> MessageDigest >>> let shape5 = digest :: Algorithm -> StrictByteString -> MessageDigest >>> let shape6 = digest :: Algorithm -> LazyByteString -> MessageDigest 'StrictByteString' and 'LazyByteString' are also instances of 'IsString' and therefore subject to implicit construction from string literals: >>> shape5 "sha256" "hello" == shape6 "sha256" "hello" True Note that this code offers no overloaded 'digest' version for 'String', because that function would produce non-deterministic results for Unicode characters. There is an instance for @[Word8]@, though, so strings can be hashed after a proper encoding has been applied. For those who don't care about determinism, there is the following specialized function: >>> toHex $ digestString "md5" "no Digestable instance for this sucker" "a74827f849005794565f83fbd68ad189" If you don't mind orphaned instances, however, feel free to shoot yourself in the foot: >>> :set -XFlexibleInstances >>> instance Digestable String where updateChunk ctx str = withCStringLen str (updateChunk ctx) >>> toHex $ digest "sha256" ("now we can hash strings" :: String) "7f2989f173125810aa917c4ffe0e26ae1b5f7fb852274829c210297a43dfc7f9" -} module OpenSSL.Digest ( -- * Generic digest API MessageDigest, digest, Digestable(..), digestByName, digestByName', Algorithm , -- * Special instances digestString , -- * Helper Types and Functions toHex, StrictByteString, LazyByteString ) where import OpenSSL.EVP.Digest import qualified OpenSSL.Util as Util import Control.Exception import qualified Data.ByteString as Strict ( ByteString, packCStringLen, concatMap ) import Data.ByteString.Char8 as Strict8 ( pack ) import qualified Data.ByteString.Lazy as Lazy ( ByteString, toChunks ) import Data.ByteString.Unsafe ( unsafeUseAsCStringLen ) import Foreign import Foreign.C import System.IO.Unsafe as IO -- Generic Class API ---------------------------------------------------------- -- | A message digest is essentially an array of 'Word8' octets. type MessageDigest = StrictByteString -- | Compute the given message digest of any 'Digestable' thing, i.e. any type -- that can be converted /efficiently/ and /unambiguously/ into a continuous -- memory buffer or a sequence of continuous memory buffers. Note that 'String' -- does /not/ have that property, because the binary representation chosen for -- Unicode characters during the marshaling process is determined by the -- system's locale and is therefore non-deterministic. digest :: Digestable a => Algorithm -> a -> MessageDigest digest algo input = IO.unsafePerformIO $ bracket newContext freeContext $ \ctx -> do initDigest algo ctx updateChunk ctx input let mdSize = fromIntegral (digestSize algo) allocaArray mdSize $ \md -> do finalizeDigest ctx md Strict.packCStringLen (castPtr md, mdSize) -- | A class of things that can be part of a digest computations. By default, -- we define instances only for various representations of plain memory -- buffers, but in theory that class can be extended to contain all kinds of -- complex data types. class Digestable a where updateChunk :: Context -> a -> IO () instance Digestable (Ptr a, CSize) where {-# INLINE updateChunk #-} updateChunk ctx = uncurry (updateDigest ctx) instance Digestable (Ptr a, CUInt) where {-# INLINE updateChunk #-} updateChunk ctx = updateChunk ctx . fmap (fromIntegral :: CUInt -> CSize) instance Digestable (Ptr a, CInt) where {-# INLINE updateChunk #-} updateChunk ctx = updateChunk ctx . fmap (fromIntegral :: CInt -> CSize) instance Digestable (Ptr a, Int) where {-# INLINE updateChunk #-} updateChunk ctx = updateChunk ctx . fmap (fromIntegral :: Int -> CSize) instance Digestable [Word8] where {-# INLINE updateChunk #-} updateChunk ctx buf = withArrayLen buf $ \len ptr -> updateChunk ctx (ptr,len) instance Digestable StrictByteString where {-# INLINE updateChunk #-} updateChunk ctx str = unsafeUseAsCStringLen str (updateChunk ctx) instance Digestable LazyByteString where {-# INLINE updateChunk #-} updateChunk ctx = mapM_ (updateChunk ctx) . Lazy.toChunks -- |We do /not/ define a 'Digestable' instance for 'String', because there is -- no one obviously correct way to encode Unicode characters for purposes of -- calculating a digest. We have, however, this specialized function which -- computes a digest over a @String@ by means of 'withCStrinLen'. This means -- that the representation of Unicode characters depends on the process locale -- a.k.a. it's non-deterministc! -- -- >>> toHex $ digestString (digestByName "sha1") "Hello, world." -- "2ae01472317d1935a84797ec1983ae243fc6aa28" digestString :: Algorithm -> String -> MessageDigest digestString algo str = IO.unsafePerformIO $ withCStringLen str (return . digest algo) -- Helper functions ----------------------------------------------------------- -- | Synonym for the strict 'Strict.ByteString' variant to improve readability. type StrictByteString = Strict.ByteString -- | Synonym for the lazy 'Lazy.ByteString' variant to improve readability. type LazyByteString = Lazy.ByteString -- | Pretty-print a given message digest from binary into hexadecimal -- representation. -- -- >>> toHex (Data.ByteString.pack [0..15]) -- "000102030405060708090a0b0c0d0e0f" toHex :: MessageDigest -> StrictByteString toHex = Strict.concatMap (pack . Util.toHex)