{-# LANGUAGE FlexibleInstances #-}

{- |
   Maintainer:  simons@cryp.to
   Stability:   provisional
   Portability: portable

   This module provides a generic high-level API to the message digest
   algorithms found in OpenSSL's @crypto@ library. There are two functions of
   particular interest: 'digestByName' and 'digest'. The former can be used to
   retrieve an 'Algorithm', i.e. an OpenSSL object that implements a particular
   algorithm. That type can then be used to compute actual message digests with
   the latter function:

   >>> import Data.ByteString.Char8 ( pack )
   >>> digest (digestByName "md5") (pack "Hello, world.")
   "\b\n\239\131\155\149\250\207s\236Y\147u\233-G"

   Neat pretty-printing can be achieved with 'toHex', which converts the binary
   representation of a message digest into the common hexadecimal one:

   >>> toHex $ digest (digestByName "md5") (pack "Hello, world.")
   "080aef839b95facf73ec599375e92d47"
   >>> toHex $ digest (digestByName "sha1") (pack "Hello, world.")
   "2ae01472317d1935a84797ec1983ae243fc6aa28"

   The precise set of available digest algorithms provided by OpenSSL depends
   on the version of the library installed into the system, obviously, but it's
   reasonable to expect the following algorithms to be present: MD5, RIPEMD160,
   SHA1, SHA224, SHA256, SHA384, and SHA512. If an algorithm is not available,
   'digestByName' will throw an 'DigestAlgorithmNotAvailableInOpenSSL'
   exception. If you don't like exceptions, use the tamer 'digestByName''
   variant:

   >>> digestByName' "i bet this algorithm won't exist"
   Nothing

   'Algorithm' is an instance of 'IsString', so with the proper GHC extensions
   enabled it's possible to simplify the call to 'digest' even further:

   >>> :set -XOverloadedStrings
   >>> toHex $ digest "sha256" (pack "The 'Through the Universe' podcast rules.")
   "73624694a9435095c8fdaad711273a23c02226196c452f817cfd86f965895614"

   Last but not least, 'digest' is actually a class method of 'Digestable',
   which collects things we can compute digests of. The defaults are
   conservative, i.e. we support all things that correspond roughly to C's
   construct of "void pointer plus a length". @digest@ can use with any of the
   following signatures:

   >>> let shape1 = digest :: Algorithm -> (Ptr (),    CSize) -> MessageDigest
   >>> let shape2 = digest :: Algorithm -> (Ptr Word8, CSize) -> MessageDigest
   >>> let shape3 = digest :: Algorithm -> (Ptr Word8, CUInt) -> MessageDigest
   >>> let shape4 = digest :: Algorithm -> (Ptr (),    Int)   -> MessageDigest
   >>> let shape5 = digest :: Algorithm -> StrictByteString   -> MessageDigest
   >>> let shape6 = digest :: Algorithm -> LazyByteString     -> MessageDigest

   'StrictByteString' and 'LazyByteString' are also instances of 'IsString' and
   therefore subject to implicit construction from string literals:

   >>> shape5 "sha256" "hello" == shape6 "sha256" "hello"
   True

   Note that this code offers no overloaded 'digest' version for 'String',
   because that function would produce non-deterministic results for Unicode
   characters. There is an instance for @[Word8]@, though, so strings can be
   hashed after a proper encoding has been applied. For those who don't care
   about determinism, there is the following specialized function:

   >>> toHex $ digestString "md5" "no Digestable instance for this sucker"
   "a74827f849005794565f83fbd68ad189"

   If you don't mind orphaned instances, however, feel free to shoot yourself
   in the foot:

   >>> :set -XFlexibleInstances
   >>> instance Digestable String where updateChunk ctx str = withCStringLen str (updateChunk ctx)
   >>> toHex $ digest "sha256" ("now we can hash strings" :: String)
   "7f2989f173125810aa917c4ffe0e26ae1b5f7fb852274829c210297a43dfc7f9"
-}

module OpenSSL.Digest
  ( -- * Generic digest API
    MessageDigest, digest, Digestable(..), digestByName, digestByName', Algorithm
  , -- * Special instances
    digestString
  , -- * Helper Types and Functions
    toHex, StrictByteString, LazyByteString
  )
  where

import OpenSSL.EVP.Digest
import qualified OpenSSL.Util as Util

import Control.Exception
import qualified Data.ByteString as Strict ( ByteString, packCStringLen, concatMap )
import Data.ByteString.Char8 as Strict8 ( pack )
import qualified Data.ByteString.Lazy as Lazy ( ByteString, toChunks )
import Data.ByteString.Unsafe ( unsafeUseAsCStringLen )
import Foreign
import Foreign.C
import System.IO.Unsafe as IO

-- Generic Class API ----------------------------------------------------------

-- | A message digest is essentially an array of 'Word8' octets.

type MessageDigest = StrictByteString

-- | Compute the given message digest of any 'Digestable' thing, i.e. any type
-- that can be converted /efficiently/ and /unambiguously/ into a continuous
-- memory buffer or a sequence of continuous memory buffers. Note that 'String'
-- does /not/ have that property, because the binary representation chosen for
-- Unicode characters during the marshaling process is determined by the
-- system's locale and is therefore non-deterministic.

digest :: Digestable a => Algorithm -> a -> MessageDigest
digest :: forall a. Digestable a => Algorithm -> a -> MessageDigest
digest Algorithm
algo a
input =
  forall a. IO a -> a
IO.unsafePerformIO forall a b. (a -> b) -> a -> b
$
    forall a b c. IO a -> (a -> IO b) -> (a -> IO c) -> IO c
bracket IO Context
newContext Context -> IO ()
freeContext forall a b. (a -> b) -> a -> b
$ \Context
ctx -> do
      Algorithm -> Context -> IO ()
initDigest Algorithm
algo Context
ctx
      forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx a
input
      let mdSize :: Int
mdSize = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Algorithm -> Int
digestSize Algorithm
algo)
      forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray Int
mdSize forall a b. (a -> b) -> a -> b
$ \Ptr Word8
md -> do
        Context -> Ptr Word8 -> IO ()
finalizeDigest Context
ctx Ptr Word8
md
        CStringLen -> IO MessageDigest
Strict.packCStringLen (forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
md, Int
mdSize)

-- | A class of things that can be part of a digest computations. By default,
-- we define instances only for various representations of plain memory
-- buffers, but in theory that class can be extended to contain all kinds of
-- complex data types.

class Digestable a where
  updateChunk :: Context -> a -> IO ()

instance Digestable (Ptr a, CSize) where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> (Ptr a, CSize) -> IO ()
updateChunk Context
ctx = forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry (forall a. Context -> Ptr a -> CSize -> IO ()
updateDigest Context
ctx)

instance Digestable (Ptr a, CUInt) where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> (Ptr a, CUInt) -> IO ()
updateChunk Context
ctx = forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b. (Integral a, Num b) => a -> b
fromIntegral :: CUInt -> CSize)

instance Digestable (Ptr a, CInt) where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> (Ptr a, CInt) -> IO ()
updateChunk Context
ctx = forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b. (Integral a, Num b) => a -> b
fromIntegral :: CInt -> CSize)

instance Digestable (Ptr a, Int) where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> (Ptr a, Int) -> IO ()
updateChunk Context
ctx = forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Int -> CSize)

instance Digestable [Word8] where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> [Word8] -> IO ()
updateChunk Context
ctx [Word8]
buf = forall a b. Storable a => [a] -> (Int -> Ptr a -> IO b) -> IO b
withArrayLen [Word8]
buf forall a b. (a -> b) -> a -> b
$ \Int
len Ptr Word8
ptr -> forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx (Ptr Word8
ptr,Int
len)

instance Digestable StrictByteString where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> MessageDigest -> IO ()
updateChunk Context
ctx MessageDigest
str = forall a. MessageDigest -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen MessageDigest
str (forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx)

instance Digestable LazyByteString where
  {-# INLINE updateChunk #-}
  updateChunk :: Context -> LazyByteString -> IO ()
updateChunk Context
ctx = forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall a. Digestable a => Context -> a -> IO ()
updateChunk Context
ctx) forall b c a. (b -> c) -> (a -> b) -> a -> c
. LazyByteString -> [MessageDigest]
Lazy.toChunks

-- |We do /not/ define a 'Digestable' instance for 'String', because there is
-- no one obviously correct way to encode Unicode characters for purposes of
-- calculating a digest. We have, however, this specialized function which
-- computes a digest over a @String@ by means of 'withCStrinLen'. This means
-- that the representation of Unicode characters depends on the process locale
-- a.k.a. it's non-deterministc!
--
-- >>> toHex $ digestString (digestByName "sha1") "Hello, world."
-- "2ae01472317d1935a84797ec1983ae243fc6aa28"

digestString :: Algorithm -> String -> MessageDigest
digestString :: Algorithm -> String -> MessageDigest
digestString Algorithm
algo String
str = forall a. IO a -> a
IO.unsafePerformIO forall a b. (a -> b) -> a -> b
$
  forall a. String -> (CStringLen -> IO a) -> IO a
withCStringLen String
str (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Digestable a => Algorithm -> a -> MessageDigest
digest Algorithm
algo)

-- Helper functions -----------------------------------------------------------

-- | Synonym for the strict 'Strict.ByteString' variant to improve readability.

type StrictByteString = Strict.ByteString

-- | Synonym for the lazy 'Lazy.ByteString' variant to improve readability.

type LazyByteString = Lazy.ByteString

-- | Pretty-print a given message digest from binary into hexadecimal
-- representation.
--
-- >>> toHex (Data.ByteString.pack [0..15])
-- "000102030405060708090a0b0c0d0e0f"

toHex :: MessageDigest -> StrictByteString
toHex :: MessageDigest -> MessageDigest
toHex = (Word8 -> MessageDigest) -> MessageDigest -> MessageDigest
Strict.concatMap (String -> MessageDigest
pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word8 -> String
Util.toHex)