-- Copyright (c) 2014 Eric McCorkle.  All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
--
-- 1. Redistributions of source code must retain the above copyright
--    notice, this list of conditions and the following disclaimer.
--
-- 2. Redistributions in binary form must reproduce the above copyright
--    notice, this list of conditions and the following disclaimer in the
--    documentation and/or other materials provided with the distribution.
--
-- 3. Neither the name of the author nor the names of any contributors
--    may be used to endorse or promote products derived from this software
--    without specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS''
-- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-- PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS
-- OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-- USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-- OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-- OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-- SUCH DAMAGE.
{-# OPTIONS_GHC -Wall -Werror #-}

-- | Facilities for using @Encoding@s as binary serializers.  The
-- resulting binary format is, for the most part, determined by the
-- @Encoding@, and therefore is within a constant factor of
-- succintness.
--
-- In all cases, little-endian byte ordering is used in order to allow
-- for very large data to be read in an decoded lazily.  (Note:
-- Haskell's libraries do not provide support for this functionality
-- at this time).
--
-- For finite @Encoding@s, the binary format is just the little-endian
-- encoding of the encoded value, using as few bytes as necessary to
-- represent the largest encoded value.
--
-- For infinite @Encoding@s, the binary format includes a length field
-- for most values.  The current encoding uses length fields of
-- different sizes, depending on the size of the encoded value.
module Data.ArithEncode.Binary(
       getWithEncoding,
       putWithEncoding
       ) where

import Data.ArithEncode.Basic
import Data.Binary.Put
import Data.Binary.Get hiding (remaining)
import Data.Bits
import Math.NumberTheory.Logarithms

-- Read in a natural number as a sequence of some number of bytes
getNatural :: Int -> Get Integer
getNatural bytes =
  let
    getNatural' :: Integer -> Int -> Get Integer
    getNatural' accum count
      | count + 8 < bytes =
        do
          input <- getWord64le
          getNatural' ((toInteger input `shiftL` (count * 8)) .|. accum) (count + 8)
      | count + 4 < bytes =
        do
          input <- getWord32le
          getNatural' ((toInteger input `shiftL` (count * 8)) .|. accum) (count + 4)
      | count + 2 < bytes =
        do
          input <- getWord16le
          getNatural' ((toInteger input `shiftL` (count * 8)) .|. accum) (count + 2)
      | count < bytes =
        do
          input <- getWord8
          getNatural' ((toInteger input `shiftL` (count * 8)) .|. accum) (count + 1)
      | otherwise = return accum
  in
    getNatural' 0 0

-- | Use an @Encoding@ to extract a @ty@ from binary data.
getWithEncoding :: Encoding ty
                -- ^ The encoding to use.
                -> Get ty
getWithEncoding enc =
  case size enc of
    Just 0 -> error "Cannot decode with empty encoding"
    -- For the degenerate case of a singleton, no need to encode anything at all
    Just 1 -> return (decode enc 0)
    -- Otherwise store the natural as a sequence of bytes.  We store
    -- this in little-endian order to allow lazy handling of very large
    -- numbers.
    Just finitesize ->
      let
        bytes = ((integerLog2 (finitesize - 1)) `quot` 3) + 1
      in do
        encoded <- getNatural bytes
        return (decode enc encoded)
    -- Arbitrary-length naturals are encoded with a more complex
    -- scheme.  The first two bits are a tag, which tells how to
    -- interpret the rest.
    Nothing ->
      do
        firstbyte <- lookAhead getWord8
        encoded <-
          case firstbyte .&. 0x03 of
            -- Naturals less than 64 get packed into the same byte as the tag
            0x0 ->
              do
                datafield <- getWord8
                return (toInteger (datafield `shiftR` 2))
            -- One-byte length field, and then up to 64 bytes of data
            0x1 ->
              do
                lenfield <- getWord8
                getNatural (fromIntegral (lenfield `shiftR` 2) + 1)
            -- Two-byte length field, and then up to 16384 bytes of data
            0x2 ->
              do
                lenfield <- getWord16le
                getNatural (fromIntegral (lenfield `shiftR` 2) + 1)
            -- Eight-byte length field, and then data
            0x3 ->
              do
                lenfield <- getWord64le
                getNatural (fromIntegral (lenfield `shiftR` 2) + 1)
            _ -> error "Impossible case"
        return (decode enc encoded)

-- Emit a natural number as a sequence of some number of bytes
putNatural :: Int -> Integer -> Put
putNatural 0 0 = return ()
putNatural 0 _ = error "Data remaining at end of encoding"
putNatural remaining natural
  | remaining > 8 =
    let
      output = fromInteger (natural .&. 0xffffffffffffffff)
      rest = natural `shiftR` 64
    in do
      putWord64le output
      putNatural (remaining - 8) rest
  | remaining > 4 =
    let
      output = fromInteger (natural .&. 0xffffffff)
      rest = natural `shiftR` 32
    in do
      putWord32le output
      putNatural (remaining - 4) rest
  | remaining > 2 =
    let
      output = fromInteger (natural .&. 0xffff)
      rest = natural `shiftR` 16
    in do
      putWord16le output
      putNatural (remaining - 2) rest
  | otherwise =
    let
      output = fromInteger (natural .&. 0xff)
      rest = natural `shiftR` 8
    in do
      putWord8 output
      putNatural (remaining - 1) rest

-- | Use an @Encoding@ to write a @ty@ out as binary data.
putWithEncoding :: Encoding ty
                -- ^ The encoding to use.
                -> ty
                -- ^ The value to encode.
                -> Put
putWithEncoding enc val =
  case size enc of
    Just 0 -> error "Cannot encode with empty encoding"
    -- For the degenerate case of a singleton, no need to encode anything at all
    Just 1 -> return ()
    -- Otherwise store the natural as a sequence of bytes.  We store
    -- this in little-endian order to allow lazy handling of very large
    -- numbers.
    Just finitesize ->
      let
        bytes = ((integerLog2 (finitesize - 1)) `quot` 3) + 1
        encoded = encode enc val
      in
        putNatural bytes encoded
    Nothing ->
      let
        encoded = encode enc val
      in
        if encoded < 64
          then putWord8 (fromInteger encoded `shiftL` 2)
          else
            let
              bytes = ((integerLog2 (encoded - 1)) `quot` 3) + 1
            in do
              if bytes <= 64
                then putWord8 (fromIntegral (((bytes - 1) `shiftL` 2) .|. 0x1))
                else if bytes <= 16384
                  then putWord16le (fromIntegral (((bytes - 1) `shiftL` 2) .|. 0x2))
                  else putWord64le (fromIntegral (((bytes - 1) `shiftL` 2) .|. 0x3))
              putNatural bytes encoded