{-# LANGUAGE OverloadedStrings #-}

module Subversion.Dump.Raw
       ( FieldMap
       , Entry(..)
       , readInt
       , readSvnDumpRaw
       ) where

import           Control.Applicative hiding (many)
import           Control.Monad
import qualified Data.Attoparsec.Char8 as AC
import           Data.Attoparsec.Combinator
import           Data.Attoparsec.Lazy as AL
import           Data.ByteString as B hiding (map)
import qualified Data.ByteString.Lazy as BL hiding (map)
import qualified Data.List as L
import           Data.Maybe
import           Data.Word (Word8)

import           Prelude hiding (getContents)

default (ByteString)

-- | Association list of @(name, value)@ byte-string pairs.  It is the
--   representation used for both the header tags and the properties of an
--   'Entry'.
type FieldMap = [(ByteString, ByteString)]

-- | One record of a dump stream, as produced by 'readSvnDumpRaw'.
--
--   * 'entryTags' holds the @Name: value@ header lines of the record.
--
--   * 'entryProps' holds the key/value pairs of the property section; it is
--     empty when the record has no @Prop-content-length@ header.
--
--   * 'entryBody' is the record's text content as a lazy 'BL.ByteString',
--     sliced out of the input stream rather than consumed by the parser.
data Entry = Entry { entryTags  :: FieldMap
                   , entryProps :: FieldMap
                   , entryBody  :: BL.ByteString }
           deriving Show

-- | Parse a Subversion dump stream into its list of entries.
--
--   The stream must begin with the header recognised by 'parseHeader';
--   otherwise 'error' is called.  Everything following the header is handed
--   to 'parseDumpFile'.  The UUID read from the header is discarded.
readSvnDumpRaw :: BL.ByteString -> [Entry]
readSvnDumpRaw dump = entriesAfter (parse parseHeader dump)
  where
    entriesAfter (Done afterHeader _uuid) = parseDumpFile afterHeader
    entriesAfter (Fail {})                =
      error "Stream is not a Subversion dump file"

-- | Parse the fixed preamble of a format-version-2 dump stream and return
--   the repository UUID it carries.
--
--   The expected input is the version line, a blank line, a @UUID: @ line
--   and a final blank line.
parseHeader :: Parser ByteString
parseHeader = do
  _    <- string "SVN-fs-dump-format-version: 2\n\n"
  _    <- string "UUID: "
  uuid <- takeWhile1 isUuidByte
  _    <- newline
  _    <- newline
  return uuid
  where
    -- A UUID byte is '-', a decimal digit, or a lowercase hex letter (a-f).
    isUuidByte b = b == 45 || within 48 57 || within 97 102
      where within lo hi = lo <= b && b <= hi

-- | Split the part of a dump stream that follows the header into entries.
--
--   Each step runs 'parseEntry' for the headers and properties, then slices
--   the body (whose length the parser reports) directly off the remaining
--   lazy input and continues with whatever follows it.
parseDumpFile :: BL.ByteString -> [Entry]
parseDumpFile contents =
  case parse parseEntry contents of
    Done afterProps (entry, bodyLen) ->
      let size = fromIntegral bodyLen
          body = BL.take size afterProps
          rest = BL.drop size afterProps
      in  entry { entryBody = body } : parseDumpFile rest
    -- A parse failure (including plain end of input) simply ends the list;
    -- the parser's error message is not reported.
    Fail {} -> []

-- | Parse the headers and (if present) the property section of one entry.
--
--   The entry body is deliberately NOT consumed here.  Instead the parser
--   returns the body length taken from the @Text-content-length@ header
--   (0 when absent) and leaves it to the caller to slice that many bytes
--   from the remaining input.  That input may be a lazy ByteString, so
--   this avoids needlessly strictifying the body inside the parser.
parseEntry :: Parser (Entry, Int)
parseEntry = do
  skipWhile (== 10)                       -- skip any leading blank lines
  fields <- many1 parseTag
  _      <- newline
  -- A property section exists only when its length header was given.
  props  <- if isJust (L.lookup "Prop-content-length" fields)
              then manyTill parseProperty (try (string "PROPS-END\n"))
              else return []
  let bodyLen = maybe 0 readInt (L.lookup "Text-content-length" fields)
      entry   = Entry { entryTags  = fields
                      , entryProps = props
                      , entryBody  = BL.empty }
  return (entry, bodyLen)

-- | Parse a single header line of the form @Name: value@ terminated by a
--   line feed, returning the name and the value.
--
--   The name is a non-empty run of ASCII letters, digits and @-@; the value
--   is every byte up to (not including) the line feed and must be non-empty.
parseTag :: Parser (ByteString, ByteString)
parseTag = (,) <$> takeWhile1 fieldChar <* string ": "
               <*> takeWhile1 (/= 10) <* newline
  where fieldChar w =   (w >= 97 && w <= 122) -- a-z (122 is 'z')
                      || (w >= 65 && w <= 90)  -- A-Z
                      || w == 45               -- -
                      || (w >= 48 && w <= 57)  -- 0-9

-- | Parse one property: a @K \<len\>@ line followed by the key bytes, then a
--   @V \<len\>@ line followed by the value bytes.  Key and value are each
--   followed by a line feed.
parseProperty :: Parser (ByteString, ByteString)
parseProperty = do
  key   <- string "K " *> sizedField <* newline
  value <- string "V " *> sizedField <* newline
  return (key, value)
  where
    -- A decimal byte count, a line feed, then exactly that many bytes.
    sizedField = do
      len <- AC.decimal
      _   <- newline
      AL.take len

-- | Match a single line-feed byte.
newline :: Parser Word8
newline = word8 lineFeed
  where
    lineFeed :: Word8
    lineFeed = 10  -- ASCII LF

-- | Convert a ByteString of ASCII decimal digits into an 'Int' with a single
--   strict left fold.
--
--   No validation is performed: every byte is treated as a digit, so input
--   containing anything else yields a meaningless number.  The empty string
--   gives 0.
--
--   >>> readInt (Data.ByteString.Char8.pack "12345")
--   12345

readInt :: ByteString -> Int
readInt digits = B.foldl' step 0 digits
  where
    -- Shift the running total one decimal place and add the next digit;
    -- 48 is the byte value of '0'.
    step total byte = total * 10 + (fromIntegral byte - 48)

-- SvnDump.hs ends here