{-# LANGUAGE OverloadedStrings #-}

module Subversion.Dump.Raw
       ( FieldMap
       , Entry(..)

       , readInt
       , readSvnDumpRaw
       ) where

import           Control.Applicative hiding (many)
import           Control.Monad
import qualified Data.Attoparsec.Char8 as AC
import           Data.Attoparsec.Combinator
import           Data.Attoparsec.Lazy as AL
import           Data.ByteString as B hiding (map)
import qualified Data.ByteString.Lazy as BL hiding (map)
import qualified Data.List as L
import           Data.Maybe
import           Data.Word (Word8)

import           Prelude hiding (getContents)

default (ByteString)

type FieldMap = [(ByteString, ByteString)]

data Entry = Entry { entryTags  :: FieldMap
                   , entryProps :: FieldMap
                   , entryBody  :: BL.ByteString }
           deriving Show

readSvnDumpRaw :: BL.ByteString -> [Entry]
readSvnDumpRaw dump =
  case parse parseHeader dump of
    Fail {}         -> error "Stream is not a Subversion dump file"
    Done contents _ -> parseDumpFile contents

parseDumpFile :: BL.ByteString -> [Entry]
parseDumpFile contents =
  case parse parseEntry contents of
    Fail {} -> []
    Done contents' (entry, bodyLen) ->
        entry { entryBody = BL.take (fromIntegral bodyLen) contents' }
      : parseDumpFile (BL.drop (fromIntegral bodyLen) contents')

-- These are the Parsec parsers for the various parts of the input file.

space :: Parser Word8
space = word8 32

newline :: Parser Word8
newline = word8 10

parseTag :: Parser (ByteString, ByteString)
parseTag =
  (,) <$> takeWhile1 fieldChar <* string ": " -- :
      <*> takeWhile1 (/= 10) <* newline
  where fieldChar w =   (w >= 65 && w <= 90)  -- A-Z
                      || (w >= 97 && w <= 121) -- a-z
                      || (w >= 48 && w <= 57)  -- 0-9
                      || w == 45            -- -
                      || w == 95            -- _

parseIndicator :: Parser (Word8, Int)
parseIndicator = (,) <$> satisfy (oneOf 75 86) <* space -- K or V
                     <*> AC.decimal <* newline
  where oneOf x y w = w == x || w == y

parseSpecValue :: Parser ByteString
parseSpecValue = do
  (_, len) <- parseIndicator
  AL.take len <* newline

parseProperty :: Parser (ByteString, ByteString)
parseProperty = (,) <$> parseSpecValue -- K
                    <*> parseSpecValue -- V

parseEntry :: Parser (Entry, Int)
parseEntry = do
  fields  <- AL.takeWhile (== 10) *> many1 parseTag <* newline
  props   <- case L.lookup "Prop-content-length" fields of
               Nothing -> return []
               Just _  -> manyTill parseProperty (try (string "PROPS-END\n"))

  -- Don't read the body here in AttoParsec, let the caller extract it from
  -- the ByteString (which might be lazy, saving us from strictifying it in
  -- AttoParsec).
  let bodyLen = fromMaybe 0 (readInt <$> L.lookup "Text-content-length" fields)

  return ( Entry { entryTags  = fields
                 , entryProps = props
                 , entryBody  = BL.empty }
         , bodyLen )

parseHeader :: Parser ByteString
parseHeader = do
  _ <- string "SVN-fs-dump-format-version: 2\n\n"
       <?> "Dump file starts without a recognizable tag"
  string "UUID: " *> takeWhile1 uuidMember
    <* newline <* newline
  where
    -- Accept any hexadecimal character, or '-'
    uuidMember w = w == 45 || (w >= 48 && w <= 57) || (w >= 97 && w <= 102)

-- | Efficiently convert a ByteString of integers into an Int.
--
--   >>> readInt (Data.ByteString.Char8.pack "12345")
--   12345

readInt :: ByteString -> Int
readInt = B.foldl' addup 0
  where addup acc x = acc * 10 + (fromIntegral x - 48) -- '0'

-- SvnDump.hs ends here