module Subversion.Dump
( RevDate
, Revision(..)
, OpKind(..)
, OpAction(..)
, Operation(..)
, FieldMap
, Entry(..)
, readSvnDump
, readSvnDumpRaw
, parseHeader
, parseEntry
) where
import Control.Applicative hiding (many)
import Control.Monad
import qualified Data.Attoparsec.Char8 as AC
import Data.Attoparsec.Combinator
import Data.Attoparsec.Lazy as AL
import Data.ByteString as B hiding (map)
import qualified Data.ByteString.Char8 as BC hiding (map)
import qualified Data.ByteString.Lazy as BL hiding (map)
import qualified Data.List as L
import Data.Text (Text)
import qualified Data.Text.Encoding as E
import Data.Maybe
import Data.Word (Word8)
import System.FilePath
import Prelude hiding (getContents)
default (ByteString)
type RevDate = Text
data Revision = Revision { revNumber :: Int
, revDate :: RevDate
, revAuthor :: Maybe Text
, revComment :: Maybe Text
, revOperations :: [Operation] }
deriving Show
data OpKind = File | Directory deriving (Show, Enum, Eq)
data OpAction = Add | Change | Replace | Delete deriving (Show, Enum, Eq)
data Operation = Operation { opKind :: OpKind
, opAction :: OpAction
, opPathname :: FilePath
, opContents :: ByteString
, opContentLength :: Int
, opChecksumMD5 :: Maybe Text
, opChecksumSHA1 :: Maybe Text
, opCopyFromRev :: Maybe Int
, opCopyFromPath :: Maybe FilePath }
deriving Show
readSvnDump :: BL.ByteString -> Either String [Revision]
readSvnDump io = do
case readSvnDumpRaw io of
Fail _ _ y -> Left y
Done _ result -> Right $ map processRevs (L.groupBy sameRev result)
where sameRev _ y = isNothing $
L.lookup "Revision-number" (entryTags y)
getField f n x = L.lookup n (f x)
getField' f n x = fromMaybe "" (getField f n x)
tagM = getField entryTags
propM = getField entryProps
tag = getField' entryTags
prop = getField' entryProps
processRevs [] = error "Unexpected"
processRevs (rev:ops) =
Revision {
revNumber = readInt $ tag "Revision-number" rev
, revDate = parseDate $ prop "svn:date" rev
, revAuthor = E.decodeUtf8 <$> propM "svn:author" rev
, revComment = E.decodeUtf8 <$> propM "svn:log" rev
, revOperations = map processOp ops }
processOp op =
Operation {
opKind = getOpKind $ tag "Node-kind" op
, opAction = getOpAction $ tag "Node-action" op
, opPathname = BC.unpack $ tag "Node-path" op
, opContents = entryBody op
, opContentLength = readInt $ tag "Text-content-length" op
, opCopyFromRev = readInt <$>
tagM "Node-copyfrom-rev" op
, opCopyFromPath = BC.unpack <$> tagM "Node-copyfrom-path" op
, opChecksumMD5 = E.decodeUtf8 <$> tagM "Text-content-md5" op
, opChecksumSHA1 = E.decodeUtf8 <$> tagM "Text-content-sha1" op }
getOpKind kind = case kind of
"file" -> File
"dir" -> Directory
_ -> error "Unexpected"
getOpAction kind = case kind of
"add" -> Add
"delete" -> Delete
"change" -> Change
"replace" -> Replace
_ -> error "Unexpected"
type FieldMap = [(ByteString, ByteString)]
data Entry = Entry { entryTags :: FieldMap
, entryProps :: FieldMap
, entryBody :: ByteString }
deriving Show
readSvnDumpRaw :: BL.ByteString -> Result [Entry]
readSvnDumpRaw dump = parse parseSvnDump dump
space :: Parser Word8
space = satisfy (== 32)
newline :: Parser Word8
newline = satisfy (== 10)
parseTag :: Parser (ByteString, ByteString)
parseTag =
(,) <$> takeWhile1 fieldChar <* string ": "
<*> takeWhile1 (/= 10) <* newline
where fieldChar w = (w >= 65 && w <= 90)
|| (w >= 97 && w <= 121)
|| (w >= 48 && w <= 57)
|| w == 45
|| w == 95
parseIndicator :: Parser (Word8, Int)
parseIndicator = (,) <$> satisfy (oneOf 75 86) <* space
<*> AC.decimal <* newline
where oneOf x y w = w == x || w == y
parseSpecValue :: Parser ByteString
parseSpecValue = do
(_, len) <- parseIndicator
AL.take len <* newline
parseProperty :: Parser (ByteString, ByteString)
parseProperty = (,) <$> parseSpecValue
<*> parseSpecValue
readInt :: ByteString -> Int
readInt bs = B.foldl' addup 0 bs
where addup acc x = acc * 10 + (fromIntegral x 48)
parseEntry :: Parser Entry
parseEntry = do
fields <- many1 parseTag <* newline
props <- case L.lookup "Prop-content-length" fields of
Nothing -> return []
Just _ -> manyTill parseProperty (try (string "PROPS-END\n"))
body <- case L.lookup "Text-content-length" fields of
Nothing -> return B.empty
Just len -> AL.take (readInt len)
_ <- AL.takeWhile (== 10)
return Entry { entryTags = fields
, entryProps = props
, entryBody = body }
parseHeader :: Parser ByteString
parseHeader = do
_ <- string "SVN-fs-dump-format-version: 2\n\n"
<?> "Dump file starts without a recognizable tag"
string "UUID: " *> takeWhile1 uuidMember
<* newline <* newline
where
uuidMember w = w == 45 || (w >= 48 && w <= 57) || (w >= 97 && w <= 102)
parseSvnDump :: Parser [Entry]
parseSvnDump = parseHeader >> many1 parseEntry
parseDate :: ByteString -> RevDate
parseDate = E.decodeUtf8