{- |
Module      : Web.RBB.Crawler
Description :  Implementation of a meta data collector for the blog entry
               repository
Copyright   :  (c) Sebastian Witte
License     :  BSD3

Maintainer  :  woozletoff@gmail.com
Stability   :  experimental

-}
module Web.RBB.Crawler.Repository
    where

import           Control.Applicative
import           Control.Concurrent.STM
import           Control.Lens
import           Control.Monad
import           Control.Monad.State
import           Control.Monad.Trans.Except
import           Data.FileStore               (Change (..), FileStore,
                                               Revision (..), darcsFileStore,
                                               gitFileStore, mercurialFileStore)
import qualified Data.FileStore               as FS
import           Data.IxSet
import qualified Data.IxSet                   as IxSet
import           Data.List                    (foldl')
import           Data.Maybe
import           Data.Monoid
import           Data.Time
import           System.Directory
import           System.FilePath
import           Web.RBB.Config
import           Web.RBB.Crawler.MetaCombiner
import           Web.RBB.Crawler.MetaParser
import           Web.RBB.Types                as E
import           Web.RBB.Types.Blog
import           Web.RBB.Util

-- | Initialize the 'Blog' state by providing a path inside a repository.
initBlog :: (Functor io, MonadIO io) => BlogConfig m -> ExceptT String io (Blog m)
initBlog bcfg = do
    b <- initialBlog
    collectEntryData Nothing b

  where
    initialBlog = do
        (rp, crp, fs) <- initializeFileStore (entryPath bcfg)
        Blog <$> pure 1
             <*> pure mempty
             <*> pure (EntryUpdate (UTCTime (ModifiedJulianDay 0) 0) "")
             <*> liftIO getCurrentTime
             <*> pure fs
             <*> pure mempty
             <*> (liftIO . atomically) newTChan
             <*> pure rp
             <*> pure crp
             <*> pure bcfg

-- | Update the entries in the 'Blog' state.
updateBlog :: (Functor io, MonadIO io) => Blog m -> ExceptT String io (Blog m)
updateBlog blog = collectEntryData (Just (blog^.lastEntryUpdate)) blog

collectEntryData :: (Functor io, MonadIO io)
                 => Maybe EntryUpdate -- initial (Nothing) or update?
                 -> Blog m
                 -> ExceptT String io (Blog m)
collectEntryData eu blog =
    let interval = FS.TimeRange (entryUpdateTime <$> eu) Nothing
        fs = blog^.fileStore
        hist = FS.history fs
        notLatestKnownEntry = case entryRevisionId <$> eu of
            Nothing -> const True
            Just commit -> not . FS.idsMatch fs commit . revId
    in foldr collect blog . takeWhile notLatestKnownEntry -- . sortBy (compare `on` revDateTime)
        <$> liftIO (hist [blog^.contentRelativePath] interval Nothing)

collect :: Revision -> Blog m -> Blog m
collect r blog = foldl' go blog (revChanges r)
  where
    go b (Added fp)    = maybe b (addEntry r b fp) $ fileTypeFromExtension fp
    go b (Modified fp) = maybe b (modEntry r b fp) $ fileTypeFromExtension fp
    go b (Deleted fp)  = b & entries %~ IxSet.deleteIx (RelativePath fp)
                           & lastEntryUpdate .~ EntryUpdate (revDateTime r) (revId r)

metaFromRevision :: Revision -> [Meta]
metaFromRevision = either (const []) id . parseMeta . revDescription

addEntry :: Revision -> Blog m -> FilePath -> FileType -> Blog m
addEntry r blog fp ft =
    let meta = metaFromRevision r
        eu  = EntryUpdate (revDateTime r) (revId r)
        newEntry = Entry
                { _entryId      = blog^.nextEntryId
                , E._title      = (pack . takeBaseName . dropExtensions) fp
                , _author       = (pack . FS.authorName . revAuthor) r
                , _authorEmail  = (pack . FS.authorEmail . revAuthor) r
                , E._tags       = mempty
                , _fileType     = ft
                , _relativePath = fp
                , _fullPath     = blog^.repositoryPath </> fp
                , _updates      = fromList [eu]
                , _lastUpdate   = eu
                }
    in blog & nextEntryId %~ succ
            & entries     %~ contract (Just fp) meta . IxSet.insert newEntry
            & lastEntryUpdate .~ eu

modEntry :: Revision -> Blog m -> FilePath -> FileType -> Blog m
modEntry r blog fp _ =
    let meta = metaFromRevision r
        eu = EntryUpdate (revDateTime r) (revId r)
        insertUpdateTime = ixSetModifyIx (RelativePath fp) $ \e ->
                            e & updates %~ IxSet.insert eu
                              & lastUpdate .~ eu
    in blog & entries %~ (contract (Just fp) meta . insertUpdateTime)
            & lastEntryUpdate .~ eu

-- | Initialize a 'FileStore' object for the given directory. This function
-- should automatically detect the underlying repository type and traverse into
-- parent directories if necessary. The result is the associated 'FileStore'
-- object together with the relative path relative to the repository for the
-- blog content.
--
-- The return value is a triplet containing:
-- * The absolute path to the repository
-- * The content relative path inside the repository
-- * The associated 'FileStore' object for the repository
initializeFileStore :: (Functor io, MonadIO io)
                    => FilePath
                    -> ExceptT String io (FilePath, FilePath, FileStore)
initializeFileStore dir = do
    cd <- liftIO $ canonicalizePath dir
    d <- liftIO $ doesDirectoryExist cd
    unless d $ throwE $ "The directory '" ++ cd ++ "' does not exist."

    fileStores <- catMaybes `liftM` sequence
        [ lift (maybeGit cd)
        , lift (maybeDarcs cd)
        , lift (maybeMercurial cd)
        ]

    when (Prelude.null fileStores) $ throwE $ concat
        [ "The directory '",  dir, "' which has been canonicalized to '"
        , cd, "' points to an unsupported repository "
        , "(includes no repository)."
        ]
    return $ head fileStores

  where

    maybeGit       = maybeFileStore gitFileStore ".git"
    maybeDarcs     = maybeFileStore darcsFileStore "_darcs"
    maybeMercurial = maybeFileStore mercurialFileStore ".hg"

    maybeFileStore :: (Functor io, MonadIO io)
                   => (FilePath -> FileStore)
                   -> FilePath
                   -> FilePath
                   -> io (Maybe (FilePath, FilePath, FileStore))
    maybeFileStore f qry cd =
        fmap (\p -> (cd, makeRelative p cd, f p)) <$> findDirInParents cd qry

-- | Search for a directory named as the second argument to thins function.
-- Traverse the directory tree up to the root if the directory cannot be found
-- in one of the starting directory's parent directories.
findDirInParents :: (MonadIO io) => FilePath -> FilePath -> io (Maybe FilePath)
findDirInParents dir qry = do
    adir <- normalise `liftM` liftIO (canonicalizePath dir)
    containsQry . takeWhile (not . isDrive) $ iterate takeDirectory adir

  where
    containsQry [] = return Nothing
    containsQry (d:ds) = do
        p <- liftIO $ doesDirectoryExist (d </> qry)
        case () of
            _ | p -> return $ Just d
            _     -> containsQry ds