module Text.XML.HXT.Arrow.XmlCache
( withCache
, withoutCache
, isInCache
, lookupCache
, readCache
, writeCache
, sha1HashValue
, sha1HashString
)
where
import Control.DeepSeq
import Control.Concurrent.ResourceTable
import Control.Exception ( SomeException , try )
import Data.Binary
import qualified
Data.ByteString.Lazy as B
import Data.Char
import Data.Either
import Data.Maybe
import Data.Digest.Pure.SHA
import System.FilePath
import System.Directory
import System.IO
import System.Locale
import System.Posix ( touchFile )
import System.Time
import System.IO.Unsafe ( unsafePerformIO )
import Text.XML.HXT.Core
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.Binary
withCache :: String -> Int -> Bool -> SysConfig
withCache cachePath documentAge cache404
= setS (theWithCache .&&&.
theCacheDir .&&&.
theDocumentAge .&&&.
theCache404Err .&&&.
theCacheRead
) (True, (cachePath, (documentAge, (cache404, readDocCache))))
withoutCache :: SysConfig
withoutCache = setS theWithCache False
readDocCache :: String -> IOStateArrow s b XmlTree
readDocCache src = localSysVar theWithCache
$
configSysVar withoutCache
>>>
( flip readDocCache' src
$< getSysVar (theCacheDir .&&&.
theDocumentAge .&&&.
theCache404Err
)
)
where
readDocCache' config src'
= applyA $ arrIO0 (lookupCache' config src')
isInCache :: IOStateArrow s String String
isInCache = uncurry isInC $< getSysVar (theDocumentAge .&&&. theCacheDir)
where
isInC age cdir = ( traceValue 2 (\ x -> "isInCache: file=" ++ show x ++ " age=" ++ show age ++ " cache dir=" ++ show cdir)
>>>
arrIO (isInCache' age cdir)
>>>
arrL ( \ x ->
case x of
Just Nothing -> [x]
_ -> []
)
) `guards` this
isInCache' age cdir f
= cacheHit age cf
where
cf = uncurry (</>) $ cacheFile cdir f
lookupCache' :: (FilePath, (Int, Bool)) -> String -> IO (IOStateArrow s a XmlTree)
lookupCache' (dir, (age, e404)) src
= do
ch <- cacheHit age cf
return $
case ch of
Nothing -> readAndCacheDocument
Just Nothing -> readDocumentFromCache
Just (Just mt) -> readDocumentCond mt
where
cf = uncurry (</>) $ cacheFile dir src
is200
| e404 = hasAttrValue transferStatus (`elem` ["200", "404"])
| otherwise = hasAttrValue transferStatus (== "200")
is304 = hasAttrValue transferStatus (== "304")
readDocumentFromCache
= traceMsg 1 ("cache hit for " ++ show src ++ " reading " ++ show cf)
>>>
( readCache' cf
>>>
traceMsg 2 "cache read"
)
`orElse`
( clearErrStatus
>>>
traceMsg 1 "cache file was corrupted, reading original"
>>>
readAndCacheDocument
)
readAndCacheDocument
= traceMsg 1 ("cache miss, reading original document " ++ show src)
>>>
readDocument [] src
>>>
perform ( choiceA
[ is200 :-> ( writeCache src >>> none )
, this :-> traceMsg 1 "transfer status /= 200, page not cached"
]
)
readDocumentCond mt
= traceMsg 1 ("cache out of date, read original document if modified " ++ show src)
>>>
readDocument [withInputOption a_if_modified_since (fmtTime mt)] src
>>>
choiceA
[ is304 :-> ( traceMsg 1 ("document not modified, using cache data from " ++ show cf)
>>>
perform (arrIO0 $ touchFile cf)
>>>
readDocumentFromCache
)
, is200 :-> ( traceMsg 1 "document read, cache will be updated"
>>>
perform (writeCache src
>>>
traceMsg 2 "cache is updated"
)
)
, this :-> ( traceMsg 1 "document read without caching"
>>>
perform ( arrIO0 $ remFile cf )
)
]
where
fmtTime = formatCalendarTime defaultTimeLocale rfc822DateFormat . toUTCTime
lookupCache :: (NFData b, Binary b) => String -> IOStateArrow s a b
lookupCache f = uncurry lookupC $< getSysVar (theDocumentAge .&&&. theCacheDir)
where
lookupC age cdir = isIOA (const $ hit)
`guards`
readCache' cf
where
cf = uncurry (</>) $ cacheFile cdir f
hit = do
ch <- cacheHit age cf
return $ case ch of
Just Nothing -> True
_ -> False
readCache :: (NFData c, Binary c) => String -> IOStateArrow s b c
readCache f = readC $< getSysVar theCacheDir
where
readC cdir = readCache' $ uncurry (</>) $ cacheFile cdir f
readCache' :: (NFData c, Binary c) => String -> IOStateArrow s b c
readCache' cf = rnfA $ withLock cf $ readBinaryValue cf
writeCache :: (Binary b) => String -> IOStateArrow s b ()
writeCache f = writeC $< getSysVar theCacheDir
where
writeC cdir = traceMsg 1 ("writing cache file " ++ show cf ++ " for document " ++ show f)
>>>
perform (arrIO0 createDir)
>>>
withLock cf (writeBinaryValue cf)
>>>
perform (withLock ixf (arrIO0 $ writeIndex ixf f cf))
where
cf = dir </> file
ixf = cdir </> "index"
(dir, file) = cacheFile cdir f
createDir = createDirectoryIfMissing True dir
remFile :: FilePath -> IO ()
remFile f = ( try' $ do ex <- doesFileExist f
if ex
then removeFile f
else return ()
) >> return ()
cacheFile :: FilePath -> String -> (FilePath, FilePath)
cacheFile dir f = (dir </> fd, fn)
where
(fd, fn) = splitAt 2 . sha1HashString $ f
cacheHit :: Int -> FilePath -> IO (Maybe (Maybe ClockTime))
cacheHit age cf = ( try' $
do
e <- doesFileExist cf
if not e
then return Nothing
else do
mt <- getModificationTime cf
ct <- getClockTime
return . Just $ if (dt `addToClockTime` mt) >= ct
then Nothing
else Just mt
) >>= return . either (const Nothing) id
where
seconds = age `mod` 60
minutes = age `div` 60
dt = normalizeTimeDiff $ TimeDiff 0 0 0 0 minutes seconds 0
try' :: IO a -> IO (Either SomeException a)
try' = try
writeIndex :: String -> String -> FilePath -> IO ()
writeIndex ixf f cf = ( try' $
do
h <- openBinaryFile ixf AppendMode
hPutStrLn h $ show (cf, f)
hClose h
return ()
) >> return ()
sha1HashValue :: (Arrow a, Binary b) => a b Integer
sha1HashValue = arr $ integerDigest . sha1 . encode
sha1HashString :: (Arrow a, Binary b) => a b String
sha1HashString = arr $ showDigest . sha1 . encode
theLockedFiles :: ResourceTable String
theLockedFiles = unsafePerformIO newResourceTable
lockFile, unlockFile :: String -> IO ()
lockFile = requestResource theLockedFiles
unlockFile = releaseResource theLockedFiles
withLock :: String -> IOStateArrow s b c -> IOStateArrow s b c
withLock l a = ( perform (arrIO0 $ lockFile l)
>>>
listA a
>>>
perform (arrIO0 $ unlockFile l)
)
>>>
unlistA