{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE RankNTypes #-}
module BDCS.Import.NPM(loadFromURI)
where
import Control.Monad(void)
import Control.Monad.Catch(MonadThrow)
import Control.Monad.Except(MonadError, runExceptT, throwError)
import Control.Monad.IO.Class(MonadIO, liftIO)
import Control.Monad.Reader(ReaderT, ask)
import Control.Monad.State(MonadState, get, modify)
import Control.Monad.Trans.Resource(MonadBaseControl, MonadResource)
import Data.Aeson(FromJSON(..), Object, Value(..), (.:), (.:?), (.!=), eitherDecode, withObject, withText)
import Data.Aeson.Types(Parser, typeMismatch)
import Data.Bits((.|.))
import Data.ByteArray(convert)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as BSL
import Data.Conduit(Conduit, Consumer, ZipConduit(..), (.|), getZipConduit, runConduitRes, toConsumer, yield)
import Data.Conduit.Binary(sinkLbs)
import qualified Data.Conduit.Combinators as CC
import qualified Data.Conduit.List as CL
import Data.Conduit.Lift(evalStateLC)
import qualified Data.Conduit.Tar as CT
import Data.ContentStore(ContentStore, storeByteStringSink)
import Data.ContentStore.Digest(ObjectDigest)
import qualified Data.HashMap.Lazy as HM
import Data.List(isPrefixOf)
import Data.Maybe(fromMaybe)
import qualified Data.Text as T
import Data.Text.Encoding(decodeUtf8)
import Database.Persist.Sql(SqlPersistT)
import Network.URI(URI(..), URIAuth(..), nullURI, parseURI, relativeTo)
import System.FilePath((</>), makeRelative, normalise, takeDirectory, takeFileName)
import System.Posix.Files(blockSpecialMode, characterSpecialMode, directoryMode, namedPipeMode, regularFileMode, symbolicLinkMode)
import Text.Regex.PCRE((=~))
import BDCS.Build.NPM(rebuildNPM)
import BDCS.DB
import BDCS.Files(associateFilesWithSource, insertFiles)
import BDCS.Import.Conduit(getFromURI, ungzipIfCompressed)
import BDCS.Import.State(ImportState(..))
import BDCS.KeyType
import BDCS.Projects(insertProject)
import BDCS.Sources(insertSource, insertSourceKeyValue)
import BDCS.Utils.Either(whenLeft)
import BDCS.Utils.Monad((>>?))
npmRegistry :: URI
npmRegistry = URI {uriScheme = "https:",
uriAuthority = Just URIAuth{uriUserInfo = "", uriRegName = "registry.npmjs.org", uriPort = ""},
uriPath = "/",
uriQuery = "",
uriFragment = "" }
data PackageDist = PackageDist {
_integrity :: Maybe T.Text,
_shasum :: Maybe T.Text,
tarball :: T.Text }
deriving(Show)
instance FromJSON PackageDist where
parseJSON = withObject "registry JSON" $ \v ->
case HM.lookup "dist" v of
Nothing -> fail "Missing dist object"
Just (Object d) -> PackageDist <$> d .:? "integrity"
<*> d .:? "shasum"
<*> d .: "tarball"
Just _ -> fail "Dist not an object"
data PackageJSON = PackageJSON {
packageName :: T.Text,
packageVersion :: T.Text,
description :: T.Text,
homepage :: Maybe T.Text,
license :: T.Text,
bin :: Maybe [(T.Text, T.Text)],
man :: Maybe [T.Text],
binDirectory :: Maybe T.Text,
manDirectory :: Maybe T.Text,
dependencies :: Maybe [(T.Text, T.Text)] }
deriving(Show)
instance FromJSON PackageJSON where
parseJSON = withObject "package.json" $ \v -> PackageJSON
<$> v .: "name"
<*> v .: "version"
<*> v .:? "description" .!= ""
<*> v .:? "homepage"
<*> v .:? "license" .!= ""
<*> parseBin v
<*> v .:? "man"
<*> parseDirectories v "bin"
<*> parseDirectories v "man"
<*> ((v .:? "dependencies") >>? parseTextObject)
where
parseDirectories :: Object -> T.Text -> Parser (Maybe T.Text)
parseDirectories obj key =
if HM.member key obj then return Nothing
else case HM.lookup "directories" obj of
Nothing -> return Nothing
Just (Object v) -> v .:? key
Just err -> typeMismatch "Object" err
parseBin :: Object -> Parser (Maybe [(T.Text, T.Text)])
parseBin obj = do
name <- (obj .: "name") >>= withText "String" return
case HM.lookup "bin" obj of
Nothing -> return Nothing
Just v@(Object _) -> Just <$> parseTextObject v
Just (String s) -> return $ Just [(name, s)]
Just err -> typeMismatch "Object or String" err
parseTextObject :: Value -> Parser [(T.Text, T.Text)]
parseTextObject = withObject "Object" $ HM.foldrWithKey f (return [])
where
f :: T.Text -> Value -> Parser [(T.Text, T.Text)] -> Parser [(T.Text, T.Text)]
f key val acc = withText "String" (\s -> ((key, s):) <$> acc) val
readRegistryJSON :: (MonadError String m, MonadBaseControl IO m, MonadThrow m, MonadIO m) => String -> m PackageDist
readRegistryJSON pkgname = do
let uri = relativeTo (nullURI {uriPath = pkgname ++ "/latest"}) npmRegistry
jsonData <- runConduitRes $ getFromURI uri .| sinkLbs
either throwError return $ eitherDecode jsonData
loadIntoMDDB :: MonadIO m => PackageJSON -> [Files] -> SqlPersistT m (Key Sources)
loadIntoMDDB PackageJSON{..} files = do
projectId <- insertProject $ Projects packageName description "" homepage ""
sourceId <- insertSource $ Sources projectId license packageVersion ""
fileIds <- insertFiles files
void $ associateFilesWithSource fileIds sourceId
case (bin, binDirectory) of
(Just binlist, _) -> mapM_ (addBin sourceId) binlist
(Nothing, Just binDir) -> addBinDir sourceId binDir
_ -> return ()
case (man, manDirectory) of
(Just manList, _) -> mapM_ (addMan sourceId) manList
(Nothing, Just manDir) -> addManDir sourceId manDir
_ -> return ()
mapM_ (\(reqname, reqver) -> insertSourceKeyValue (TextKey "dependency") reqname (Just reqver) sourceId) $
fromMaybe [] dependencies
void $ insertSourceKeyValue (TextKey "npm") "" Nothing sourceId
return sourceId
where
normaliseText :: T.Text -> T.Text
normaliseText path = T.pack $ normalise $ T.unpack path
addBin :: MonadIO m => Key Sources -> (T.Text, T.Text) -> SqlPersistT m ()
addBin sourceId (binName, path) = void $ insertSourceKeyValue (TextKey "bin") binName (Just (normaliseText path)) sourceId
addBinDir :: MonadIO m => Key Sources -> T.Text -> SqlPersistT m ()
addBinDir sourceId binDir = let
binPrefix = normalise (T.unpack binDir)
binFiles = filter (\p -> takeDirectory p == binPrefix) $ map (makeRelative "/" . T.unpack . filesPath) files
in
mapM_ (\p -> insertSourceKeyValue (TextKey "bin") (T.pack $ takeFileName p) (Just (T.pack p)) sourceId) binFiles
addMan :: MonadIO m => Key Sources -> T.Text -> SqlPersistT m ()
addMan sourceId manName = void $ insertSourceKeyValue (TextKey "man") (normaliseText manName) Nothing sourceId
addManDir :: MonadIO m => Key Sources -> T.Text -> SqlPersistT m ()
addManDir sourceId manDir = let
manPrefix = normalise (T.unpack manDir)
paths = map (makeRelative "/" . T.unpack . filesPath) files
manFiles = filter (\p -> (manPrefix `isPrefixOf` p) && (p =~ ("\\.[0-9]$" :: String))) paths
in
mapM_ (\p -> insertSourceKeyValue (TextKey "man") (T.pack p) Nothing sourceId) manFiles
loadFromURI :: URI -> ReaderT ImportState IO ()
loadFromURI uri@URI{..} = do
db <- stDB <$> ask
repo <- stRepo <$> ask
result <- runExceptT $ do
distJson <- readRegistryJSON uriPath
let distTarball = T.unpack $ tarball distJson
distURI <- maybe (throwError $ "Error parsing dist URI: " ++ distTarball) return $ parseURI distTarball
let pathPipe = tarEntryToFile repo .| CL.consume
let jsonPipe = parsePackageJson
let tarSink = getZipConduit ((,) <$> ZipConduit pathPipe
<*> ZipConduit jsonPipe)
(files, packageJson) <- runConduitRes $
getFromURI distURI
.| ungzipIfCompressed
.| CT.untar
.| tarSink
checkAndRunSqlite (T.pack db) $ do
source <- loadIntoMDDB packageJson files
rebuildNPM source
whenLeft result (\e -> liftIO $ print $ "Error importing " ++ show uri ++ ": " ++ show e)
where
tarEntryToFile :: (MonadError String m, MonadThrow m, MonadResource m) => ContentStore -> Conduit CT.TarChunk m Files
tarEntryToFile cs =
evalStateLC HM.empty $ CT.withEntries handleEntry
where
handleEntry :: (MonadState (HM.HashMap FilePath (ObjectDigest, CT.Size)) m, MonadError String m, MonadResource m) => CT.Header -> Conduit BS.ByteString m Files
handleEntry header@CT.Header{..} = do
let entryPath = CT.headerFilePath header
let modeBits = if CT.headerFileType header == CT.FTDirectory then 0o0755 else 0o0644
let typeBits = case CT.headerFileType header of
CT.FTNormal -> regularFileMode
CT.FTHardLink -> regularFileMode
CT.FTSymbolicLink -> symbolicLinkMode
CT.FTCharacterSpecial -> characterSpecialMode
CT.FTBlockSpecial -> blockSpecialMode
CT.FTDirectory -> directoryMode
CT.FTFifo -> namedPipeMode
CT.FTOther _ -> 0
let baseFile = Files{filesPath = T.pack ("/" </> normalise entryPath),
filesFile_user = "root",
filesFile_group = "root",
filesMtime = fromIntegral headerTime,
filesMode = modeBits .|. fromIntegral typeBits,
filesTarget = Nothing,
filesCs_object = Nothing,
filesSize = 0}
file <- case CT.headerFileType header of
CT.FTNormal -> handleRegularFile baseFile entryPath headerPayloadSize
CT.FTHardLink -> handleHardLink baseFile
CT.FTSymbolicLink -> handleSymlink baseFile
CT.FTOther code -> throwError $ "Unknown tar entry type " ++ show code
_ -> return baseFile
yield file
handleRegularFile :: (MonadState (HM.HashMap FilePath (ObjectDigest, CT.Size)) m, MonadError String m, MonadResource m) => Files -> FilePath -> CT.Size -> Consumer BS.ByteString m Files
handleRegularFile baseFile entryPath size = do
digest <- toConsumer $ storeByteStringSink cs
modify (HM.insert entryPath (digest, size))
return $ baseFile {filesSize = fromIntegral size,
filesCs_object = Just $ convert digest}
handleHardLink :: (MonadState (HM.HashMap FilePath (ObjectDigest, CT.Size)) m, MonadError String m) => Files -> Consumer BS.ByteString m Files
handleHardLink baseFile = do
target <- S8.unpack <$> CC.fold
(HM.lookup target <$> get) >>=
maybe (throwError $ "Broken hard link to " ++ target)
(\(digest, size) -> return $ baseFile {filesSize = fromIntegral size,
filesCs_object = Just $ convert digest})
handleSymlink :: Monad m => Files -> Consumer BS.ByteString m Files
handleSymlink baseFile = do
target <- decodeUtf8 <$> CC.fold
return $ baseFile {filesTarget = Just target}
parsePackageJson :: (MonadError String m, MonadThrow m) => Consumer CT.TarChunk m PackageJSON
parsePackageJson = CT.withEntry handler >>= maybe parsePackageJson return
where
handler :: MonadError String m => CT.Header -> Consumer BS.ByteString m (Maybe PackageJSON)
handler header = let
path = makeRelative "/" $ normalise $ CT.headerFilePath header
in
if path == ("package" </> "package.json") then
(eitherDecode <$> BSL.fromStrict <$> CC.fold) >>= either throwError (return . Just)
else
return Nothing