{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DeriveFunctor #-}
{-# language GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# language DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# options_ghc -Wno-unused-imports -Wno-unused-top-binds #-}
module Algebra.Graph.IO.Datasets.LINQS (
restoreContent, CitesRow(..), ContentRow(..),
stash,
sourceGraphEdges, loadGraph
) where
import Control.Applicative (Alternative(..))
import Control.Monad (when)
import Control.Monad.IO.Class (MonadIO(..))
import Data.Functor (($>), void)
import GHC.Generics (Generic(..))
import GHC.Int (Int16)
import qualified Algebra.Graph as G (Graph, empty, overlay, edge)
import Data.ByteString (ByteString)
import Data.ByteString.Char8 (unpack)
import Data.Binary (Binary(..), encode, decode, encodeFile, decodeFileOrFail)
import qualified Data.Conduit.Serialization.Binary as CB (conduitDecode, conduitEncode, ParseError(..))
import Conduit (MonadUnliftIO(..), MonadResource, runResourceT)
import Data.Conduit (runConduit, ConduitT, (.|), yield, await, runConduitRes)
import qualified Data.Conduit.Combinators as C (print, sourceFile, sinkFile, map, mapM, foldM, mapWhile, mapAccumWhile, foldMap, foldl, scanl)
import Data.Sequence (Seq, (|>))
import qualified Data.Map as M (Map, singleton, lookup)
import System.Directory (createDirectoryIfMissing)
import Control.Monad.Catch (MonadThrow(..))
import System.FilePath ((</>), takeFileName, takeExtension)
import Network.HTTP.Simple (httpSource, getResponseBody, Response, Request, parseRequest, setRequestMethod)
import Text.Megaparsec (parse, runParserT)
import Text.Megaparsec.Char (char)
import Text.Megaparsec.Error (errorBundlePretty, ParseErrorBundle)
import Control.Monad.Combinators (count)
import Data.Conduit.Tar (Header(..), untarChunks, TarChunk, withEntries, FileInfo, filePath, withFileInfo, headerFileType, FileType(..), headerFilePath)
import qualified Data.Text as T (Text, unwords)
import Algebra.Graph.IO.Internal.Conduit (fetch, unTarGz)
import Algebra.Graph.IO.Internal.Megaparsec (Parser, ParserT, ParseE, symbol, lexeme, alphaNum)
import Algebra.Graph.IO.SV (parseTSV)
stash :: (Binary c) =>
FilePath
-> String
-> Int
-> Parser c
-> IO ()
stash :: FilePath -> FilePath -> Int -> Parser c -> IO ()
stash FilePath
dir FilePath
uri Int
n Parser c
pc = do
Request
rq <- FilePath -> IO Request
forall (m :: * -> *). MonadThrow m => FilePath -> m Request
parseRequest FilePath
uri
Bool -> FilePath -> IO ()
createDirectoryIfMissing Bool
True FilePath
dir
ResourceT IO () -> IO ()
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT IO () -> IO ()) -> ResourceT IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ ConduitT () Void (ResourceT IO) () -> ResourceT IO ()
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void (ResourceT IO) () -> ResourceT IO ())
-> ConduitT () Void (ResourceT IO) () -> ResourceT IO ()
forall a b. (a -> b) -> a -> b
$
Request -> ConduitT () ByteString (ResourceT IO) ()
forall (m :: * -> *) i.
MonadResource m =>
Request -> ConduitT i ByteString m ()
fetch Request
rq ConduitT () ByteString (ResourceT IO) ()
-> ConduitM ByteString Void (ResourceT IO) ()
-> ConduitT () Void (ResourceT IO) ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
ConduitT ByteString TarChunk (ResourceT IO) ()
forall (m :: * -> *).
(PrimMonad m, MonadThrow m) =>
ConduitT ByteString TarChunk m ()
unTarGz ConduitT ByteString TarChunk (ResourceT IO) ()
-> ConduitM TarChunk Void (ResourceT IO) ()
-> ConduitM ByteString Void (ResourceT IO) ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(FileInfo -> ConduitM ByteString Void (ResourceT IO) ())
-> ConduitM TarChunk Void (ResourceT IO) ()
forall (m :: * -> *) o.
MonadThrow m =>
(FileInfo -> ConduitM ByteString o m ())
-> ConduitM TarChunk o m ()
withFileInfo ( \FileInfo
fi -> do
FilePath
-> Int
-> Parser c
-> FileInfo
-> ConduitM ByteString Void (ResourceT IO) ()
forall (m :: * -> *) c o.
(MonadThrow m, MonadResource m, Binary c) =>
FilePath
-> Int -> Parser c -> FileInfo -> ConduitT ByteString o m ()
contentToFile FilePath
dir Int
n Parser c
pc FileInfo
fi
FilePath -> FileInfo -> ConduitM ByteString Void (ResourceT IO) ()
forall (m :: * -> *) c.
(MonadThrow m, MonadIO m, MonadResource m) =>
FilePath -> FileInfo -> ConduitT ByteString c m ()
citesToFile FilePath
dir FileInfo
fi )
restoreContent :: (Binary c) => FilePath
-> IO (M.Map String (Int16, Seq Int16, c))
restoreContent :: FilePath -> IO (Map FilePath (Int16, Seq Int16, c))
restoreContent FilePath
dir = ResourceT IO (Map FilePath (Int16, Seq Int16, c))
-> IO (Map FilePath (Int16, Seq Int16, c))
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT IO (Map FilePath (Int16, Seq Int16, c))
-> IO (Map FilePath (Int16, Seq Int16, c)))
-> ResourceT IO (Map FilePath (Int16, Seq Int16, c))
-> IO (Map FilePath (Int16, Seq Int16, c))
forall a b. (a -> b) -> a -> b
$ ConduitT
() Void (ResourceT IO) (Map FilePath (Int16, Seq Int16, c))
-> ResourceT IO (Map FilePath (Int16, Seq Int16, c))
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT
() Void (ResourceT IO) (Map FilePath (Int16, Seq Int16, c))
-> ResourceT IO (Map FilePath (Int16, Seq Int16, c)))
-> ConduitT
() Void (ResourceT IO) (Map FilePath (Int16, Seq Int16, c))
-> ResourceT IO (Map FilePath (Int16, Seq Int16, c))
forall a b. (a -> b) -> a -> b
$
FilePath -> ConduitT () (ContentRow Int16 c) (ResourceT IO) ()
forall (m :: * -> *) c i.
(MonadResource m, MonadThrow m, Binary c) =>
FilePath -> ConduitT i (ContentRow Int16 c) m ()
contentFromFile FilePath
dir ConduitT () (ContentRow Int16 c) (ResourceT IO) ()
-> ConduitM
(ContentRow Int16 c)
Void
(ResourceT IO)
(Map FilePath (Int16, Seq Int16, c))
-> ConduitT
() Void (ResourceT IO) (Map FilePath (Int16, Seq Int16, c))
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(ContentRow Int16 c -> Map FilePath (Int16, Seq Int16, c))
-> ConduitM
(ContentRow Int16 c)
Void
(ResourceT IO)
(Map FilePath (Int16, Seq Int16, c))
forall (m :: * -> *) b a o.
(Monad m, Monoid b) =>
(a -> b) -> ConduitT a o m b
C.foldMap ( \(CRow Int16
i FilePath
k Seq Int16
fs c
c) -> FilePath
-> (Int16, Seq Int16, c) -> Map FilePath (Int16, Seq Int16, c)
forall k a. k -> a -> Map k a
M.singleton FilePath
k (Int16
i, Seq Int16
fs, c
c) )
citesFromFile :: (MonadResource m, MonadThrow m) => FilePath -> ConduitT i (CitesRow String) m ()
citesFromFile :: FilePath -> ConduitT i (CitesRow FilePath) m ()
citesFromFile FilePath
dir =
FilePath -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
C.sourceFile (FilePath
dir FilePath -> FilePath -> FilePath
</> FilePath
"cites") ConduitT i ByteString m ()
-> ConduitM ByteString (CitesRow FilePath) m ()
-> ConduitT i (CitesRow FilePath) m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
ConduitM ByteString (CitesRow FilePath) m ()
forall b (m :: * -> *).
(Binary b, MonadThrow m) =>
ConduitT ByteString b m ()
CB.conduitDecode
loadGraph :: (Binary c) =>
FilePath
-> IO (G.Graph (ContentRow Int16 c))
loadGraph :: FilePath -> IO (Graph (ContentRow Int16 c))
loadGraph FilePath
dir = do
Map FilePath (Int16, Seq Int16, c)
mm <- FilePath -> IO (Map FilePath (Int16, Seq Int16, c))
forall c.
Binary c =>
FilePath -> IO (Map FilePath (Int16, Seq Int16, c))
restoreContent FilePath
dir
ResourceT IO (Graph (ContentRow Int16 c))
-> IO (Graph (ContentRow Int16 c))
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT IO (Graph (ContentRow Int16 c))
-> IO (Graph (ContentRow Int16 c)))
-> ResourceT IO (Graph (ContentRow Int16 c))
-> IO (Graph (ContentRow Int16 c))
forall a b. (a -> b) -> a -> b
$ ConduitT () Void (ResourceT IO) (Graph (ContentRow Int16 c))
-> ResourceT IO (Graph (ContentRow Int16 c))
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit (ConduitT () Void (ResourceT IO) (Graph (ContentRow Int16 c))
-> ResourceT IO (Graph (ContentRow Int16 c)))
-> ConduitT () Void (ResourceT IO) (Graph (ContentRow Int16 c))
-> ResourceT IO (Graph (ContentRow Int16 c))
forall a b. (a -> b) -> a -> b
$
FilePath -> ConduitT () (CitesRow FilePath) (ResourceT IO) ()
forall (m :: * -> *) i.
(MonadResource m, MonadThrow m) =>
FilePath -> ConduitT i (CitesRow FilePath) m ()
citesFromFile FilePath
dir ConduitT () (CitesRow FilePath) (ResourceT IO) ()
-> ConduitM
(CitesRow FilePath)
Void
(ResourceT IO)
(Graph (ContentRow Int16 c))
-> ConduitT () Void (ResourceT IO) (Graph (ContentRow Int16 c))
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(Graph (ContentRow Int16 c)
-> CitesRow FilePath -> Graph (ContentRow Int16 c))
-> Graph (ContentRow Int16 c)
-> ConduitM
(CitesRow FilePath)
Void
(ResourceT IO)
(Graph (ContentRow Int16 c))
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> a) -> a -> ConduitT b o m a
C.foldl (\Graph (ContentRow Int16 c)
gr (CitesRow FilePath
b FilePath
a) ->
let
edm :: Maybe ((Int16, Seq Int16, c), (Int16, Seq Int16, c))
edm = (,) ((Int16, Seq Int16, c)
-> (Int16, Seq Int16, c)
-> ((Int16, Seq Int16, c), (Int16, Seq Int16, c)))
-> Maybe (Int16, Seq Int16, c)
-> Maybe
((Int16, Seq Int16, c)
-> ((Int16, Seq Int16, c), (Int16, Seq Int16, c)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath
-> Map FilePath (Int16, Seq Int16, c)
-> Maybe (Int16, Seq Int16, c)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup FilePath
a Map FilePath (Int16, Seq Int16, c)
mm Maybe
((Int16, Seq Int16, c)
-> ((Int16, Seq Int16, c), (Int16, Seq Int16, c)))
-> Maybe (Int16, Seq Int16, c)
-> Maybe ((Int16, Seq Int16, c), (Int16, Seq Int16, c))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> FilePath
-> Map FilePath (Int16, Seq Int16, c)
-> Maybe (Int16, Seq Int16, c)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup FilePath
b Map FilePath (Int16, Seq Int16, c)
mm
in
case Maybe ((Int16, Seq Int16, c), (Int16, Seq Int16, c))
edm of
Maybe ((Int16, Seq Int16, c), (Int16, Seq Int16, c))
Nothing -> Graph (ContentRow Int16 c)
gr
Just ((Int16
ib, Seq Int16
bffs, c
bc), (Int16
ia, Seq Int16
affs, c
ac)) ->
let
acr :: ContentRow Int16 c
acr = Int16 -> FilePath -> Seq Int16 -> c -> ContentRow Int16 c
forall i c. i -> FilePath -> Seq Int16 -> c -> ContentRow i c
CRow Int16
ia FilePath
a Seq Int16
affs c
ac
bcr :: ContentRow Int16 c
bcr = Int16 -> FilePath -> Seq Int16 -> c -> ContentRow Int16 c
forall i c. i -> FilePath -> Seq Int16 -> c -> ContentRow i c
CRow Int16
ib FilePath
b Seq Int16
bffs c
bc
in
(ContentRow Int16 c
acr ContentRow Int16 c
-> ContentRow Int16 c -> Graph (ContentRow Int16 c)
forall a. a -> a -> Graph a
`G.edge` ContentRow Int16 c
bcr) Graph (ContentRow Int16 c)
-> Graph (ContentRow Int16 c) -> Graph (ContentRow Int16 c)
forall a. Graph a -> Graph a -> Graph a
`G.overlay` Graph (ContentRow Int16 c)
gr
) Graph (ContentRow Int16 c)
forall a. Graph a
G.empty
sourceGraphEdges :: (MonadResource m, MonadThrow m) =>
FilePath
-> M.Map String (Int16, Seq Int16, c)
-> ConduitT i (Maybe (G.Graph (ContentRow Int16 c))) m ()
sourceGraphEdges :: FilePath
-> Map FilePath (Int16, Seq Int16, c)
-> ConduitT i (Maybe (Graph (ContentRow Int16 c))) m ()
sourceGraphEdges FilePath
dir Map FilePath (Int16, Seq Int16, c)
mm =
FilePath -> ConduitT i (CitesRow FilePath) m ()
forall (m :: * -> *) i.
(MonadResource m, MonadThrow m) =>
FilePath -> ConduitT i (CitesRow FilePath) m ()
citesFromFile FilePath
dir ConduitT i (CitesRow FilePath) m ()
-> ConduitM
(CitesRow FilePath) (Maybe (Graph (ContentRow Int16 c))) m ()
-> ConduitT i (Maybe (Graph (ContentRow Int16 c))) m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(CitesRow FilePath -> Maybe (Graph (ContentRow Int16 c)))
-> ConduitM
(CitesRow FilePath) (Maybe (Graph (ContentRow Int16 c))) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
C.map (\(CitesRow FilePath
b FilePath
a) ->
case (,) ((Int16, Seq Int16, c)
-> (Int16, Seq Int16, c)
-> ((Int16, Seq Int16, c), (Int16, Seq Int16, c)))
-> Maybe (Int16, Seq Int16, c)
-> Maybe
((Int16, Seq Int16, c)
-> ((Int16, Seq Int16, c), (Int16, Seq Int16, c)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath
-> Map FilePath (Int16, Seq Int16, c)
-> Maybe (Int16, Seq Int16, c)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup FilePath
a Map FilePath (Int16, Seq Int16, c)
mm Maybe
((Int16, Seq Int16, c)
-> ((Int16, Seq Int16, c), (Int16, Seq Int16, c)))
-> Maybe (Int16, Seq Int16, c)
-> Maybe ((Int16, Seq Int16, c), (Int16, Seq Int16, c))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> FilePath
-> Map FilePath (Int16, Seq Int16, c)
-> Maybe (Int16, Seq Int16, c)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup FilePath
b Map FilePath (Int16, Seq Int16, c)
mm of
Maybe ((Int16, Seq Int16, c), (Int16, Seq Int16, c))
Nothing -> Maybe (Graph (ContentRow Int16 c))
forall a. Maybe a
Nothing
Just ((Int16
ib, Seq Int16
bffs, c
bc), (Int16
ia, Seq Int16
affs, c
ac)) ->
let
acr :: ContentRow Int16 c
acr = Int16 -> FilePath -> Seq Int16 -> c -> ContentRow Int16 c
forall i c. i -> FilePath -> Seq Int16 -> c -> ContentRow i c
CRow Int16
ia FilePath
a Seq Int16
affs c
ac
bcr :: ContentRow Int16 c
bcr = Int16 -> FilePath -> Seq Int16 -> c -> ContentRow Int16 c
forall i c. i -> FilePath -> Seq Int16 -> c -> ContentRow i c
CRow Int16
ib FilePath
b Seq Int16
bffs c
bc
in Graph (ContentRow Int16 c) -> Maybe (Graph (ContentRow Int16 c))
forall a. a -> Maybe a
Just (ContentRow Int16 c
acr ContentRow Int16 c
-> ContentRow Int16 c -> Graph (ContentRow Int16 c)
forall a. a -> a -> Graph a
`G.edge` ContentRow Int16 c
bcr))
contentToFile :: (MonadThrow m, MonadResource m, Binary c) =>
FilePath
-> Int
-> Parser c
-> FileInfo
-> ConduitT ByteString o m ()
contentToFile :: FilePath
-> Int -> Parser c -> FileInfo -> ConduitT ByteString o m ()
contentToFile FilePath
dir Int
n Parser c
pc FileInfo
fi = Bool -> ConduitT ByteString o m () -> ConduitT ByteString o m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((FilePath -> FilePath
takeExtension (FilePath -> FilePath)
-> (ByteString -> FilePath) -> ByteString -> FilePath
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> FilePath
unpack (ByteString -> FilePath) -> ByteString -> FilePath
forall a b. (a -> b) -> a -> b
$ FileInfo -> ByteString
filePath FileInfo
fi) FilePath -> FilePath -> Bool
forall a. Eq a => a -> a -> Bool
== FilePath
".content") (ConduitT ByteString o m () -> ConduitT ByteString o m ())
-> ConduitT ByteString o m () -> ConduitT ByteString o m ()
forall a b. (a -> b) -> a -> b
$ do
ConduitT ByteString (Row Text) m ()
forall (m :: * -> *).
MonadThrow m =>
ConduitT ByteString (Row Text) m ()
parseTSV ConduitT ByteString (Row Text) m ()
-> ConduitM (Row Text) o m () -> ConduitT ByteString o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(Row Text -> Text) -> ConduitT (Row Text) Text m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
C.map Row Text -> Text
T.unwords ConduitT (Row Text) Text m ()
-> ConduitM Text o m () -> ConduitM (Row Text) o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
ConduitT Text (ContentRow Int16 c) m Int16
-> ConduitT Text (ContentRow Int16 c) m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ((Text -> Int16 -> Either Int16 (Int16, ContentRow Int16 c))
-> Int16 -> ConduitT Text (ContentRow Int16 c) m Int16
forall (m :: * -> *) a s b.
Monad m =>
(a -> s -> Either s (s, b)) -> s -> ConduitT a b m s
C.mapAccumWhile ( \Text
r Int16
i -> do
case Parsec Void Text (ContentRow Int16 c)
-> FilePath
-> Text
-> Either (ParseErrorBundle Text Void) (ContentRow Int16 c)
forall e s a.
Parsec e s a -> FilePath -> s -> Either (ParseErrorBundle s e) a
parse (Int16 -> Int -> Parser c -> Parsec Void Text (ContentRow Int16 c)
forall i c. i -> Int -> Parser c -> Parser (ContentRow i c)
contentRowP Int16
i Int
n Parser c
pc) FilePath
"" Text
r of
Left ParseErrorBundle Text Void
e -> FilePath -> Either Int16 (Int16, ContentRow Int16 c)
forall a. HasCallStack => FilePath -> a
error (FilePath -> Either Int16 (Int16, ContentRow Int16 c))
-> FilePath -> Either Int16 (Int16, ContentRow Int16 c)
forall a b. (a -> b) -> a -> b
$ ParseErrorBundle Text Void -> FilePath
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> FilePath
errorBundlePretty ParseErrorBundle Text Void
e
Right ContentRow Int16 c
x -> (Int16, ContentRow Int16 c)
-> Either Int16 (Int16, ContentRow Int16 c)
forall a b. b -> Either a b
Right (Int16 -> Int16
forall a. Enum a => a -> a
succ Int16
i, ContentRow Int16 c
x) ) (Int16
0 :: Int16)
) ConduitT Text (ContentRow Int16 c) m ()
-> ConduitM (ContentRow Int16 c) o m () -> ConduitM Text o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
ConduitT (ContentRow Int16 c) ByteString m ()
forall b (m :: * -> *).
(Binary b, MonadThrow m) =>
ConduitT b ByteString m ()
CB.conduitEncode ConduitT (ContentRow Int16 c) ByteString m ()
-> ConduitT ByteString o m ()
-> ConduitM (ContentRow Int16 c) o m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
FilePath -> ConduitT ByteString o m ()
forall (m :: * -> *) o.
MonadResource m =>
FilePath -> ConduitT ByteString o m ()
C.sinkFile (FilePath
dir FilePath -> FilePath -> FilePath
</> FilePath
"content-z")
contentFromFile :: (MonadResource m, MonadThrow m, Binary c) => FilePath
-> ConduitT i (ContentRow Int16 c) m ()
contentFromFile :: FilePath -> ConduitT i (ContentRow Int16 c) m ()
contentFromFile FilePath
dir =
FilePath -> ConduitT i ByteString m ()
forall (m :: * -> *) i.
MonadResource m =>
FilePath -> ConduitT i ByteString m ()
C.sourceFile (FilePath
dir FilePath -> FilePath -> FilePath
</> FilePath
"content-z") ConduitT i ByteString m ()
-> ConduitM ByteString (ContentRow Int16 c) m ()
-> ConduitT i (ContentRow Int16 c) m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
ConduitM ByteString (ContentRow Int16 c) m ()
forall b (m :: * -> *).
(Binary b, MonadThrow m) =>
ConduitT ByteString b m ()
CB.conduitDecode
data CitesRow a = CitesRow {
CitesRow a -> a
cirTo :: a
, CitesRow a -> a
cirFrom :: a
} deriving (CitesRow a -> CitesRow a -> Bool
(CitesRow a -> CitesRow a -> Bool)
-> (CitesRow a -> CitesRow a -> Bool) -> Eq (CitesRow a)
forall a. Eq a => CitesRow a -> CitesRow a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CitesRow a -> CitesRow a -> Bool
$c/= :: forall a. Eq a => CitesRow a -> CitesRow a -> Bool
== :: CitesRow a -> CitesRow a -> Bool
$c== :: forall a. Eq a => CitesRow a -> CitesRow a -> Bool
Eq, Int -> CitesRow a -> FilePath -> FilePath
[CitesRow a] -> FilePath -> FilePath
CitesRow a -> FilePath
(Int -> CitesRow a -> FilePath -> FilePath)
-> (CitesRow a -> FilePath)
-> ([CitesRow a] -> FilePath -> FilePath)
-> Show (CitesRow a)
forall a. Show a => Int -> CitesRow a -> FilePath -> FilePath
forall a. Show a => [CitesRow a] -> FilePath -> FilePath
forall a. Show a => CitesRow a -> FilePath
forall a.
(Int -> a -> FilePath -> FilePath)
-> (a -> FilePath) -> ([a] -> FilePath -> FilePath) -> Show a
showList :: [CitesRow a] -> FilePath -> FilePath
$cshowList :: forall a. Show a => [CitesRow a] -> FilePath -> FilePath
show :: CitesRow a -> FilePath
$cshow :: forall a. Show a => CitesRow a -> FilePath
showsPrec :: Int -> CitesRow a -> FilePath -> FilePath
$cshowsPrec :: forall a. Show a => Int -> CitesRow a -> FilePath -> FilePath
Show, (forall x. CitesRow a -> Rep (CitesRow a) x)
-> (forall x. Rep (CitesRow a) x -> CitesRow a)
-> Generic (CitesRow a)
forall x. Rep (CitesRow a) x -> CitesRow a
forall x. CitesRow a -> Rep (CitesRow a) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall a x. Rep (CitesRow a) x -> CitesRow a
forall a x. CitesRow a -> Rep (CitesRow a) x
$cto :: forall a x. Rep (CitesRow a) x -> CitesRow a
$cfrom :: forall a x. CitesRow a -> Rep (CitesRow a) x
Generic, Get (CitesRow a)
[CitesRow a] -> Put
CitesRow a -> Put
(CitesRow a -> Put)
-> Get (CitesRow a) -> ([CitesRow a] -> Put) -> Binary (CitesRow a)
forall a. Binary a => Get (CitesRow a)
forall a. Binary a => [CitesRow a] -> Put
forall a. Binary a => CitesRow a -> Put
forall t. (t -> Put) -> Get t -> ([t] -> Put) -> Binary t
putList :: [CitesRow a] -> Put
$cputList :: forall a. Binary a => [CitesRow a] -> Put
get :: Get (CitesRow a)
$cget :: forall a. Binary a => Get (CitesRow a)
put :: CitesRow a -> Put
$cput :: forall a. Binary a => CitesRow a -> Put
Binary)
citesRowP :: Parser (CitesRow String)
citesRowP :: Parser (CitesRow FilePath)
citesRowP = FilePath -> FilePath -> CitesRow FilePath
forall a. a -> a -> CitesRow a
CitesRow (FilePath -> FilePath -> CitesRow FilePath)
-> ParsecT Void Text Identity FilePath
-> ParsecT Void Text Identity (FilePath -> CitesRow FilePath)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity FilePath
-> ParsecT Void Text Identity FilePath
forall a. Parser a -> Parser a
lexeme ParsecT Void Text Identity FilePath
alphaNum ParsecT Void Text Identity (FilePath -> CitesRow FilePath)
-> ParsecT Void Text Identity FilePath
-> Parser (CitesRow FilePath)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ParsecT Void Text Identity FilePath
-> ParsecT Void Text Identity FilePath
forall a. Parser a -> Parser a
lexeme ParsecT Void Text Identity FilePath
alphaNum
data ContentRow i c = CRow {
ContentRow i c -> i
crId :: i
, ContentRow i c -> FilePath
crIdStr :: String
, ContentRow i c -> Seq Int16
crFeatures :: Seq Int16
, ContentRow i c -> c
crClass :: c
} deriving (ContentRow i c -> ContentRow i c -> Bool
(ContentRow i c -> ContentRow i c -> Bool)
-> (ContentRow i c -> ContentRow i c -> Bool)
-> Eq (ContentRow i c)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall i c.
(Eq i, Eq c) =>
ContentRow i c -> ContentRow i c -> Bool
/= :: ContentRow i c -> ContentRow i c -> Bool
$c/= :: forall i c.
(Eq i, Eq c) =>
ContentRow i c -> ContentRow i c -> Bool
== :: ContentRow i c -> ContentRow i c -> Bool
$c== :: forall i c.
(Eq i, Eq c) =>
ContentRow i c -> ContentRow i c -> Bool
Eq, Eq (ContentRow i c)
Eq (ContentRow i c)
-> (ContentRow i c -> ContentRow i c -> Ordering)
-> (ContentRow i c -> ContentRow i c -> Bool)
-> (ContentRow i c -> ContentRow i c -> Bool)
-> (ContentRow i c -> ContentRow i c -> Bool)
-> (ContentRow i c -> ContentRow i c -> Bool)
-> (ContentRow i c -> ContentRow i c -> ContentRow i c)
-> (ContentRow i c -> ContentRow i c -> ContentRow i c)
-> Ord (ContentRow i c)
ContentRow i c -> ContentRow i c -> Bool
ContentRow i c -> ContentRow i c -> Ordering
ContentRow i c -> ContentRow i c -> ContentRow i c
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall i c. (Ord i, Ord c) => Eq (ContentRow i c)
forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Bool
forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Ordering
forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> ContentRow i c
min :: ContentRow i c -> ContentRow i c -> ContentRow i c
$cmin :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> ContentRow i c
max :: ContentRow i c -> ContentRow i c -> ContentRow i c
$cmax :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> ContentRow i c
>= :: ContentRow i c -> ContentRow i c -> Bool
$c>= :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Bool
> :: ContentRow i c -> ContentRow i c -> Bool
$c> :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Bool
<= :: ContentRow i c -> ContentRow i c -> Bool
$c<= :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Bool
< :: ContentRow i c -> ContentRow i c -> Bool
$c< :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Bool
compare :: ContentRow i c -> ContentRow i c -> Ordering
$ccompare :: forall i c.
(Ord i, Ord c) =>
ContentRow i c -> ContentRow i c -> Ordering
$cp1Ord :: forall i c. (Ord i, Ord c) => Eq (ContentRow i c)
Ord, Int -> ContentRow i c -> FilePath -> FilePath
[ContentRow i c] -> FilePath -> FilePath
ContentRow i c -> FilePath
(Int -> ContentRow i c -> FilePath -> FilePath)
-> (ContentRow i c -> FilePath)
-> ([ContentRow i c] -> FilePath -> FilePath)
-> Show (ContentRow i c)
forall a.
(Int -> a -> FilePath -> FilePath)
-> (a -> FilePath) -> ([a] -> FilePath -> FilePath) -> Show a
forall i c.
(Show i, Show c) =>
Int -> ContentRow i c -> FilePath -> FilePath
forall i c.
(Show i, Show c) =>
[ContentRow i c] -> FilePath -> FilePath
forall i c. (Show i, Show c) => ContentRow i c -> FilePath
showList :: [ContentRow i c] -> FilePath -> FilePath
$cshowList :: forall i c.
(Show i, Show c) =>
[ContentRow i c] -> FilePath -> FilePath
show :: ContentRow i c -> FilePath
$cshow :: forall i c. (Show i, Show c) => ContentRow i c -> FilePath
showsPrec :: Int -> ContentRow i c -> FilePath -> FilePath
$cshowsPrec :: forall i c.
(Show i, Show c) =>
Int -> ContentRow i c -> FilePath -> FilePath
Show, (forall x. ContentRow i c -> Rep (ContentRow i c) x)
-> (forall x. Rep (ContentRow i c) x -> ContentRow i c)
-> Generic (ContentRow i c)
forall x. Rep (ContentRow i c) x -> ContentRow i c
forall x. ContentRow i c -> Rep (ContentRow i c) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall i c x. Rep (ContentRow i c) x -> ContentRow i c
forall i c x. ContentRow i c -> Rep (ContentRow i c) x
$cto :: forall i c x. Rep (ContentRow i c) x -> ContentRow i c
$cfrom :: forall i c x. ContentRow i c -> Rep (ContentRow i c) x
Generic, Get (ContentRow i c)
[ContentRow i c] -> Put
ContentRow i c -> Put
(ContentRow i c -> Put)
-> Get (ContentRow i c)
-> ([ContentRow i c] -> Put)
-> Binary (ContentRow i c)
forall t. (t -> Put) -> Get t -> ([t] -> Put) -> Binary t
forall i c. (Binary i, Binary c) => Get (ContentRow i c)
forall i c. (Binary i, Binary c) => [ContentRow i c] -> Put
forall i c. (Binary i, Binary c) => ContentRow i c -> Put
putList :: [ContentRow i c] -> Put
$cputList :: forall i c. (Binary i, Binary c) => [ContentRow i c] -> Put
get :: Get (ContentRow i c)
$cget :: forall i c. (Binary i, Binary c) => Get (ContentRow i c)
put :: ContentRow i c -> Put
$cput :: forall i c. (Binary i, Binary c) => ContentRow i c -> Put
Binary)
bit :: Parser Bool
bit :: Parser Bool
bit = (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'0' ParsecT Void Text Identity Char -> Bool -> Parser Bool
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Bool
False) Parser Bool -> Parser Bool -> Parser Bool
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'1' ParsecT Void Text Identity Char -> Bool -> Parser Bool
forall (f :: * -> *) a b. Functor f => f a -> b -> f b
$> Bool
True)
sparse :: Foldable t => t Bool -> Seq Int16
sparse :: t Bool -> Seq Int16
sparse = (Seq Int16, Int16) -> Seq Int16
forall a b. (a, b) -> a
fst ((Seq Int16, Int16) -> Seq Int16)
-> (t Bool -> (Seq Int16, Int16)) -> t Bool -> Seq Int16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Seq Int16, Int16) -> Bool -> (Seq Int16, Int16))
-> (Seq Int16, Int16) -> t Bool -> (Seq Int16, Int16)
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\(Seq Int16
acc, Int16
i) Bool
b -> if Bool
b then (Seq Int16
acc Seq Int16 -> Int16 -> Seq Int16
forall a. Seq a -> a -> Seq a
|> Int16
i, Int16 -> Int16
forall a. Enum a => a -> a
succ Int16
i) else (Seq Int16
acc, Int16 -> Int16
forall a. Enum a => a -> a
succ Int16
i)) (Seq Int16
forall a. Monoid a => a
mempty, Int16
0)
contentRowP :: i
-> Int
-> Parser c
-> Parser (ContentRow i c)
contentRowP :: i -> Int -> Parser c -> Parser (ContentRow i c)
contentRowP i
i Int
n Parser c
dcp = do
FilePath
istr <- ParsecT Void Text Identity FilePath
-> ParsecT Void Text Identity FilePath
forall a. Parser a -> Parser a
lexeme ParsecT Void Text Identity FilePath
alphaNum
Seq Int16
feats <- [Bool] -> Seq Int16
forall (t :: * -> *). Foldable t => t Bool -> Seq Int16
sparse ([Bool] -> Seq Int16)
-> ParsecT Void Text Identity [Bool]
-> ParsecT Void Text Identity (Seq Int16)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser Bool -> ParsecT Void Text Identity [Bool]
forall (m :: * -> *) a. Monad m => Int -> m a -> m [a]
count Int
n (Parser Bool -> Parser Bool
forall a. Parser a -> Parser a
lexeme Parser Bool
bit)
c
c <- Parser c -> Parser c
forall a. Parser a -> Parser a
lexeme Parser c
dcp
ContentRow i c -> Parser (ContentRow i c)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ContentRow i c -> Parser (ContentRow i c))
-> ContentRow i c -> Parser (ContentRow i c)
forall a b. (a -> b) -> a -> b
$ i -> FilePath -> Seq Int16 -> c -> ContentRow i c
forall i c. i -> FilePath -> Seq Int16 -> c -> ContentRow i c
CRow i
i FilePath
istr Seq Int16
feats c
c
citesToFile :: (MonadThrow m, MonadIO m, MonadResource m) =>
FilePath
-> FileInfo
-> ConduitT ByteString c m ()
citesToFile :: FilePath -> FileInfo -> ConduitT ByteString c m ()
citesToFile FilePath
dir FileInfo
fi = do
let fpath :: FilePath
fpath = ByteString -> FilePath
unpack (ByteString -> FilePath) -> ByteString -> FilePath
forall a b. (a -> b) -> a -> b
$ FileInfo -> ByteString
filePath FileInfo
fi
Bool -> ConduitT ByteString c m () -> ConduitT ByteString c m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (FilePath -> FilePath
takeExtension FilePath
fpath FilePath -> FilePath -> Bool
forall a. Eq a => a -> a -> Bool
== FilePath
".cites") (ConduitT ByteString c m () -> ConduitT ByteString c m ())
-> ConduitT ByteString c m () -> ConduitT ByteString c m ()
forall a b. (a -> b) -> a -> b
$
ConduitT ByteString (Row Text) m ()
forall (m :: * -> *).
MonadThrow m =>
ConduitT ByteString (Row Text) m ()
parseTSV ConduitT ByteString (Row Text) m ()
-> ConduitM (Row Text) c m () -> ConduitT ByteString c m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(Row Text -> Text) -> ConduitT (Row Text) Text m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
C.map Row Text -> Text
T.unwords ConduitT (Row Text) Text m ()
-> ConduitM Text c m () -> ConduitM (Row Text) c m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
(Text -> CitesRow FilePath)
-> ConduitT Text (CitesRow FilePath) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
C.map ( \Text
r -> case Parser (CitesRow FilePath)
-> FilePath
-> Text
-> Either (ParseErrorBundle Text Void) (CitesRow FilePath)
forall e s a.
Parsec e s a -> FilePath -> s -> Either (ParseErrorBundle s e) a
parse Parser (CitesRow FilePath)
citesRowP FilePath
"" Text
r of
Left ParseErrorBundle Text Void
e -> FilePath -> CitesRow FilePath
forall a. HasCallStack => FilePath -> a
error (FilePath -> CitesRow FilePath) -> FilePath -> CitesRow FilePath
forall a b. (a -> b) -> a -> b
$ ParseErrorBundle Text Void -> FilePath
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> FilePath
errorBundlePretty ParseErrorBundle Text Void
e
Right CitesRow FilePath
x -> CitesRow FilePath
x ) ConduitT Text (CitesRow FilePath) m ()
-> ConduitM (CitesRow FilePath) c m () -> ConduitM Text c m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
ConduitT (CitesRow FilePath) ByteString m ()
forall b (m :: * -> *).
(Binary b, MonadThrow m) =>
ConduitT b ByteString m ()
CB.conduitEncode ConduitT (CitesRow FilePath) ByteString m ()
-> ConduitT ByteString c m ()
-> ConduitM (CitesRow FilePath) c m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.|
FilePath -> ConduitT ByteString c m ()
forall (m :: * -> *) o.
MonadResource m =>
FilePath -> ConduitT ByteString o m ()
C.sinkFile (FilePath
dir FilePath -> FilePath -> FilePath
</> FilePath
"cites")