module Data.FixedWidth.LargeFile where
import Control.Applicative
import Data.Attoparsec.Text as StrictParse
import Data.Attoparsec.Text.Lazy as LazyParse
import qualified Data.Text as Strict
import qualified Data.Text.Lazy as Lazy
import qualified Data.ByteString.Lazy.Char8 as LazyBytes
import Data.Text.Lazy.Encoding (decodeUtf8)
-- | Read the file at @filename@ as a lazy 'LazyBytes.ByteString' and decode
-- it as UTF-8 into lazy 'Lazy.Text'.
--
-- NOTE(review): 'decodeUtf8' is not the lenient variant; invalid UTF-8
-- presumably surfaces as an exception when the affected part of the text is
-- forced rather than when this action runs -- confirm against the text docs.
fileToLazyText :: String -> IO Lazy.Text
fileToLazyText filename = do
  rawBytes <- LazyBytes.readFile filename
  return (decodeUtf8 rawBytes)
-- | Parse a single line that is followed by an end-of-line sequence.
--
-- The result is the text before the terminator; the terminator itself is
-- consumed but not included in the result.
--
-- NOTE(review): 'isEndOfLine' and 'endOfLine' may not agree on a lone
-- carriage return (the predicate appears to accept it while the parser
-- expects @\\n@ or @\\r\\n@), in which case this parser would fail on such
-- input -- confirm against the attoparsec docs.
strictLineNL :: Parser Strict.Text
strictLineNL = do
  lineBody <- StrictParse.takeTill isEndOfLine
  endOfLine
  return lineBody
-- | Parse one line, whether or not it is followed by an end-of-line sequence.
--
-- Tries 'strictLineNL' first; if that fails, falls back to taking all of the
-- remaining input as the line.
strictLine :: Parser Strict.Text
strictLine = terminatedLine <|> remainingInput
  where
    -- A line that ends with an end-of-line sequence.
    terminatedLine = strictLineNL
    -- Fallback: whatever input is left.
    remainingInput = StrictParse.takeText
-- | Split a lazy 'Lazy.Text' into strict chunks by running @chunkParser@
-- repeatedly on the remaining input until it is empty.
--
-- Returns the chunks parsed so far, paired with 'Nothing' when the whole
-- input was consumed, or with 'Just' a description of why chunking stopped
-- early. Chunks parsed before the stop are still returned.
--
-- A parser that succeeds without consuming any input would be re-run on the
-- same remainder forever, so that situation is reported as a failure instead
-- of looping.
chunkLazyText :: Parser Strict.Text -> Lazy.Text -> ([Strict.Text], Maybe String)
chunkLazyText chunkParser lazyText
  | Lazy.null lazyText = ([], Nothing)
  | otherwise =
      -- 'StrictParse.match' additionally yields the text the parser consumed,
      -- which gives a cheap progress check that does not have to measure
      -- (and thereby force) the lazy remainder.
      case LazyParse.parse (StrictParse.match chunkParser) lazyText of
        LazyParse.Done rest' (consumed, chunk)
          | Strict.null consumed ->
              ( []
              , Just "chunkLazyText: chunk parser succeeded without consuming input"
              )
          | otherwise ->
              -- The lazy let-pattern keeps the result streamable: the head
              -- chunk is available before the rest of the input is parsed.
              let (chunks, failOpt) = chunkLazyText chunkParser rest'
               in (chunk : chunks, failOpt)
        failure@(LazyParse.Fail _ _ _) -> ([], Just $ show failure)
-- | Load the file at @filename@ via 'fileToLazyText' and split its contents
-- into strict chunks with @parser@ using 'chunkLazyText'.
--
-- The result is whatever 'chunkLazyText' produces for the file's text: the
-- list of chunks and an optional failure description.
chunkFile :: String -> (Parser Strict.Text) -> IO ([Strict.Text], Maybe String)
chunkFile filename parser =
  fmap (chunkLazyText parser) (fileToLazyText filename)