{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- | Construction of XML cursors backed by memory-mapped files.
module HaskellWorks.Data.Xml.Succinct.Cursor.MMap
  ( SlowCursor
  , FastCursor
  , mmapSlowCursor
  , mmapFastCursor
  ) where

import Data.Word
import Foreign.ForeignPtr
import HaskellWorks.Data.BalancedParens.RangeMin2
import HaskellWorks.Data.BalancedParens.Simple
import HaskellWorks.Data.Bits.BitShown
import HaskellWorks.Data.RankSelect.CsPoppy1
import HaskellWorks.Data.Vector.Storable
import HaskellWorks.Data.Xml.Succinct.Cursor
import HaskellWorks.Data.Xml.Succinct.Cursor.BlankedXml
import HaskellWorks.Data.Xml.Succinct.Cursor.Types

import qualified Data.ByteString.Internal               as BSI
import qualified HaskellWorks.Data.Xml.Internal.ToIbBp64 as I
import qualified System.IO.MMap                          as IO

-- | Memory-map the file at the given path (read-only) and build a
-- 'SlowCursor' over its contents.
--
-- The mapped bytes become the cursor text.  The interest-bit and
-- balanced-parenthesis vectors are derived from the blanked form of those
-- bytes and stored unindexed, wrapped in 'BitShown' and
-- 'SimpleBalancedParens' respectively.  The cursor rank starts at 1.
mmapSlowCursor :: String -> IO SlowCursor
mmapSlowCursor filePath = do
  (mapped :: ForeignPtr Word8, start, len) <-
    IO.mmapFileForeignPtr filePath IO.ReadOnly Nothing
  let -- Forced up front so the ByteString wrapper exists before the cursor
      -- is assembled.
      !xmlBytes = BSI.fromForeignPtr (castForeignPtr mapped) start len
      -- Both bit vectors come out of a single pass over the blanked XML.
      (interestBits, parenBits) =
        construct64UnzipN
          (fromIntegral len)
          (I.toIbBp64 (bsToBlankedXml xmlBytes))
  -- Field order: text, interests, balanced parens, rank.
  pure $! XmlCursor
    xmlBytes
    (BitShown interestBits)
    (SimpleBalancedParens parenBits)
    1

-- | Memory-map the file at the given path and build a 'FastCursor' over its
-- contents.
--
-- This first builds the raw cursor with 'mmapSlowCursor' and then wraps its
-- two bit vectors in 'makeCsPoppy' / 'mkRangeMin2' index structures.
mmapFastCursor :: String -> IO FastCursor
mmapFastCursor filename = do
  -- Obtain the raw cursor: the XML bytes plus the two unindexed bit vectors
  -- (interest bits and balanced parens).  Those two vectors are the parse
  -- result; they can be persisted to an index file for later use so the XML
  -- does not have to be parsed again.
  XmlCursor !xmlBytes (BitShown !interestBits) (SimpleBalancedParens !parenBits) _ <-
    mmapSlowCursor filename
  -- Each index is forced, in this order, before the cursor is returned.
  let !parenPoppy = makeCsPoppy parenBits
  let !parenIndex = mkRangeMin2 parenPoppy
  let !interestIndex = makeCsPoppy interestBits
  pure XmlCursor
    { cursorText     = xmlBytes
    , interests      = interestIndex
    , balancedParens = parenIndex
    , cursorRank     = 1
    }