-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Read and parse ZIM files -- -- zim-parser is a library to read and parse ZIM (http://openzim.org) -- files. ZIM files contain offline web content (e.g. Wikipedia) which can -- be browsed locally without an Internet connection. @package zim-parser @version 0.2.0.0 -- | This is a library for parsing ZIM (http://openzim.org) files. -- ZIM files contain offline web content (e.g. Wikipedia) which can be -- browsed locally without an Internet connection. -- -- The API is meant to be intuitive for normal use-cases. -- -- To get content for "A/index.htm" from ZIM file "file.zim": -- --
--   > mimeContent <- "file.zim" `getContent` Url "A/index.htm"
--   > :t mimeContent
--   mimeContent :: Maybe (B8.ByteString, BL.ByteString)
--   > print mimeContent
--   Just ("text/html", "<html><head>...</html>")
--   
-- -- The above will open the file, parse the ZIM header, look up the MIME -- type and content of the URL, close the file and return the MIME type -- and content as a pair. Note that content is a lazy bytestring. -- -- The above operation should suffice for a simple webserver serving a -- ZIM file. For finer control, it is possible to cache and reuse the -- file handle and the ZIM header. -- --
--   > hdl <- openBinaryFile "file.zim" ReadMode
--   > hdr <- getHeader hdl
--   > :t hdr
--   hdr :: ZimHeader
--   > (hdl, hdr) `getContent` Url "A/index.htm"
--   Just ("text/html", "<html><head>...</html>")
--   
-- -- ZIM files of Wikimedia Foundation (Wikipedia, Wikibooks, etc) can be -- found at http://ftpmirror.your.org/pub/kiwix/zim. -- -- Below is a full example of a Scotty web server that serves a ZIM file -- (specified on command line) on localhost port 3000: -- --
--   {-# LANGUAGE OverloadedStrings #-}
--   
--   import Control.Monad.IO.Class (liftIO)
--   import Data.Text.Lazy (toStrict, fromStrict)
--   import Data.Text.Encoding (decodeUtf8, encodeUtf8)
--   import System.Environment (getArgs)
--   import Network.HTTP.Types.Status (status404)
--   import Web.Scotty
--   import Codec.Archive.Zim.Parser (getMainPageUrl, getContent, Url(..))
--   
--   main :: IO ()
--   main = do
--       [fp] <- getArgs
--       scotty 3000 $ do
--         get "/" (redirectToZimMainPage fp)
--         get (regex "^/(./.*)$") (serveZimUrl fp)
--         notFound $ text "Invalid URL!"
--   
--   redirectToZimMainPage :: FilePath -> ActionM ()
--   redirectToZimMainPage fp = do
--       res <- liftIO $ getMainPageUrl fp
--       case res of
--         Nothing -> do
--           status status404
--           text "This ZIM file has no main page specified!"
--         Just (Url url) -> redirect . fromStrict $ decodeUtf8 url
--   
--   serveZimUrl :: FilePath -> ActionM ()
--   serveZimUrl fp = do
--       url <- (encodeUtf8 . toStrict) <$> param "1"
--       res <- liftIO $ fp `getContent` Url url
--       case res of
--         Nothing -> do
--           liftIO . putStrLn $ "Invalid URL: " ++ show url
--           status status404
--           text $ "Invalid URL!"
--         Just (mimeType, content) -> do
--           liftIO . putStrLn $ "Serving: " ++ show url
--           setHeader "Content-Type" (fromStrict $ decodeUtf8 mimeType)
--           raw content
--   
-- -- Feedback and contributions are welcome on -- http://github.com/robbinch/zim-parser. module Codec.Archive.Zim.Parser getHeader :: RunZim h => h -> IO ZimHeader getMimeList :: RunZim h => h -> IO MimeList -- | Returns URL of main page in ZIM. This URL can be used for redirecting -- to the actual page. getMainPageUrl :: RunZim h => h -> IO (Maybe Url) getCluster :: RunZim h => h -> ClusterNumber -> IO Cluster getBlob :: RunZim h => h -> (ClusterNumber, BlobNumber) -> IO Blob -- | List of MIME types type MimeList = Array Int ByteString -- | Construct a Title with a Namespace. mkNsTitle :: Char -> ByteString -> Title -- | Construct a TitlePrefix with a Namespace. mkNsTitlePrefix :: Char -> ByteString -> TitlePrefix -- | Construct a Url with a Namespace. mkNsUrl :: Char -> ByteString -> Url -- | Instances of this class represent a ZIM file and are able to perform -- ZIM operations (getMimeList, getContent, etc.). Valid instances include -- a Handle to a ZIM file, a FilePath to a ZIM file, or a (Handle, -- ZimHeader) where ZimHeader is parsed previously (so it does not need -- to be reparsed). class RunZim h class ZimGetDE k getDE :: (ZimGetDE k, RunZim h) => h -> k -> IO ZimDirEnt class ZimSearchDE k -- | Search for a Directory Entry on a RunZim. When searching for a: -- -- searchDE :: (ZimSearchDE k, RunZim h) => h -> k -> IO [(Int, ZimDirEnt)] class ZimGetContent k -- | Get (MIME type, Content). Note that Content is lazy. getContent :: (ZimGetContent k, RunZim h) => h -> k -> IO (Maybe (ByteString, ByteString)) -- | Other than the below, ErrorCall can be thrown by LZMA library if there -- is a problem with decompression. data ZimException -- | ZIM file has invalid magic number (anything other than 72173914). ZimInvalidMagic :: ZimException -- | There is an error in parsing. ZimParseError :: String -> ZimException -- | There are insufficient bytes to parse. 
ZimIncompleteInput :: ZimException -- | The given index (URL, title or cluster) is out of bounds for this ZIM -- file. ZimInvalidIndex :: Int -> ZimException -- | See http://www.openzim.org/wiki/ZIM_file_format#Header for more -- details. data ZimHeader ZimHeader :: Int -> Int -> ByteString -> Int -> Int -> Integer -> Integer -> Integer -> Integer -> Maybe Int -> Maybe Int -> Integer -> ZimHeader -- | Magic Number of file (somewhat superfluous as getHeader -- will throw an exception if magic number is anything other than -- 72173914) [zimMagicNumber] :: ZimHeader -> Int -- | Version of ZIM header [zimVersion] :: ZimHeader -> Int -- | UUID of file [zimUuid] :: ZimHeader -> ByteString -- | Number of articles [zimArticleCount] :: ZimHeader -> Int -- | Number of clusters [zimClusterCount] :: ZimHeader -> Int -- | Position of sorted URL pointers [zimUrlPtrPos] :: ZimHeader -> Integer -- | Position of sorted Title pointers [zimTitlePtrPos] :: ZimHeader -> Integer -- | Position of Cluster pointers [zimClusterPtrPos] :: ZimHeader -> Integer -- | Position of MIME list [zimMimeListPos] :: ZimHeader -> Integer -- | Index of main page [zimMainPage] :: ZimHeader -> Maybe Int -- | Index of layout page [zimLayoutPage] :: ZimHeader -> Maybe Int -- | Position of MD5 checksum [zimChecksumPos] :: ZimHeader -> Integer -- | There are 4 types of directory entries. Most entries in a ZIM file are -- ZimArticleEntry or ZimRedirectEntry. data ZimDirEntType ZimArticleEntry :: ZimDirEntType ZimRedirectEntry :: ZimDirEntType ZimLinkTarget :: ZimDirEntType ZimDeletedEntry :: ZimDirEntType -- | See -- http://www.openzim.org/wiki/ZIM_file_format#Directory_Entries -- for more details. 
data ZimDirEnt ZimDirEnt :: ZimDirEntType -> Int -> Int -> Char -> Int -> Maybe Int -> Maybe Int -> Maybe Int -> ByteString -> ByteString -> ZimDirEnt -- | Type of this Directory Entry [zimDeType] :: ZimDirEnt -> ZimDirEntType -- | Index into MIME list given by getMimeList [zimDeMimeType] :: ZimDirEnt -> Int -- | Parameter Length [zimDeParameterLen] :: ZimDirEnt -> Int -- | Namespace [zimDeNamespace] :: ZimDirEnt -> Char -- | Revision [zimDeRevision] :: ZimDirEnt -> Int -- | Redirect Index (only applicable for ZimRedirectEntry) [zimDeRedirectIndex] :: ZimDirEnt -> Maybe Int -- | Content is stored in this Cluster Number (only applicable for -- ZimArticleEntry) [zimDeClusterNumber] :: ZimDirEnt -> Maybe Int -- | Content is stored in this Blob Number (only applicable for -- ZimArticleEntry) [zimDeBlobNumber] :: ZimDirEnt -> Maybe Int -- | URL [zimDeUrl] :: ZimDirEnt -> ByteString -- | Title [zimDeTitle] :: ZimDirEnt -> ByteString -- | Wrapper for URL index newtype UrlIndex UrlIndex :: Int -> UrlIndex -- | Wrapper for Title index newtype TitleIndex TitleIndex :: Int -> TitleIndex -- | Wrapper for Cluster number newtype ClusterNumber ClusterNumber :: Int -> ClusterNumber -- | Wrapper for Blob number newtype BlobNumber BlobNumber :: Int -> BlobNumber -- | Wrapper for Cluster newtype Cluster Cluster :: ByteString -> Cluster [unCluster] :: Cluster -> ByteString -- | Wrapper for Blob newtype Blob Blob :: ByteString -> Blob [unBlob] :: Blob -> ByteString -- | Wrapper for Url newtype Url Url :: ByteString -> Url -- | Wrapper for Title data Title -- | Wrapper for Title Prefix data TitlePrefix instance GHC.Show.Show Codec.Archive.Zim.Parser.TitlePrefix instance GHC.Classes.Ord Codec.Archive.Zim.Parser.TitlePrefix instance GHC.Classes.Eq Codec.Archive.Zim.Parser.TitlePrefix instance GHC.Show.Show Codec.Archive.Zim.Parser.Title instance GHC.Classes.Ord Codec.Archive.Zim.Parser.Title instance GHC.Classes.Eq Codec.Archive.Zim.Parser.Title instance GHC.Show.Show 
Codec.Archive.Zim.Parser.Url instance GHC.Classes.Ord Codec.Archive.Zim.Parser.Url instance GHC.Classes.Eq Codec.Archive.Zim.Parser.Url instance GHC.Show.Show Codec.Archive.Zim.Parser.BlobNumber instance GHC.Classes.Ord Codec.Archive.Zim.Parser.BlobNumber instance GHC.Classes.Eq Codec.Archive.Zim.Parser.BlobNumber instance GHC.Show.Show Codec.Archive.Zim.Parser.ClusterNumber instance GHC.Classes.Ord Codec.Archive.Zim.Parser.ClusterNumber instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ClusterNumber instance GHC.Show.Show Codec.Archive.Zim.Parser.TitleIndex instance GHC.Classes.Ord Codec.Archive.Zim.Parser.TitleIndex instance GHC.Classes.Eq Codec.Archive.Zim.Parser.TitleIndex instance GHC.Show.Show Codec.Archive.Zim.Parser.UrlIndex instance GHC.Classes.Ord Codec.Archive.Zim.Parser.UrlIndex instance GHC.Classes.Eq Codec.Archive.Zim.Parser.UrlIndex instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimDirEnt instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ZimDirEnt instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimDirEntType instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ZimDirEntType instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ZimHeader instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimHeader instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimException instance GHC.Exception.Exception Codec.Archive.Zim.Parser.ZimException instance Codec.Archive.Zim.Parser.RunZim GHC.IO.Handle.Types.Handle instance Codec.Archive.Zim.Parser.RunZim (GHC.IO.Handle.Types.Handle, Codec.Archive.Zim.Parser.ZimHeader) instance Codec.Archive.Zim.Parser.RunZim GHC.IO.FilePath instance Codec.Archive.Zim.Parser.ZimGetDE Codec.Archive.Zim.Parser.UrlIndex instance Codec.Archive.Zim.Parser.ZimGetDE Codec.Archive.Zim.Parser.TitleIndex instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.ZimDirEnt) instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.ZimDirEnt instance Codec.Archive.Zim.Parser.ZimGetContent 
(Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.Url) instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.Url instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.Title) instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.Title instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.UrlIndex) instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.UrlIndex instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.TitleIndex) instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.TitleIndex instance Codec.Archive.Zim.Parser.ZimSearchDE Codec.Archive.Zim.Parser.Url instance Codec.Archive.Zim.Parser.ZimSearchDE Codec.Archive.Zim.Parser.Title instance Codec.Archive.Zim.Parser.ZimSearchDE Codec.Archive.Zim.Parser.TitlePrefix