-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Read and parse ZIM files
--
-- zim-parser is a library to read and parse ZIM (http://openzim.org)
-- files. ZIM files contain offline web content (e.g. Wikipedia) which can
-- be browsed locally without an Internet connection.
@package zim-parser
@version 0.2.0.0
-- | This is a library for parsing ZIM (http://openzim.org) files.
-- ZIM files contain offline web content (e.g. Wikipedia) which can be
-- browsed locally without an Internet connection.
--
-- The API is meant to be intuitive for normal use-cases.
--
-- To get content for "A/index.htm" from ZIM file "file.zim":
--
--
-- > mimeContent <- "file.zim" `getContent` Url "A/index.htm"
-- > :t mimeContent
-- mimeContent :: Maybe (B8.ByteString, BL.ByteString)
-- > print mimeContent
-- Just ("text/html", "<html><head>...</html>")
--
--
-- The above will open the file, parse the ZIM header, look up the MIME
-- type and content of the URL, close the file and return the MIME type
-- and content as a pair. Note that content is a lazy bytestring.
--
-- The above operation should suffice for a simple webserver serving a
-- ZIM file. For finer control, it is possible to cache and reuse the
-- file handle and the ZIM header.
--
--
-- > hdl <- openBinaryFile "file.zim" ReadMode
-- > hdr <- getHeader hdl
-- > :t hdr
-- hdr :: ZimHeader
-- > (hdl, hdr) `getContent` Url "A/index.htm"
-- Just ("text/html", "<html><head>...</html>")
--
--
-- ZIM files of Wikimedia Foundation (Wikipedia, Wikibooks, etc) can be
-- found at http://ftpmirror.your.org/pub/kiwix/zim.
--
-- Below is a full example of a Scotty web server that serves a ZIM file
-- (specified on command line) on localhost port 3000:
--
--
-- {-# LANGUAGE OverloadedStrings #-}
--
-- import Control.Monad.IO.Class (liftIO)
-- import Data.Text.Lazy (toStrict, fromStrict)
-- import Data.Text.Encoding (decodeUtf8, encodeUtf8)
-- import System.Environment (getArgs)
-- import Network.HTTP.Types.Status (status404)
-- import Web.Scotty
-- import Codec.Archive.Zim.Parser (getMainPageUrl, getContent, Url(..))
--
-- main :: IO ()
-- main = do
--   [fp] <- getArgs
--   scotty 3000 $ do
--     get "/" (redirectToZimMainPage fp)
--     get (regex "^/(./.*)$") (serveZimUrl fp)
--     notFound $ text "Invalid URL!"
--
-- redirectToZimMainPage :: FilePath -> ActionM ()
-- redirectToZimMainPage fp = do
--   res <- liftIO $ getMainPageUrl fp
--   case res of
--     Nothing -> do
--       status status404
--       text "This ZIM file has no main page specified!"
--     Just (Url url) -> redirect . fromStrict $ decodeUtf8 url
--
-- serveZimUrl :: FilePath -> ActionM ()
-- serveZimUrl fp = do
--   url <- (encodeUtf8 . toStrict) <$> param "1"
--   res <- liftIO $ fp `getContent` Url url
--   case res of
--     Nothing -> do
--       liftIO . putStrLn $ "Invalid URL: " ++ show url
--       status status404
--       text $ "Invalid URL!"
--     Just (mimeType, content) -> do
--       liftIO . putStrLn $ "Serving: " ++ show url
--       setHeader "Content-Type" (fromStrict $ decodeUtf8 mimeType)
--       raw content
--
--
-- Feedback and contributions are welcome on
-- http://github.com/robbinch/zim-parser.
module Codec.Archive.Zim.Parser
-- | Parse and return the ZimHeader of a ZIM file. The result can be cached
-- and paired with the file Handle, as (Handle, ZimHeader), so that the
-- header does not need to be reparsed by later operations.
getHeader :: RunZim h => h -> IO ZimHeader
-- | Get the list of MIME types (MimeList) of a ZIM file.
getMimeList :: RunZim h => h -> IO MimeList
-- | Returns the URL of the main page in the ZIM file. This URL can be used
-- for redirecting to the actual page. The result is Nothing when the ZIM
-- file has no main page specified.
getMainPageUrl :: RunZim h => h -> IO (Maybe Url)
-- | Get the Cluster with the given ClusterNumber from a ZIM file.
-- NOTE(review): ZimInvalidIndex is documented as covering out-of-bounds
-- cluster indices, so presumably it can be thrown here -- confirm.
getCluster :: RunZim h => h -> ClusterNumber -> IO Cluster
-- | Get the Blob identified by a (ClusterNumber, BlobNumber) pair from a
-- ZIM file.
getBlob :: RunZim h => h -> (ClusterNumber, BlobNumber) -> IO Blob
-- | List of MIME types, stored as an Array of ByteString indexed by Int.
-- The zimDeMimeType field of a ZimDirEnt is an index into this list.
type MimeList = Array Int ByteString
-- | Construct a Title with a Namespace. Presumably the Char is the
-- namespace (compare zimDeNamespace) and the ByteString is the title
-- itself -- confirm.
mkNsTitle :: Char -> ByteString -> Title
-- | Construct a TitlePrefix with a Namespace. Presumably the Char is the
-- namespace (compare zimDeNamespace) and the ByteString is the prefix to
-- search for -- confirm.
mkNsTitlePrefix :: Char -> ByteString -> TitlePrefix
-- | Construct a Url with a Namespace. Presumably the Char is the namespace
-- (compare zimDeNamespace) and the ByteString is the rest of the URL --
-- confirm.
mkNsUrl :: Char -> ByteString -> Url
-- | Instances of this class represent a ZIM file and are able to perform
-- ZIM operations (getMimeList, getContent, etc.). Valid instances include
-- a Handle to a ZIM file, a FilePath to a ZIM file, or a (Handle,
-- ZimHeader) pair where the ZimHeader was parsed previously (so it does
-- not need to be reparsed).
class RunZim h
-- | Keys that identify a Directory Entry by position. The instances are
-- UrlIndex and TitleIndex.
class ZimGetDE k
-- | Get the Directory Entry (ZimDirEnt) for the given key on a RunZim.
getDE :: (ZimGetDE k, RunZim h) => h -> k -> IO ZimDirEnt
-- | Keys that can be searched for with searchDE. The instances are Url,
-- Title and TitlePrefix.
class ZimSearchDE k
-- | Search for a Directory Entry on a RunZim. When searching for a:
--
--
-- - Url Returns either 0 (not found) or 1
-- element.
-- - Title Returns either 0 (not found) or 1
-- element.
-- - TitlePrefix Returns either 0 (not found) or 2
-- elements corresponding to lower and upper bound of titles containing
-- the prefix.
--
-- NOTE(review): the Int paired with each ZimDirEnt is presumably the index
-- of that entry -- confirm.
searchDE :: (ZimSearchDE k, RunZim h) => h -> k -> IO [(Int, ZimDirEnt)]
-- | Keys that getContent accepts. The instances are ZimDirEnt, Url, Title,
-- UrlIndex and TitleIndex, each either on its own or paired with a
-- MimeList as (MimeList, key).
class ZimGetContent k
-- | Get (MIME type, Content). Note that Content is lazy: the module
-- description shows the result as Maybe (B8.ByteString, BL.ByteString),
-- i.e. the MIME type is a strict and the content a lazy bytestring. The
-- example web server in the module description treats a Nothing result as
-- an invalid URL.
getContent :: (ZimGetContent k, RunZim h) => h -> k -> IO (Maybe (ByteString, ByteString))
-- | Exceptions defined by this library. Other than the below, ErrorCall can
-- be thrown by the LZMA library if there is a problem with decompression.
data ZimException
-- | ZIM file has invalid magic number (anything other than 72173914).
ZimInvalidMagic :: ZimException
-- | There is an error in parsing.
ZimParseError :: String -> ZimException
-- | There are insufficient bytes to parse.
ZimIncompleteInput :: ZimException
-- | The given index (URL, title or cluster) is out of bounds for this ZIM
-- file.
ZimInvalidIndex :: Int -> ZimException
-- | See http://www.openzim.org/wiki/ZIM_file_format#Header for more
-- details.
data ZimHeader
ZimHeader :: Int -> Int -> ByteString -> Int -> Int -> Integer -> Integer -> Integer -> Integer -> Maybe Int -> Maybe Int -> Integer -> ZimHeader
-- | Magic Number of file (somewhat superfluous as getHeader
-- will throw an exception if magic number is anything other than
-- 72173914)
[zimMagicNumber] :: ZimHeader -> Int
-- | Version of ZIM header
[zimVersion] :: ZimHeader -> Int
-- | UUID of file
[zimUuid] :: ZimHeader -> ByteString
-- | Number of articles
[zimArticleCount] :: ZimHeader -> Int
-- | Number of clusters
[zimClusterCount] :: ZimHeader -> Int
-- | Position of sorted URL pointers
[zimUrlPtrPos] :: ZimHeader -> Integer
-- | Position of sorted Title pointers
[zimTitlePtrPos] :: ZimHeader -> Integer
-- | Position of Cluster pointers
[zimClusterPtrPos] :: ZimHeader -> Integer
-- | Position of MIME list
[zimMimeListPos] :: ZimHeader -> Integer
-- | Index of main page
[zimMainPage] :: ZimHeader -> Maybe Int
-- | Index of layout page
[zimLayoutPage] :: ZimHeader -> Maybe Int
-- | Position of MD5 checksum
[zimChecksumPos] :: ZimHeader -> Integer
-- | There are 4 types of directory entries. Most content in a ZIM file is
-- usually a ZimArticleEntry or a ZimRedirectEntry.
data ZimDirEntType
-- | Entry whose content is stored in a cluster and blob (see
-- zimDeClusterNumber and zimDeBlobNumber).
ZimArticleEntry :: ZimDirEntType
-- | Entry that redirects to another entry (see zimDeRedirectIndex).
ZimRedirectEntry :: ZimDirEntType
ZimLinkTarget :: ZimDirEntType
ZimDeletedEntry :: ZimDirEntType
-- | See
-- http://www.openzim.org/wiki/ZIM_file_format#Directory_Entries
-- for more details.
data ZimDirEnt
ZimDirEnt :: ZimDirEntType -> Int -> Int -> Char -> Int -> Maybe Int -> Maybe Int -> Maybe Int -> ByteString -> ByteString -> ZimDirEnt
-- | Type of this Directory Entry
[zimDeType] :: ZimDirEnt -> ZimDirEntType
-- | Index into MIME list given by getMimeList
[zimDeMimeType] :: ZimDirEnt -> Int
-- | Parameter Length
[zimDeParameterLen] :: ZimDirEnt -> Int
-- | Namespace
[zimDeNamespace] :: ZimDirEnt -> Char
-- | Revision
[zimDeRevision] :: ZimDirEnt -> Int
-- | Redirect Index (only applicable for ZimRedirectEntry)
[zimDeRedirectIndex] :: ZimDirEnt -> Maybe Int
-- | Content is stored in this Cluster Number (only applicable for
-- ZimArticleEntry)
[zimDeClusterNumber] :: ZimDirEnt -> Maybe Int
-- | Content is stored in this Blob Number (only applicable for
-- ZimArticleEntry)
[zimDeBlobNumber] :: ZimDirEnt -> Maybe Int
-- | URL
[zimDeUrl] :: ZimDirEnt -> ByteString
-- | Title
[zimDeTitle] :: ZimDirEnt -> ByteString
-- | Wrapper for URL index (an Int). Accepted as a key by getDE and
-- getContent.
newtype UrlIndex
UrlIndex :: Int -> UrlIndex
-- | Wrapper for Title index (an Int). Accepted as a key by getDE and
-- getContent.
newtype TitleIndex
TitleIndex :: Int -> TitleIndex
-- | Wrapper for Cluster number (an Int), as taken by getCluster and
-- getBlob.
newtype ClusterNumber
ClusterNumber :: Int -> ClusterNumber
-- | Wrapper for Blob number (an Int), as taken by getBlob.
newtype BlobNumber
BlobNumber :: Int -> BlobNumber
-- | Wrapper for the ByteString of a Cluster, as returned by getCluster.
-- Unwrap with unCluster.
newtype Cluster
Cluster :: ByteString -> Cluster
[unCluster] :: Cluster -> ByteString
-- | Wrapper for the ByteString of a Blob, as returned by getBlob. Unwrap
-- with unBlob.
newtype Blob
Blob :: ByteString -> Blob
[unBlob] :: Blob -> ByteString
-- | Wrapper for Url (a ByteString such as "A/index.htm"). See also
-- mkNsUrl, which constructs a Url with a Namespace.
newtype Url
Url :: ByteString -> Url
-- | Wrapper for Title. No constructor is listed for this type; mkNsTitle
-- constructs a Title with a Namespace.
data Title
-- | Wrapper for Title Prefix. No constructor is listed for this type;
-- mkNsTitlePrefix constructs a TitlePrefix with a Namespace.
data TitlePrefix
-- Show, Ord and Eq instances for the search keys and the index/number
-- wrappers.
instance GHC.Show.Show Codec.Archive.Zim.Parser.TitlePrefix
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.TitlePrefix
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.TitlePrefix
instance GHC.Show.Show Codec.Archive.Zim.Parser.Title
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.Title
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.Title
instance GHC.Show.Show Codec.Archive.Zim.Parser.Url
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.Url
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.Url
instance GHC.Show.Show Codec.Archive.Zim.Parser.BlobNumber
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.BlobNumber
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.BlobNumber
instance GHC.Show.Show Codec.Archive.Zim.Parser.ClusterNumber
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.ClusterNumber
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ClusterNumber
instance GHC.Show.Show Codec.Archive.Zim.Parser.TitleIndex
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.TitleIndex
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.TitleIndex
instance GHC.Show.Show Codec.Archive.Zim.Parser.UrlIndex
instance GHC.Classes.Ord Codec.Archive.Zim.Parser.UrlIndex
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.UrlIndex
-- Show and Eq instances (no Ord instance is listed) for the directory
-- entry and header types.
instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimDirEnt
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ZimDirEnt
instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimDirEntType
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ZimDirEntType
instance GHC.Classes.Eq Codec.Archive.Zim.Parser.ZimHeader
instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimHeader
-- ZimException has Show and Exception instances, so it can be thrown and
-- caught as an exception.
instance GHC.Show.Show Codec.Archive.Zim.Parser.ZimException
instance GHC.Exception.Exception Codec.Archive.Zim.Parser.ZimException
-- RunZim: a ZIM file can be given as a Handle, as a (Handle, ZimHeader)
-- pair, or as a FilePath.
instance Codec.Archive.Zim.Parser.RunZim GHC.IO.Handle.Types.Handle
instance Codec.Archive.Zim.Parser.RunZim (GHC.IO.Handle.Types.Handle, Codec.Archive.Zim.Parser.ZimHeader)
instance Codec.Archive.Zim.Parser.RunZim GHC.IO.FilePath
-- ZimGetDE: keys accepted by getDE.
instance Codec.Archive.Zim.Parser.ZimGetDE Codec.Archive.Zim.Parser.UrlIndex
instance Codec.Archive.Zim.Parser.ZimGetDE Codec.Archive.Zim.Parser.TitleIndex
-- ZimGetContent: keys accepted by getContent, each with and without an
-- accompanying MimeList.
instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.ZimDirEnt)
instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.ZimDirEnt
instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.Url)
instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.Url
instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.Title)
instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.Title
instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.UrlIndex)
instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.UrlIndex
instance Codec.Archive.Zim.Parser.ZimGetContent (Codec.Archive.Zim.Parser.MimeList, Codec.Archive.Zim.Parser.TitleIndex)
instance Codec.Archive.Zim.Parser.ZimGetContent Codec.Archive.Zim.Parser.TitleIndex
-- ZimSearchDE: keys accepted by searchDE.
instance Codec.Archive.Zim.Parser.ZimSearchDE Codec.Archive.Zim.Parser.Url
instance Codec.Archive.Zim.Parser.ZimSearchDE Codec.Archive.Zim.Parser.Title
instance Codec.Archive.Zim.Parser.ZimSearchDE Codec.Archive.Zim.Parser.TitlePrefix