{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.MIME
   Copyright   : Copyright (C) 2011-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Mime type lookup.
-}
module Text.Pandoc.MIME (
  MimeType,
  getMimeType,
  getMimeTypeDef,
  getCharset,
  extensionFromMimeType,
  mediaCategory ) where
import Data.List (isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Maybe (fromMaybe, listToMaybe)
import Data.Tuple (swap)
import qualified Network.Mime
import System.FilePath

type MimeType = T.Text

-- | Determine mime type appropriate for file path.
getMimeType :: FilePath -> Maybe MimeType
getMimeType :: FilePath -> Maybe Text
getMimeType FilePath
fp
  -- ODT
  | FilePath
fp FilePath -> FilePath -> Bool
forall a. Eq a => a -> a -> Bool
== FilePath
"layout-cache" =
        Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"application/binary"
  | FilePath
"Formula-" FilePath -> FilePath -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` FilePath
fp Bool -> Bool -> Bool
&& FilePath
"/" FilePath -> FilePath -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` FilePath
fp =
        Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"application/vnd.oasis.opendocument.formula"
  -- generic
  | Bool
otherwise = Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup (Text -> Text
T.toLower (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
1 (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack (FilePath -> Text) -> FilePath -> Text
forall a b. (a -> b) -> a -> b
$ FilePath -> FilePath
takeExtension FilePath
fp) Map Text Text
mimeTypes

-- | Determime mime type appropriate for file path, defaulting to
-- “application/octet-stream” if nothing else fits.
getMimeTypeDef :: FilePath -> MimeType
getMimeTypeDef :: FilePath -> Text
getMimeTypeDef = Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"application/octet-stream" (Maybe Text -> Text)
-> (FilePath -> Maybe Text) -> FilePath -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> Maybe Text
getMimeType

extensionFromMimeType :: MimeType -> Maybe T.Text
-- few special cases, where there are multiple options:
extensionFromMimeType :: Text -> Maybe Text
extensionFromMimeType Text
"text/plain" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"txt"
extensionFromMimeType Text
"video/quicktime" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"mov"
extensionFromMimeType Text
"video/mpeg" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"mpeg"
extensionFromMimeType Text
"video/dv" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"dv"
extensionFromMimeType Text
"image/vnd.djvu" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"djvu"
extensionFromMimeType Text
"image/tiff" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"tiff"
extensionFromMimeType Text
"image/jpeg" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"jpg"
extensionFromMimeType Text
"application/xml" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"xml"
extensionFromMimeType Text
"application/ogg" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"ogg"
extensionFromMimeType Text
"image/svg+xml" = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"svg" -- avoid svgz
extensionFromMimeType Text
mimetype =
  Text -> Map Text Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup ((Char -> Bool) -> Text -> Text
T.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
';') Text
mimetype) Map Text Text
reverseMimeTypes
  -- note:  we just look up the basic mime type, dropping the content-encoding etc.

-- | Determine general media category for file path, e.g.
--
-- prop> mediaCategory "foo.jpg" = Just "image"
mediaCategory :: FilePath -> Maybe T.Text
mediaCategory :: FilePath -> Maybe Text
mediaCategory FilePath
fp = FilePath -> Maybe Text
getMimeType FilePath
fp Maybe Text -> (Text -> Maybe Text) -> Maybe Text
forall a b. Maybe a -> (a -> Maybe b) -> Maybe b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Text] -> Maybe Text
forall a. [a] -> Maybe a
listToMaybe ([Text] -> Maybe Text) -> (Text -> [Text]) -> Text -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HasCallStack => Text -> Text -> [Text]
Text -> Text -> [Text]
T.splitOn Text
"/"

reverseMimeTypes :: M.Map MimeType T.Text
reverseMimeTypes :: Map Text Text
reverseMimeTypes = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Text, Text)] -> Map Text Text)
-> [(Text, Text)] -> Map Text Text
forall a b. (a -> b) -> a -> b
$ ((Text, Text) -> (Text, Text)) -> [(Text, Text)] -> [(Text, Text)]
forall a b. (a -> b) -> [a] -> [b]
map (Text, Text) -> (Text, Text)
forall a b. (a, b) -> (b, a)
swap [(Text, Text)]
mimeTypesList

mimeTypes :: M.Map T.Text MimeType
mimeTypes :: Map Text Text
mimeTypes = [(Text, Text)] -> Map Text Text
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList [(Text, Text)]
mimeTypesList

-- | Get the charset from a mime type, if one is present.
getCharset :: MimeType -> Maybe T.Text
getCharset :: Text -> Maybe Text
getCharset Text
mt =
  let (Text
_,Text
y) = HasCallStack => Text -> Text -> (Text, Text)
Text -> Text -> (Text, Text)
T.breakOn Text
"charset=" Text
mt
   in if Text -> Bool
T.null Text
y
         then Maybe Text
forall a. Maybe a
Nothing
         else Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.toUpper (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Text
T.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
';') (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop Int
8 Text
y

-- | Collection of common mime types.
-- Except for first entry, list borrowed from
-- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server>
mimeTypesList :: [(T.Text, MimeType)]
mimeTypesList :: [(Text, Text)]
mimeTypesList = Map Text Text -> [(Text, Text)]
forall k a. Map k a -> [(k, a)]
M.toList ((ByteString -> Text) -> Map Text ByteString -> Map Text Text
forall a b k. (a -> b) -> Map k a -> Map k b
M.map ByteString -> Text
T.decodeUtf8 Map Text ByteString
Network.Mime.defaultMimeMap) [(Text, Text)] -> [(Text, Text)] -> [(Text, Text)]
forall a. [a] -> [a] -> [a]
++
           [(Text
"%",Text
"application/x-trash")
           ,(Text
"323",Text
"text/h323")
           ,(Text
"alc",Text
"chemical/x-alchemy")
           ,(Text
"art",Text
"image/x-jg")
           ,(Text
"asn",Text
"chemical/x-ncbi-asn1")
           ,(Text
"aso",Text
"chemical/x-ncbi-asn1-binary")
           ,(Text
"atomsrv",Text
"application/atomserv+xml")
           ,(Text
"b",Text
"chemical/x-molconn-Z")
           ,(Text
"bak",Text
"application/x-trash")
           ,(Text
"bat",Text
"application/x-msdos-program")
           ,(Text
"bmp",Text
"image/x-ms-bmp")
           ,(Text
"boo",Text
"text/x-boo")
           ,(Text
"book",Text
"application/x-maker")
           ,(Text
"bsd",Text
"chemical/x-crossfire")
           ,(Text
"c",Text
"text/x-csrc")
           ,(Text
"c++",Text
"text/x-c++src")
           ,(Text
"c3d",Text
"chemical/x-chem3d")
           ,(Text
"cabal",Text
"application/x-cabal")
           ,(Text
"cac",Text
"chemical/x-cache")
           ,(Text
"cache",Text
"chemical/x-cache")
           ,(Text
"cascii",Text
"chemical/x-cactvs-binary")
           ,(Text
"cbin",Text
"chemical/x-cactvs-binary")
           ,(Text
"cbz",Text
"application/x-cbz")
           ,(Text
"cc",Text
"text/x-c++src")
           ,(Text
"cdf",Text
"application/x-cdf")
           ,(Text
"cdr",Text
"image/x-coreldraw")
           ,(Text
"cdt",Text
"image/x-coreldrawtemplate")
           ,(Text
"cef",Text
"chemical/x-cxf")
           ,(Text
"cer",Text
"chemical/x-cerius")
           ,(Text
"chm",Text
"chemical/x-chemdraw")
           ,(Text
"chrt",Text
"application/x-kchart")
           ,(Text
"com",Text
"application/x-msdos-program")
           ,(Text
"cpa",Text
"chemical/x-compass")
           ,(Text
"cpp",Text
"text/x-c++src")
           ,(Text
"cpt",Text
"image/x-corelphotopaint")
           ,(Text
"crl",Text
"application/x-pkcs7-crl")
           ,(Text
"csf",Text
"chemical/x-cache-csf")
           ,(Text
"csm",Text
"chemical/x-csml")
           ,(Text
"ctab",Text
"chemical/x-cactvs-binary")
           ,(Text
"ctx",Text
"chemical/x-ctx")
           ,(Text
"cub",Text
"chemical/x-gaussian-cube")
           ,(Text
"cxf",Text
"chemical/x-cxf")
           ,(Text
"cxx",Text
"text/x-c++src")
           ,(Text
"d",Text
"text/x-dsrc")
           ,(Text
"dat",Text
"chemical/x-mopac-input")
           ,(Text
"dif",Text
"video/dv")
           ,(Text
"diff",Text
"text/x-diff")
           ,(Text
"dl",Text
"video/dl")
           ,(Text
"dll",Text
"application/x-msdos-program")
           ,(Text
"dms",Text
"application/x-dms")
           ,(Text
"dx",Text
"chemical/x-jcamp-dx")
           ,(Text
"emb",Text
"chemical/x-embl-dl-nucleotide")
           ,(Text
"embl",Text
"chemical/x-embl-dl-nucleotide")
           ,(Text
"emf",Text
"image/x-emf")
           ,(Text
"ent",Text
"chemical/x-ncbi-asn1-ascii")

           -- The type used in mime-types is `application/postscript`,
           -- but code in Text.Pandoc.PDF relies on the type being
           -- `application/eps`. Do not remove without updating that
           -- module.
           ,(Text
"eps",Text
"application/eps")

           ,(Text
"fb",Text
"application/x-maker")
           ,(Text
"fbdoc",Text
"application/x-maker")
           ,(Text
"fch",Text
"chemical/x-gaussian-checkpoint")
           ,(Text
"fchk",Text
"chemical/x-gaussian-checkpoint")
           ,(Text
"frm",Text
"application/x-maker")
           ,(Text
"fs",Text
"text/plain")
           ,(Text
"gal",Text
"chemical/x-gaussian-log")
           ,(Text
"gam",Text
"chemical/x-gamess-input")
           ,(Text
"gamin",Text
"chemical/x-gamess-input")
           ,(Text
"gau",Text
"chemical/x-gaussian-input")
           ,(Text
"gcd",Text
"text/x-pcs-gcd")
           ,(Text
"gcf",Text
"application/x-graphing-calculator")
           ,(Text
"gcg",Text
"chemical/x-gcg8-sequence")
           ,(Text
"gen",Text
"chemical/x-genbank")
           ,(Text
"gjc",Text
"chemical/x-gaussian-input")
           ,(Text
"gjf",Text
"chemical/x-gaussian-input")
           ,(Text
"gl",Text
"video/gl")
           ,(Text
"glsl",Text
"text/plain")
           ,(Text
"gpt",Text
"chemical/x-mopac-graph")
           ,(Text
"gsm",Text
"audio/x-gsm")
           ,(Text
"h",Text
"text/x-chdr")
           ,(Text
"h++",Text
"text/x-c++hdr")
           ,(Text
"hh",Text
"text/x-c++hdr")
           ,(Text
"hin",Text
"chemical/x-hin")
           ,(Text
"hpp",Text
"text/x-c++hdr")
           ,(Text
"hs",Text
"text/x-haskell")
           ,(Text
"hta",Text
"application/hta")
           ,(Text
"hxx",Text
"text/x-c++hdr")
           ,(Text
"ica",Text
"application/x-ica")
           ,(Text
"icz",Text
"text/calendar")
           ,(Text
"iii",Text
"application/x-iphone")
           ,(Text
"inp",Text
"chemical/x-gamess-input")
           ,(Text
"ins",Text
"application/x-internet-signup")
           ,(Text
"isp",Text
"application/x-internet-signup")
           ,(Text
"ist",Text
"chemical/x-isostar")
           ,(Text
"istr",Text
"chemical/x-isostar")
           ,(Text
"jdx",Text
"chemical/x-jcamp-dx")
           ,(Text
"jfif",Text
"image/jpeg")
           ,(Text
"jmz",Text
"application/x-jmol")
           ,(Text
"key",Text
"application/pgp-keys")
           ,(Text
"kil",Text
"application/x-killustrator")
           ,(Text
"kin",Text
"chemical/x-kinemage")
           ,(Text
"lhs",Text
"text/x-literate-haskell")
           ,(Text
"lsf",Text
"video/x-la-asf")
           ,(Text
"lsx",Text
"video/x-la-asf")
           ,(Text
"lyx",Text
"application/x-lyx")
           ,(Text
"lzh",Text
"application/x-lzh")
           ,(Text
"lzx",Text
"application/x-lzx")
           ,(Text
"man",Text
"application/x-troff-man")
           ,(Text
"mcif",Text
"chemical/x-mmcif")
           ,(Text
"mcm",Text
"chemical/x-macmolecule")
           ,(Text
"mdb",Text
"application/msaccess")
           ,(Text
"me",Text
"application/x-troff-me")
           ,(Text
"mm",Text
"application/x-freemind")
           ,(Text
"mmd",Text
"chemical/x-macromodel-input")
           ,(Text
"mmod",Text
"chemical/x-macromodel-input")
           ,(Text
"moc",Text
"text/x-moc")
           ,(Text
"mol",Text
"chemical/x-mdl-molfile")
           ,(Text
"mol2",Text
"chemical/x-mol2")
           ,(Text
"moo",Text
"chemical/x-mopac-out")
           ,(Text
"mop",Text
"chemical/x-mopac-input")
           ,(Text
"mopcrt",Text
"chemical/x-mopac-input")
           ,(Text
"mpc",Text
"chemical/x-mopac-input")
           ,(Text
"mpega",Text
"audio/mpeg")
           ,(Text
"ms",Text
"application/x-troff-ms")
           ,(Text
"msi",Text
"application/x-msi")
           ,(Text
"mvb",Text
"chemical/x-mopac-vib")
           ,(Text
"nwc",Text
"application/x-nwc")
           ,(Text
"o",Text
"application/x-object")
           ,(Text
"old",Text
"application/x-trash")
           ,(Text
"oza",Text
"application/x-oz-application")
           ,(Text
"pat",Text
"image/x-coreldrawpattern")
           ,(Text
"patch",Text
"text/x-diff")
           ,(Text
"pdb",Text
"chemical/x-pdb")
           ,(Text
"php",Text
"application/x-httpd-php")
           ,(Text
"php3",Text
"application/x-httpd-php3")
           ,(Text
"php3p",Text
"application/x-httpd-php3-preprocessed")
           ,(Text
"php4",Text
"application/x-httpd-php4")
           ,(Text
"phps",Text
"application/x-httpd-php-source")
           ,(Text
"pht",Text
"application/x-httpd-php")
           ,(Text
"phtml",Text
"application/x-httpd-php")
           ,(Text
"pk",Text
"application/x-tex-pk")
           ,(Text
"pls",Text
"audio/x-scpls")
           ,(Text
"pot",Text
"text/plain")
           ,(Text
"prt",Text
"chemical/x-ncbi-asn1-ascii")
           ,(Text
"py",Text
"text/x-python")
           ,(Text
"pyc",Text
"application/x-python-code")
           ,(Text
"pyo",Text
"application/x-python-code")
           ,(Text
"qtl",Text
"application/x-quicktimeplayer")
           ,(Text
"rd",Text
"chemical/x-mdl-rdfile")
           ,(Text
"rhtml",Text
"application/x-httpd-eruby")
           ,(Text
"rm",Text
"audio/x-pn-realaudio")
           ,(Text
"ros",Text
"chemical/x-rosdal")
           ,(Text
"rxn",Text
"chemical/x-mdl-rxnfile")
           ,(Text
"sct",Text
"text/scriptlet")
           ,(Text
"sd",Text
"chemical/x-mdl-sdfile")
           ,(Text
"sd2",Text
"audio/x-sd2")
           ,(Text
"sdf",Text
"application/vnd.stardivision.math")
           ,(Text
"sds",Text
"application/vnd.stardivision.chart")
           ,(Text
"sgf",Text
"application/x-go-sgf")
           ,(Text
"sid",Text
"audio/prs.sid")
           ,(Text
"sik",Text
"application/x-trash")
           ,(Text
"spc",Text
"chemical/x-galactic-spc")
           ,(Text
"sw",Text
"chemical/x-swissprot")
           ,(Text
"swfl",Text
"application/x-shockwave-flash")
           ,(Text
"taz",Text
"application/x-gtar")
           ,(Text
"tgf",Text
"chemical/x-mdl-tgf")
           ,(Text
"tm",Text
"text/texmacs")
           ,(Text
"ts",Text
"text/texmacs")
           ,(Text
"tsp",Text
"application/dsptype")
           ,(Text
"val",Text
"chemical/x-ncbi-asn1-binary")
           ,(Text
"vmd",Text
"chemical/x-vmd")
           ,(Text
"vms",Text
"chemical/x-vamas-iso14976")
           ,(Text
"vrm",Text
"x-world/x-vrml")
           ,(Text
"vs",Text
"text/plain")
           ,(Text
"wk",Text
"application/x-123")
           ,(Text
"wmf",Text
"image/x-wmf")
           ,(Text
"wmz",Text
"application/x-ms-wmz")
           ,(Text
"wp5",Text
"application/wordperfect5.1")
           ,(Text
"wsc",Text
"text/scriptlet")
           ,(Text
"wz",Text
"application/x-wingz")
           ,(Text
"xlb",Text
"application/vnd.ms-excel")
           ,(Text
"xtel",Text
"chemical/x-xtel")
           ,(Text
"zmt",Text
"chemical/x-mopac-input")
           ]