---------------------------------------------------------------------- -- | -- Module : Text.Mime -- Copyright : (c) Jun Mukai 2006 -- License : BSD-style (see the file LICENSE) -- -- Maintainer : mukai@jmuk.org -- Stability : experimental -- Portability : portable -- -- Mime Parser -- module Text.Mime where import Text.Packrat.Parse hiding (space, spaces, Message) import Text.Packrat.Pos import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as BS import qualified Codec.Binary.Base64.String as B64 (encode, decode) import Data.Digest.MD5 (hash) import Data.Maybe import Data.Char import Data.List import Data.Bits import Data.Array import qualified Text.PrettyPrint.HughesPJ as PP data Mime = SinglePart [Header] ByteString | MultiPart [Header] [Mime] deriving (Eq, Show) type Message = ([Header], ByteString) type Header = (FieldName, FieldValue) type FieldName = String type FieldValue = String data MimeDerivs = MimeDerivs { dvMessage :: Result MimeDerivs Message , dvMime :: Result MimeDerivs Mime , dvHeader :: Result MimeDerivs Header , dvRest :: Result MimeDerivs ByteString , advChar :: Result MimeDerivs Char , advPos :: Pos } instance Derivs MimeDerivs where dvChar = advChar dvPos = advPos mime :: ByteString -> Mime mime = eval dvMime message :: ByteString -> Message message = eval dvMessage mime' :: String -> Mime mime' = eval' dvMime message' :: String -> Message message' = eval' dvMessage eval :: (MimeDerivs -> Result MimeDerivs r) -> ByteString -> r eval pMain s = case pMain (parse (Pos "" 1 1) s) of Parsed v d' e' -> v NoParse e -> error (show e) parse :: Pos -> ByteString -> MimeDerivs parse pos s = d where d = MimeDerivs message mime header rest chr pos message = pMessage d mime = pMime d header = pHeader d rest = if BS.null s then NoParse (eofError d) else Parsed s (parse (BS.foldl nextPos pos s) BS.empty) (nullError d) chr = if BS.null s then NoParse (eofError d) else let (c, s') = (BS.head s, BS.tail s) in Parsed c (parse (nextPos pos c) s') (nullError d) eval' :: (MimeDerivs -> Result MimeDerivs r) -> String -> r eval' pMain s = case pMain (parse' (Pos "" 1 1) s) of Parsed v d' e' -> v NoParse e -> error (show e) parse' :: Pos -> String -> MimeDerivs parse' pos s = d where d = MimeDerivs message mime header rest chr pos message = pMessage d mime = pMime d header = pHeader d rest = case s of "" -> NoParse (eofError d) _ -> Parsed (BS.pack s) (parse' (foldl nextPos pos s) "") (nullError d) chr = case s of (c:s') -> Parsed c (parse' (nextPos pos c) s') (nullError d) _ -> NoParse (eofError d) ---------------------------------------------------------------------- lineBreak :: Derivs d => Parser d String lineBreak = string "\r\n" <|> string "\n" pHeader :: MimeDerivs -> Result MimeDerivs (String, String) Parser pHeader = do field <- noneOf "\r\n" `manyTill` char ':' many (oneOf " \t") value <- noneOf "\r\n" `manyTill` lineBreak cont <- many (many1 (oneOf " \t") >> (anyChar `manyTill` lineBreak)) return (capital field, unwords (value:cont)) pMessage :: MimeDerivs -> Result MimeDerivs Message Parser pMessage = do headers <- many (Parser dvHeader) lineBreak body <- Parser dvRest return (headers, body) pMime :: MimeDerivs -> Result MimeDerivs Mime Parser pMime = do headers <- many (Parser dvHeader) lineBreak if isMultipart headers then let b = boundary headers in do string ("--"++b) lineBreak mimeBodies <- many $ mimeInner $ string ("--"++b) mimeLast <- mimeInner $ string ("--"++b++"--") return $ MultiPart headers (mimeBodies++[mimeLast]) else do body <- Parser dvRest return $ SinglePart headers body where isMultipart headers = case lookup "Content-Type" headers of Nothing -> False Just s -> "multipart/" == (take 10 s) boundary headers = case lookup "Content-Type" headers of Nothing -> fail "" Just s -> let s' = drop 9 $ head $ dropWhile ((/="boundary=") . (take 9)) (tails s) in if head s' == '"' then takeWhile (/='"') $ tail s' else takeWhile isAlphaNum s' mimeInner b = do headers <- many (Parser dvHeader) lineBreak if isMultipart headers then let b' = boundary headers in do string ("--"++b') lineBreak mimeBodies <- many $ mimeInner $ string ("--"++b') mimeLast <- mimeInner $ string ("--"++b'++"--") b >> lineBreak return $ MultiPart headers (mimeBodies++[mimeLast]) else do body <- (anyChar `manyTill` lineBreak) `manyTill` (b >> lineBreak) return $ SinglePart headers (BS.pack $ unlines body) ---------------------------------------------------------------------- -- RFC 2047 Mime Header Extentions Parser -- type CharSet = String data RFC2047Derivs = RFC2047Derivs { dvHeaderExts :: Result RFC2047Derivs [(CharSet, String)] , hdvChar :: Result RFC2047Derivs Char , hdvPos :: Pos } instance Derivs RFC2047Derivs where dvChar = hdvChar dvPos = hdvPos headerExts :: ByteString -> [(CharSet, String)] headerExts s = case dvHeaderExts (p (Pos "" 1 1) s) of Parsed v d' e' -> v NoParse e -> error (show e) where p pos s = d where d = RFC2047Derivs mstr chr pos mstr = pHeaderExts d chr = if BS.null s then NoParse (eofError d) else let (c, s') = (BS.head s, BS.tail s) in Parsed c (p (nextPos pos c) s') (nullError d) headerExts' :: String -> [(CharSet, String)] headerExts' = headerExts . BS.pack pHeaderExts :: RFC2047Derivs -> Result RFC2047Derivs [(CharSet, String)] Parser pHeaderExts = (getRFC2047 <|> normalStr) `sepBy` (many $ oneOf " \t") where getRFC2047 = do string "=?" charset <- anyChar `manyTill` char '?' mechanism <- choice [char 'B', char 'b', char 'Q', char 'q'] char '?' body <- if mechanism `elem` "bB" then decodeB64 else decodeQuoted return (charset, body) normalStr :: Derivs d => Parser d (String, String) normalStr = do body <- many1 $ noneOf " \t" return ("", body) decodeQuoted :: Derivs d => Parser d String decodeQuoted = do cs <- many (quotedChar <|> noneOf "?") string "?=" return cs where quotedChar = do char '=' n1 <- hexChar n2 <- hexChar return $ chr $ n1 * 16 + n2 hexChar = choice [ do { c <- oneOf ['0'..'9'] ; return $ ord c - ord '0' } , do { c <- oneOf (['A'..'F']++['a'..'f']) ; return $ ord (toLower c) - ord 'a' + 10 } ] decodeB64 :: Derivs d => Parser d String decodeB64 = do bodies <- many pQuartet string "?=" return $ concat bodies where pQuartet = do a <- b64Chars b <- b64Chars c <- b64Chars d <- b64Chars return $ decodeChars (a,b,c,d) <|> do a <- b64Chars b <- b64Chars c <- b64Chars char '=' return $ init $ decodeChars (a, b, c, 0) <|> do a <- b64Chars b <- b64Chars string "==" return [head $ decodeChars (a, b, 0, 0)] b64Chars = do many $ noneOf (['a'..'z']++['A'..'Z']++['0'..'9']++"+/?=") choice [ do { c <- oneOf ['a'..'z'] ; return $ ord c - ord 'a' + 26 } , do { c <- oneOf ['A'..'Z'] ; return $ ord c - ord 'A' } , do { c <- oneOf ['0'..'9'] ; return $ ord c - ord '0' + 52 } , char '+' >> return 62 , char '/' >> return 63 ] decodeChars (a, b, c, d) = map chr [ n `shiftR` 16 .&. 0xff , n `shiftR` 8 .&. 0xff , n .&. 0xff] where n = a `shiftL` 18 .|. b `shiftL` 12 .|. c `shiftL` 6 .|. d ---------------------------------------------------------------------- -- Mime Documents Pretty printer -- showHeader' :: Header -> PP.Doc showHeader' (field, value) = (PP.text (capital field) PP.<> PP.char ':') PP.<+> PP.fsep (map PP.text (words value)) showHeader :: CharSet -> Header -> PP.Doc showHeader charset (field, value) = (PP.text (capital field) PP.<> PP.char ':') PP.<+> (PP.fsep $ map (PP.text . prepB64) $ separate value) where separate s | length s < 76 - length field = [s] | isJust (find isSpace s) = concatMap separate $ words s | length s < 998 - length field = [s] | otherwise = let (s', s'') = splitAt (998 - length field) s in s' : separate s'' prepB64 s | isJust $ find (not.isPrint) s = "=?" ++ charset ++ "?b?" ++ b64Encode s ++ "?=" | otherwise = s capital :: String -> String capital "" = "" capital (c:cs) | isAlpha c = toUpper c : inner cs | otherwise = c : capital cs where inner "" = "" inner (c:cs) | isAlpha c = toLower c : inner cs | otherwise = c : capital cs b64Encode :: String -> String b64Encode = B64.encode . map (toEnum.ord) showMessage :: CharSet -> Message -> PP.Doc showMessage charset (hdrs, body) = PP.vcat ((map (showHeader charset) hdrs) ++ [ PP.empty, PP.text (BS.unpack body)]) showMime :: CharSet -> Mime -> PP.Doc showMime charset (SinglePart hdrs body) = PP.vcat ((map (showHeader charset) hdrs) ++ [ PP.empty, PP.text (BS.unpack body)]) showMime charset (MultiPart hdrs bodies) | isJust $ lookup "Content-Type" hdrs = PP.vcat $ map (showHeader charset) hdrs ++ (PP.empty : mixture boundary (map (showMime charset) bodies)) | otherwise = PP.vcat $ map (showHeader charset) (hdrs++[newHeader]) ++ (PP.empty : mixture newBoundary (map (showMime charset) bodies)) where boundary = let s = fromJust $ lookup "Content-Type" hdrs s' = drop 9 $ head $ dropWhile ((/="boundary=") . (take 9)) (tails s) in if head s' == '"' then takeWhile (/='"') $ tail s' else takeWhile isAlphaNum s' newBoundary = md5Hash $ concatMap snd hdrs newHeader = ("Content-Type", "multipart/mixed; boundary=\"" ++ newBoundary ++ "\"") mixture b [] = [PP.text ("--"++b++"--")] mixture b (body:bodies) = PP.text ("--"++b) : body : mixture b bodies md5Hash = showOctet . hash . map (toEnum.ord) where showOctet = concat . map hexChars hexChars c = [arr ! (c `div` 16), arr ! (c `mod` 16)] arr = listArray (0, 15) "0123456789abcdef"