{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} module Text.Email.Parser (addrSpec ,localPart ,domainPart ,EmailAddress ,toByteString) where import Control.Applicative import qualified Data.ByteString.Char8 as BS import Data.ByteString (ByteString) import Data.Char (chr) import Data.Attoparsec.ByteString.Char8 import Data.Attoparsec.Combinator import Data.Data (Data, Typeable) import GHC.Generics (Generic) import qualified Text.Read as Read -- | Represents an email address. data EmailAddress = EmailAddress ByteString ByteString deriving (Eq, Ord, Data, Typeable, Generic) instance Show EmailAddress where show = show . toByteString instance Read EmailAddress where readListPrec = Read.readListPrecDefault readPrec = Read.parens (do bs <- Read.readPrec case parseOnly addrSpec bs of Left _ -> Read.pfail Right a -> return a) -- | Converts an email address back to a ByteString toByteString (EmailAddress l d) = BS.concat [l, BS.singleton '@', d] -- | Extracts the local part of an email address. localPart :: EmailAddress -> ByteString localPart (EmailAddress local _) = local -- | Extracts the domain part of an email address. domainPart :: EmailAddress -> ByteString domainPart (EmailAddress _ domain) = domain -- | A parser for email addresses. addrSpec = do localPart <- local char '@' domainPart <- domain endOfInput return (EmailAddress localPart domainPart) local = dottedAtoms domain = dottedAtoms <|> domainLiteral dottedAtoms = BS.intercalate (BS.singleton '.') <$> (optional cfws *> (atom <|> quotedString) <* optional cfws) `sepBy1` (char '.') atom = takeWhile1 isAtomText isAtomText x = isAlphaNum x || inClass "!#$%&'*+/=?^_`{|}~-" x domainLiteral = (BS.cons '[' . flip BS.snoc ']' . BS.concat) <$> (between (optional cfws *> char '[') (char ']' <* optional cfws) $ many (optional fws >> takeWhile1 isDomainText) <* optional fws) isDomainText x = inClass "\33-\90\94-\126" x || isObsNoWsCtl x quotedString = (\x -> BS.concat $ [BS.singleton '"', BS.concat x, BS.singleton '"']) <$> (between (char '"') (char '"') $ many (optional fws >> quotedContent) <* optional fws) quotedContent = takeWhile1 isQuotedText <|> quotedPair isQuotedText x = inClass "\33\35-\91\93-\126" x || isObsNoWsCtl x quotedPair = (BS.cons '\\' . BS.singleton) <$> (char '\\' *> (vchar <|> wsp <|> lf <|> cr <|> obsNoWsCtl <|> nullChar)) cfws = ignore $ many (comment <|> fws) fws :: Parser () fws = ignore $ ignore (wsp1 >> optional (crlf >> wsp1)) <|> ignore (many1 (crlf >> wsp1)) ignore :: Parser a -> Parser () ignore x = x >> return () between l r x = l *> x <* r comment :: Parser () comment = ignore ((between (char '(') (char ')') $ many (ignore commentContent <|> fws))) commentContent = skipWhile1 isCommentText <|> ignore quotedPair <|> comment isCommentText x = inClass "\33-\39\42-\91\93-\126" x || isObsNoWsCtl x nullChar = char '\0' skipWhile1 x = satisfy x >> skipWhile x wsp1 = skipWhile1 isWsp wsp = satisfy isWsp isWsp x = x == ' ' || x == '\t' isAlphaNum x = isDigit x || isAlpha_ascii x cr = char '\r' lf = char '\n' crlf = cr >> lf >> return () isVchar = inClass "\x21-\x7e" vchar = satisfy isVchar isObsNoWsCtl = inClass "\1-\8\11-\12\14-\31\127" obsNoWsCtl = satisfy isObsNoWsCtl