{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} {-# Language TemplateHaskell #-} {-| Module : Irc.RawIrcMsg Description : Low-level representation of IRC messages Copyright : (c) Eric Mertens, 2016 License : ISC Maintainer : emertens@gmail.com This module provides a parser and printer for the low-level IRC message format. It handles splitting up IRC commands into the prefix, command, and arguments. -} module Irc.RawIrcMsg ( -- * Low-level IRC messages RawIrcMsg(..) , rawIrcMsg , msgServerTime , msgPrefix , msgCommand , msgParams -- * Text format for IRC messages , parseRawIrcMsg , renderRawIrcMsg -- * Permissive text decoder , asUtf8 ) where import Control.Applicative import Control.Monad (when) import Control.Lens import Data.Array import Data.Attoparsec.Text as P import Data.ByteString (ByteString) import Data.ByteString.Builder (Builder) import Data.Functor import Data.Monoid import Data.Text (Text) import Data.Time (UTCTime, parseTimeM, defaultTimeLocale) import Data.Word (Word8) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Builder as Builder import qualified Data.Text as Text import qualified Data.Text.Encoding as Text import Irc.UserInfo -- | 'RawIrcMsg' breaks down the IRC protocol into its most basic parts. -- The "trailing" parameter indicated in the IRC protocol with a leading -- colon will appear as the last parameter in the parameter list. -- -- Note that RFC 2812 specifies a maximum of 15 parameters. -- -- @:prefix COMMAND param0 param1 param2 .. paramN@ data RawIrcMsg = RawIrcMsg { _msgServerTime :: Maybe UTCTime -- ^ Time from znc.in/server-time-iso extension , _msgPrefix :: Maybe UserInfo -- ^ Optional sender of message , _msgCommand :: Text -- ^ command , _msgParams :: [Text] -- ^ command parameters } deriving (Read, Show) makeLenses ''RawIrcMsg -- | Attempt to split an IRC protocol message without its trailing newline -- information into a structured message. parseRawIrcMsg :: Text -> Maybe RawIrcMsg parseRawIrcMsg x = case parseOnly rawIrcMsgParser x of Left{} -> Nothing Right r -> Just r -- | RFC 2812 specifies that there can only be up to -- 14 "middle" parameters, after that the fifteenth is -- the final parameter and the trailing : is optional! maxMiddleParams :: Int maxMiddleParams = 14 -- Excerpt from https://tools.ietf.org/html/rfc2812#section-2.3.1 -- message = [ ":" prefix SPACE ] command [ params ] crlf -- prefix = servername / ( nickname [ [ "!" user ] "@" host ] ) -- command = 1*letter / 3digit -- params = *14( SPACE middle ) [ SPACE ":" trailing ] -- =/ 14( SPACE middle ) [ SPACE [ ":" ] trailing ] -- nospcrlfcl = %x01-09 / %x0B-0C / %x0E-1F / %x21-39 / %x3B-FF -- ; any octet except NUL, CR, LF, " " and ":" -- middle = nospcrlfcl *( ":" / nospcrlfcl ) -- trailing = *( ":" / " " / nospcrlfcl ) -- SPACE = %x20 ; space character -- crlf = %x0D %x0A ; "carriage return" "linefeed" -- | Parse a whole IRC message assuming that the trailing -- newlines have already been removed. This parser will -- parse valid messages correctly but will also accept some -- invalid messages. Presumably the server isn't sending -- invalid messages! rawIrcMsgParser :: Parser RawIrcMsg rawIrcMsgParser = do time <- guarded (string "@time=") timeParser prefix <- guarded (char ':') prefixParser cmd <- simpleTokenParser params <- paramsParser maxMiddleParams return RawIrcMsg { _msgServerTime = time , _msgPrefix = prefix , _msgCommand = cmd , _msgParams = params } -- | Parse the list of parameters in a raw message. The RFC -- allows for up to 15 parameters. paramsParser :: Int -> Parser [Text] paramsParser n = do _ <- skipMany (char ' ') -- Freenode requires this exception endOfInput $> [] <|> more where more | n == 0 = do _ <- optional (char ':') finalParam | otherwise = do mbColon <- optional (char ':') case mbColon of Just{} -> finalParam Nothing -> middleParam finalParam = do x <- takeText let !x' = Text.copy x return [x'] middleParam = do x <- P.takeWhile (/= ' ') when (Text.null x) (fail "Empty middle parameter") let !x' = Text.copy x xs <- paramsParser (n-1) return (x':xs) -- | Parse the server-time message prefix: -- @time=2015-03-04T22:29:04.064Z timeParser :: Parser UTCTime timeParser = do timeBytes <- simpleTokenParser _ <- char ' ' case parseIrcTime (Text.unpack timeBytes) of Nothing -> fail "Bad server-time format" Just t -> return t parseIrcTime :: String -> Maybe UTCTime parseIrcTime = parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%S%Q%Z" prefixParser :: Parser UserInfo prefixParser = do tok <- simpleTokenParser _ <- char ' ' return (parseUserInfo tok) -- | Take the bytes up to the next space delimiter simpleTokenParser :: Parser Text simpleTokenParser = do xs <- P.takeWhile (/= ' ') when (Text.null xs) (fail "Empty token") return $! Text.copy xs -- | Take the bytes up to the next space delimiter. -- If the first character of this token is a ':' -- then take the whole remaining bytestring -- | Serialize a structured IRC protocol message back into its wire -- format. This command adds the required trailing newline. renderRawIrcMsg :: RawIrcMsg -> ByteString renderRawIrcMsg m = L.toStrict $ Builder.toLazyByteString $ maybe mempty renderPrefix (view msgPrefix m) <> Text.encodeUtf8Builder (view msgCommand m) <> buildParams (view msgParams m) <> Builder.char8 '\r' <> Builder.char8 '\n' -- | Construct a new 'RawIrcMsg' without a time or prefix. rawIrcMsg :: Text {- ^ command -} -> [Text] {- ^ parameters -} -> RawIrcMsg rawIrcMsg = RawIrcMsg Nothing Nothing renderPrefix :: UserInfo -> Builder renderPrefix u = Builder.char8 ':' <> Text.encodeUtf8Builder (renderUserInfo u) <> Builder.char8 ' ' -- | Build concatenate a list of parameters into a single, space- -- delimited bytestring. Use a colon for the last parameter if it contains -- a colon or a space. buildParams :: [Text] -> Builder buildParams [x] | " " `Text.isInfixOf` x || ":" `Text.isPrefixOf` x = Builder.char8 ' ' <> Builder.char8 ':' <> Text.encodeUtf8Builder x buildParams (x:xs) = Builder.char8 ' ' <> Text.encodeUtf8Builder x <> buildParams xs buildParams [] = mempty -- | When the first parser succeeds require the second parser to succeed. -- Otherwise return Nothing guarded :: Parser a -> Parser b -> Parser (Maybe b) guarded pa pb = do mb <- optional pa case mb of Nothing -> return Nothing Just{} -> fmap Just pb -- | Try to decode a message as UTF-8. If that fails interpret it as Windows CP1252 -- This helps deal with clients like XChat that get clever and otherwise misconfigured -- clients. asUtf8 :: ByteString -> Text asUtf8 x = case Text.decodeUtf8' x of Right txt -> txt Left{} -> decodeCP1252 x decodeCP1252 :: ByteString -> Text decodeCP1252 = Text.pack . map (cp1252!) . B.unpack -- This character encoding is a superset of ISO 8859-1 in terms of printable -- characters, but differs from the IANA's ISO-8859-1 by using displayable -- characters rather than control characters in the 80 to 9F (hex) range. cp1252 :: Array Word8 Char cp1252 = listArray (0,255) ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK','\a','\b','\t','\n','\v','\f','\r','\SO','\SI', '\DLE','\DC1','\DC2','\DC3','\DC4','\NAK','\SYN','\ETB','\CAN','\EM','\SUB','\ESC','\FS','\GS','\RS','\US', ' ','!','\"','#','$','%','&','\'','(',')','*','+',',','-','.','/', '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?', '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', 'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_', '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','\DEL', '\8364','\129','\8218','\402','\8222','\8230','\8224','\8225','\710','\8240','\352','\8249','\338','\141','\381','\143', '\144','\8216','\8217','\8220','\8221','\8226','\8211','\8212','\732','\8482','\353','\8250','\339','\157','\382','\376', '\160','\161','\162','\163','\164','\165','\166','\167','\168','\169','\170','\171','\172','\173','\174','\175', '\176','\177','\178','\179','\180','\181','\182','\183','\184','\185','\186','\187','\188','\189','\190','\191', '\192','\193','\194','\195','\196','\197','\198','\199','\200','\201','\202','\203','\204','\205','\206','\207', '\208','\209','\210','\211','\212','\213','\214','\215','\216','\217','\218','\219','\220','\221','\222','\223', '\224','\225','\226','\227','\228','\229','\230','\231','\232','\233','\234','\235','\236','\237','\238','\239', '\240','\241','\242','\243','\244','\245','\246','\247','\248','\249','\250','\251','\252','\253','\254','\255']