{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} -- | This module provides a parser and printer for the low-level IRC -- message format. module Irc.Format ( UserInfo(..) , RawIrcMsg(..) , parseRawIrcMsg , renderRawIrcMsg , parseUserInfo , renderUserInfo , Identifier , mkId , idBytes , idDenote , asUtf8 , ircFoldCase ) where import Control.Applicative import Control.Monad (when) import Data.Array import Data.Attoparsec.ByteString.Char8 as P import Data.ByteString (ByteString) import Data.ByteString.Builder (Builder) import Data.Functor import Data.Monoid import Data.String import Data.Text (Text) import Data.Time (UTCTime) import Data.Word (Word8) import qualified Data.ByteString as B import qualified Data.ByteString.Builder as Builder import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as L import qualified Data.Text as Text import qualified Data.Text.Encoding as Text import Irc.Time (myParseTime) -- | 'UserInfo' packages a nickname along with the username and hsotname -- if they are known in the current context. data UserInfo = UserInfo { userNick :: Identifier , userName :: Maybe ByteString , userHost :: Maybe ByteString } deriving (Read, Show) -- | 'RawIrcMsg' breaks down the IRC protocol into its most basic parts. -- The "trailing" parameter indicated in the IRC protocol with a leading -- colon will appear as the last parameter in the parameter list. -- -- Note that RFC 2812 specifies a maximum of 15 parameters. -- -- @:prefix COMMAND param0 param1 param2 .. paramN@ data RawIrcMsg = RawIrcMsg { msgTime :: Maybe UTCTime , msgPrefix :: Maybe UserInfo , msgCommand :: ByteString , msgParams :: [ByteString] } deriving (Read, Show) -- | Case insensitive identifier representing channels and nicknames data Identifier = Identifier ByteString ByteString deriving (Read, Show) -- Equality on normalized 'Identifiers' instance Eq Identifier where x == y = idDenote x == idDenote y -- Comparison on normalized 'Identifiers' instance Ord Identifier where compare x y = compare (idDenote x) (idDenote y) instance IsString Identifier where fromString = mkId . fromString -- | Construct an 'Identifier' from a 'ByteString' mkId :: ByteString -> Identifier mkId x = Identifier x (ircFoldCase x) -- | Returns the original 'ByteString' of an 'Identifier' idBytes :: Identifier -> ByteString idBytes (Identifier x _) = x -- | Returns the case-normalized 'ByteString' of an 'Identifier' -- which is suitable for comparison. idDenote :: Identifier -> ByteString idDenote (Identifier _ x) = x -- | Attempt to split an IRC protocol message without its trailing newline -- information into a structured message. parseRawIrcMsg :: ByteString -> Maybe RawIrcMsg parseRawIrcMsg x = case parseOnly rawIrcMsgParser x of Left{} -> Nothing Right r -> Just r -- | RFC 2812 specifies that there can only be up to -- 14 "middle" parameters, after that the fifteenth is -- the final parameter and the trailing : is optional! maxMiddleParams :: Int maxMiddleParams = 14 -- Excerpt from https://tools.ietf.org/html/rfc2812#section-2.3.1 -- message = [ ":" prefix SPACE ] command [ params ] crlf -- prefix = servername / ( nickname [ [ "!" user ] "@" host ] ) -- command = 1*letter / 3digit -- params = *14( SPACE middle ) [ SPACE ":" trailing ] -- =/ 14( SPACE middle ) [ SPACE [ ":" ] trailing ] -- nospcrlfcl = %x01-09 / %x0B-0C / %x0E-1F / %x21-39 / %x3B-FF -- ; any octet except NUL, CR, LF, " " and ":" -- middle = nospcrlfcl *( ":" / nospcrlfcl ) -- trailing = *( ":" / " " / nospcrlfcl ) -- SPACE = %x20 ; space character -- crlf = %x0D %x0A ; "carriage return" "linefeed" -- | Parse a whole IRC message assuming that the trailing -- newlines have already been removed. This parser will -- parse valid messages correctly but will also accept some -- invalid messages. Presumably the server isn't sending -- invalid messages! rawIrcMsgParser :: Parser RawIrcMsg rawIrcMsgParser = do time <- guarded (string "@time=") timeParser prefix <- guarded (char ':') prefixParser cmd <- simpleTokenParser params <- paramsParser maxMiddleParams return RawIrcMsg { msgTime = time , msgPrefix = prefix , msgCommand = cmd , msgParams = params } -- | Parse the list of parameters in a raw message. The RFC -- allows for up to 15 parameters. paramsParser :: Int -> Parser [ByteString] paramsParser n = do _ <- skipMany (char ' ') -- Freenode requires this exception endOfInput $> [] <|> more where more | n == 0 = do _ <- optional (char ':') finalParam | otherwise = do mbColon <- optional (char ':') case mbColon of Just{} -> finalParam Nothing -> middleParam finalParam = do x <- takeByteString let !x' = B.copy x return [x'] middleParam = do x <- P.takeWhile (/= ' ') when (B8.null x) (fail "Empty middle parameter") let !x' = B.copy x xs <- paramsParser (n-1) return (x':xs) -- | Parse the server-time message prefix: -- @time=2015-03-04T22:29:04.064Z timeParser :: Parser UTCTime timeParser = do timeBytes <- simpleTokenParser _ <- char ' ' case parseIrcTime (B8.unpack timeBytes) of Nothing -> fail "Bad server-time format" Just t -> return t parseIrcTime :: String -> Maybe UTCTime parseIrcTime = myParseTime "%Y-%m-%dT%H:%M:%S%Q%Z" prefixParser :: Parser UserInfo prefixParser = do tok <- simpleTokenParser _ <- char ' ' return (parseUserInfo tok) -- | Take the bytes up to the next space delimiter simpleTokenParser :: Parser ByteString simpleTokenParser = do xs <- P.takeWhile (/= ' ') when (B8.null xs) (fail "Empty token") return $! B8.copy xs -- | Take the bytes up to the next space delimiter. -- If the first character of this token is a ':' -- then take the whole remaining bytestring -- | Render 'UserInfo' as @nick!username\@hostname@ renderUserInfo :: UserInfo -> ByteString renderUserInfo u = idBytes (userNick u) <> maybe B.empty ("!" <>) (userName u) <> maybe B.empty ("@" <>) (userHost u) -- | Split up a hostmask into a nickname, username, and hostname. -- The username and hostname might not be defined but are delimited by -- a @!@ and @\@@ respectively. parseUserInfo :: ByteString -> UserInfo parseUserInfo x = UserInfo { userNick = mkId nick , userName = if B.null user then Nothing else Just (B.drop 1 user) , userHost = if B.null host then Nothing else Just (B.drop 1 host) } where (nickuser,host) = B8.break (=='@') x (nick,user) = B8.break (=='!') nickuser -- | Serialize a structured IRC protocol message back into its wire -- format. This command adds the required trailing newline. renderRawIrcMsg :: RawIrcMsg -> ByteString renderRawIrcMsg m = L.toStrict $ Builder.toLazyByteString $ maybe mempty renderPrefix (msgPrefix m) <> Builder.byteString (msgCommand m) <> buildParams (msgParams m) <> Builder.word8 13 <> Builder.word8 10 renderPrefix :: UserInfo -> Builder renderPrefix u = Builder.char8 ':' <> Builder.byteString (renderUserInfo u) <> Builder.char8 ' ' -- | Build concatenate a list of parameters into a single, space- -- delimited bytestring. Use a colon for the last parameter if it contains -- a colon or a space. buildParams :: [ByteString] -> Builder buildParams [x] | B.elem 32 x || B.elem 58 x = Builder.word8 32 <> Builder.word8 58 <> Builder.byteString x buildParams (x:xs) = Builder.word8 32 <> Builder.byteString x <> buildParams xs buildParams [] = mempty -- | When the first parser succeeds require the second parser to succeed. -- Otherwise return Nothing guarded :: Parser a -> Parser b -> Parser (Maybe b) guarded pa pb = do mb <- optional pa case mb of Nothing -> return Nothing Just{} -> fmap Just pb -- | Capitalize a string according to RFC 2812 -- Latin letters are capitalized and {|}~ are mapped to [\]^ ircFoldCase :: ByteString -> ByteString ircFoldCase = B.map (B.index casemap . fromIntegral) casemap :: ByteString casemap = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\ \\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\ \ !\"#$%&'()*+,-./0123456789:;<=>?\ \@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\ \`ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^\x7f\ \\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\ \\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\ \\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\ \\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\ \\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\ \\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\ \\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\ \\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" -- Try to decode a message as UTF-8. If that fails interpret it as Windows CP1252 -- This helps deal with clients like XChat that get clever and otherwise misconfigured -- clients. asUtf8 :: ByteString -> Text asUtf8 x = case Text.decodeUtf8' x of Right txt -> txt Left{} -> decodeCP1252 x decodeCP1252 :: ByteString -> Text decodeCP1252 = Text.pack . map (cp1252!) . B.unpack cp1252 :: Array Word8 Char cp1252 = listArray (0,255) ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK','\a','\b','\t','\n','\v','\f','\r','\SO','\SI', '\DLE','\DC1','\DC2','\DC3','\DC4','\NAK','\SYN','\ETB','\CAN','\EM','\SUB','\ESC','\FS','\GS','\RS','\US', ' ','!','\"','#','$','%','&','\'','(',')','*','+',',','-','.','/', '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?', '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', 'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_', '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','\DEL', '\8364','\129','\8218','\402','\8222','\8230','\8224','\8225','\710','\8240','\352','\8249','\338','\141','\381','\143', '\144','\8216','\8217','\8220','\8221','\8226','\8211','\8212','\732','\8482','\353','\8250','\339','\157','\382','\376', '\160','\161','\162','\163','\164','\165','\166','\167','\168','\169','\170','\171','\172','\173','\174','\175', '\176','\177','\178','\179','\180','\181','\182','\183','\184','\185','\186','\187','\188','\189','\190','\191', '\192','\193','\194','\195','\196','\197','\198','\199','\200','\201','\202','\203','\204','\205','\206','\207', '\208','\209','\210','\211','\212','\213','\214','\215','\216','\217','\218','\219','\220','\221','\222','\223', '\224','\225','\226','\227','\228','\229','\230','\231','\232','\233','\234','\235','\236','\237','\238','\239', '\240','\241','\242','\243','\244','\245','\246','\247','\248','\249','\250','\251','\252','\253','\254','\255']