{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# Language TemplateHaskell #-}
module Irc.RawIrcMsg
(
RawIrcMsg(..)
, rawIrcMsg
, msgServerTime
, msgPrefix
, msgCommand
, msgParams
, parseRawIrcMsg
, renderRawIrcMsg
, asUtf8
) where
import Control.Applicative
import Control.Monad (when)
import Control.Lens
import Data.Array
import Data.Attoparsec.Text as P
import Data.ByteString (ByteString)
import Data.ByteString.Builder (Builder)
import Data.Functor
import Data.Monoid
import Data.Text (Text)
import Data.Time (UTCTime, parseTimeM, defaultTimeLocale)
import Data.Word (Word8)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Builder as Builder
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import Irc.UserInfo
data RawIrcMsg = RawIrcMsg
{ _msgServerTime :: Maybe UTCTime
, _msgPrefix :: Maybe UserInfo
, _msgCommand :: Text
, _msgParams :: [Text]
}
deriving (Read, Show)
makeLenses ''RawIrcMsg
-- | Attempt to split an IRC protocol message without its trailing newline
-- information into a structured message.
parseRawIrcMsg :: Text -> Maybe RawIrcMsg
parseRawIrcMsg x =
case parseOnly rawIrcMsgParser x of
Left{} -> Nothing
Right r -> Just r
-- | RFC 2812 specifies that there can only be up to
-- 14 "middle" parameters, after that the fifteenth is
-- the final parameter and the trailing : is optional!
maxMiddleParams :: Int
maxMiddleParams = 14
-- Excerpt from https://tools.ietf.org/html/rfc2812#section-2.3.1
-- message = [ ":" prefix SPACE ] command [ params ] crlf
-- prefix = servername / ( nickname [ [ "!" user ] "@" host ] )
-- command = 1*letter / 3digit
-- params = *14( SPACE middle ) [ SPACE ":" trailing ]
-- =/ 14( SPACE middle ) [ SPACE [ ":" ] trailing ]
-- nospcrlfcl = %x01-09 / %x0B-0C / %x0E-1F / %x21-39 / %x3B-FF
-- ; any octet except NUL, CR, LF, " " and ":"
-- middle = nospcrlfcl *( ":" / nospcrlfcl )
-- trailing = *( ":" / " " / nospcrlfcl )
-- SPACE = %x20 ; space character
-- crlf = %x0D %x0A ; "carriage return" "linefeed"
-- | Parse a whole IRC message assuming that the trailing
-- newlines have already been removed. This parser will
-- parse valid messages correctly but will also accept some
-- invalid messages. Presumably the server isn't sending
-- invalid messages!
rawIrcMsgParser :: Parser RawIrcMsg
rawIrcMsgParser =
do time <- guarded (string "@time=") timeParser
prefix <- guarded (char ':') prefixParser
cmd <- simpleTokenParser
params <- paramsParser maxMiddleParams
return RawIrcMsg
{ _msgServerTime = time
, _msgPrefix = prefix
, _msgCommand = cmd
, _msgParams = params
}
-- | Parse the list of parameters in a raw message. The RFC
-- allows for up to 15 parameters.
paramsParser :: Int -> Parser [Text]
paramsParser n =
do _ <- skipMany (char ' ') -- Freenode requires this exception
endOfInput $> [] <|> more
where
more
| n == 0 =
do _ <- optional (char ':')
finalParam
| otherwise =
do mbColon <- optional (char ':')
case mbColon of
Just{} -> finalParam
Nothing -> middleParam
finalParam =
do x <- takeText
let !x' = Text.copy x
return [x']
middleParam =
do x <- P.takeWhile (/= ' ')
when (Text.null x) (fail "Empty middle parameter")
let !x' = Text.copy x
xs <- paramsParser (n-1)
return (x':xs)
-- | Parse the server-time message prefix:
-- @time=2015-03-04T22:29:04.064Z
timeParser :: Parser UTCTime
timeParser =
do timeBytes <- simpleTokenParser
_ <- char ' '
case parseIrcTime (Text.unpack timeBytes) of
Nothing -> fail "Bad server-time format"
Just t -> return t
parseIrcTime :: String -> Maybe UTCTime
parseIrcTime = parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%S%Q%Z"
prefixParser :: Parser UserInfo
prefixParser =
do tok <- simpleTokenParser
_ <- char ' '
return (parseUserInfo tok)
-- | Take the bytes up to the next space delimiter
simpleTokenParser :: Parser Text
simpleTokenParser =
do xs <- P.takeWhile (/= ' ')
when (Text.null xs) (fail "Empty token")
return $! Text.copy xs
-- | Take the bytes up to the next space delimiter.
-- If the first character of this token is a ':'
-- then take the whole remaining bytestring
-- | Serialize a structured IRC protocol message back into its wire
-- format. This command adds the required trailing newline.
renderRawIrcMsg :: RawIrcMsg -> ByteString
renderRawIrcMsg m = L.toStrict $ Builder.toLazyByteString $
maybe mempty renderPrefix (view msgPrefix m)
<> Text.encodeUtf8Builder (view msgCommand m)
<> buildParams (view msgParams m)
<> Builder.char8 '\r'
<> Builder.char8 '\n'
-- | Construct a new 'RawIrcMsg' without a time or prefix.
rawIrcMsg ::
Text {- ^ command -} ->
[Text] {- ^ parameters -} -> RawIrcMsg
rawIrcMsg = RawIrcMsg Nothing Nothing
renderPrefix :: UserInfo -> Builder
renderPrefix u = Builder.char8 ':'
<> Text.encodeUtf8Builder (renderUserInfo u)
<> Builder.char8 ' '
-- | Build concatenate a list of parameters into a single, space-
-- delimited bytestring. Use a colon for the last parameter if it contains
-- a colon or a space.
buildParams :: [Text] -> Builder
buildParams [x]
| " " `Text.isInfixOf` x || ":" `Text.isPrefixOf` x
= Builder.char8 ' ' <> Builder.char8 ':' <> Text.encodeUtf8Builder x
buildParams (x:xs)
= Builder.char8 ' ' <> Text.encodeUtf8Builder x <> buildParams xs
buildParams [] = mempty
-- | When the first parser succeeds require the second parser to succeed.
-- Otherwise return Nothing
guarded :: Parser a -> Parser b -> Parser (Maybe b)
guarded pa pb =
do mb <- optional pa
case mb of
Nothing -> return Nothing
Just{} -> fmap Just pb
-- | Try to decode a message as UTF-8. If that fails interpret it as Windows CP1252
-- This helps deal with clients like XChat that get clever and otherwise misconfigured
-- clients.
asUtf8 :: ByteString -> Text
asUtf8 x = case Text.decodeUtf8' x of
Right txt -> txt
Left{} -> decodeCP1252 x
decodeCP1252 :: ByteString -> Text
decodeCP1252 = Text.pack . map (cp1252!) . B.unpack
-- This character encoding is a superset of ISO 8859-1 in terms of printable
-- characters, but differs from the IANA's ISO-8859-1 by using displayable
-- characters rather than control characters in the 80 to 9F (hex) range.
cp1252 :: Array Word8 Char
cp1252 = listArray (0,255)
['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK','\a','\b','\t','\n','\v','\f','\r','\SO','\SI',
'\DLE','\DC1','\DC2','\DC3','\DC4','\NAK','\SYN','\ETB','\CAN','\EM','\SUB','\ESC','\FS','\GS','\RS','\US',
' ','!','\"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
'0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
'@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
'`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','\DEL',
'\8364','\129','\8218','\402','\8222','\8230','\8224','\8225','\710','\8240','\352','\8249','\338','\141','\381','\143',
'\144','\8216','\8217','\8220','\8221','\8226','\8211','\8212','\732','\8482','\353','\8250','\339','\157','\382','\376',
'\160','\161','\162','\163','\164','\165','\166','\167','\168','\169','\170','\171','\172','\173','\174','\175',
'\176','\177','\178','\179','\180','\181','\182','\183','\184','\185','\186','\187','\188','\189','\190','\191',
'\192','\193','\194','\195','\196','\197','\198','\199','\200','\201','\202','\203','\204','\205','\206','\207',
'\208','\209','\210','\211','\212','\213','\214','\215','\216','\217','\218','\219','\220','\221','\222','\223',
'\224','\225','\226','\227','\228','\229','\230','\231','\232','\233','\234','\235','\236','\237','\238','\239',
'\240','\241','\242','\243','\244','\245','\246','\247','\248','\249','\250','\251','\252','\253','\254','\255']