-- |Functions for encoding and decoding CTCPs.
module Network.IRC.CTCP
    ( -- *Types
      CTCPByteString
    , getUnderlyingByteString

    -- *Encoding and decoding
    , toCTCP
    , fromCTCP
    , encodeCTCP
    , decodeCTCP

    -- *Utilities
    , isCTCP
    , asCTCP
    , orCTCP
    ) where

import Data.ByteString    (ByteString, pack, singleton, unpack)
import Data.List          (mapAccumL)
import Data.Maybe         (catMaybes, fromMaybe)
import Data.Text          (Text, splitOn)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Tuple         (swap)

import qualified Data.ByteString as B
import qualified Data.Text       as T

-- |Type representing a CTCP-encoded bytestring.
newtype CTCPByteString = CBS
    { getUnderlyingByteString :: ByteString
    -- ^Get the underlying (encoded) bytestring from a CTCP
    -- bytestring.
    }
    deriving (Eq, Show)

-- |Turn a command name and arguments into a CTCP-encoded bytestring.
--
-- This encodes the text with UTF-8. If another encoding is desired,
-- 'encodeCTCP' should be used directly.
toCTCP :: Text -> [Text] -> CTCPByteString
toCTCP cmd args = encodeCTCP . encodeUtf8 . T.unwords $ cmd : args

-- |Encode a bytestring according to the CTCP spec.
encodeCTCP :: ByteString -> CTCPByteString
encodeCTCP bs = CBS $ B.concat [ singleton soh
                               , escape bs
                               , singleton soh
                               ]

    where escape = B.concatMap escape'
          escape' x = case lookup x encodings of
                        -- If there is an encoding, escape it and use
                        -- that.
                        Just x' -> pack [esc, x']

                        -- Otherwise, just return the original
                        -- character.
                        Nothing -> singleton x

-- |Decode a CTCP-encoded bytestring and turn it into a command name
-- and arguments.
--
-- This decodes the text with UTF-8. If another encoding is desired,
-- 'decodeCTCP' should be used directly.
fromCTCP :: CTCPByteString -> (Text, [Text])
fromCTCP bs = case splitOn (T.pack " ") . decodeUtf8 . decodeCTCP $ bs of
                (cmd : args) -> (cmd, args)
                _            -> (T.pack "", [])

-- |Decode a CTCP bytestring. Extraeneous escapes are dropped.
decodeCTCP :: CTCPByteString -> ByteString
decodeCTCP (CBS bs) = unescape . B.tail . B.init $ bs

    where unescape = pack . catMaybes . snd . mapAccumL step False . unpack

          -- If we fail to find a decoding, ignore the escape.
          step True x = (False, Just . fromMaybe x $ lookup x decodings)

          -- Enter escape mode, this doesn't add a character to the
          -- output.
          step False 0o020 = (True, Nothing)

          step _ x = (False, Just x)

soh :: Integral i => i
soh = 0o001

esc :: Integral i => i
esc = 0o020

encodings :: Integral i => [(i, i)]
encodings = [ (0o000, 0o060)
            , (0o012, 0o156)
            , (0o015, 0o162)
            , (0o020, 0o020)
            ]

decodings :: Integral i => [(i, i)]
decodings = map swap encodings

-- |Check if a bytestring represents a CTCP.
--
-- This is intentionally very lenient, in particular it doesn't check
-- that there are no extra escape characters. This is because the spec
-- states that misplaced escape characters should be discarded by the
-- decoding process.
isCTCP :: ByteString -> Bool
isCTCP bs = and $ (B.length bs >= 2) : (B.head bs == soh) : (B.last bs == soh) : map (flip B.notElem bs . fst) encodings

-- |Check if a bytestring looks like a CTCP, and if so, wrap it up in
-- the 'CTCPByteString' type.
--
-- This uses 'isCTCP', and so is lenient with escapes.
asCTCP :: ByteString -> Maybe CTCPByteString
asCTCP bs = if isCTCP bs
            then Just $ CBS bs
            else Nothing

-- |Apply one of two functions depending on whether the bytestring
-- looks like a CTCP or not.
--
-- This uses 'asCTCP', and so is lenient with escapes.
orCTCP :: (ByteString -> a) -> (CTCPByteString -> a) -> ByteString -> a
orCTCP f g bs = maybe (f bs) g (asCTCP bs)