-- |Functions for encoding and decoding CTCPs.
module Network.IRC.CTCP
  ( -- *Types
    CTCPByteString
  , getUnderlyingByteString

  -- *Encoding and decoding
  , toCTCP
  , fromCTCP
  , encodeCTCP
  , decodeCTCP

  -- *Utilities
  , isCTCP
  , asCTCP
  , orCTCP
  ) where

import Data.ByteString    (ByteString, pack, singleton, unpack)
import Data.List          (mapAccumL)
import Data.Maybe         (catMaybes, fromMaybe)
import Data.Monoid        ((<>))
import Data.Text          (Text, splitOn)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Tuple         (swap)

import qualified Data.ByteString as B
import qualified Data.Text       as T

-- |Type representing a CTCP-encoded bytestring.
newtype CTCPByteString = CBS ByteString
  deriving (Eq, Show)

-- |Get the underlying (encoded) bytestring from a CTCP bytestring.
getUnderlyingByteString :: CTCPByteString -> ByteString
getUnderlyingByteString (CBS bs) = bs

-- |Turn a command name and arguments into a CTCP-encoded bytestring.
--
-- This encodes the text with UTF-8. If another encoding is desired,
-- 'encodeCTCP' should be used directly.
toCTCP :: Text -> [Text] -> CTCPByteString
toCTCP cmd args = encodeCTCP . encodeUtf8 . T.unwords $ cmd : args

-- |Encode a bytestring according to the CTCP spec.
encodeCTCP :: ByteString -> CTCPByteString
encodeCTCP bs = CBS $ singleton soh <> escape bs <> singleton soh

-- |Escape a bytestring according to the CTCP spec.
escape :: ByteString -> ByteString
escape = B.concatMap escape'
  where
    escape' x =
      case lookup x encodings of
        -- If there is an encoding, escape it and use that.
        Just x' -> pack [esc, x']

        -- Otherwise, just return the original character.
        Nothing -> singleton x

-- |Decode a CTCP-encoded bytestring and turn it into a command name
-- and arguments.
--
-- This decodes the text with UTF-8. If another encoding is desired,
-- 'decodeCTCP' should be used directly.
fromCTCP :: CTCPByteString -> (Text, [Text])
fromCTCP bs =
  case splitOn (T.pack " ") . decodeUtf8 . decodeCTCP $ bs of
    (cmd : args) -> (cmd, args)
    _            -> (T.pack "", [])

-- |Decode a CTCP bytestring. Extraeneous escapes are dropped.
decodeCTCP :: CTCPByteString -> ByteString
decodeCTCP (CBS bs) = unescape . B.tail . B.init $ bs

-- |Unescape a bytestring according to the CTCP spec. Extraeneous escapes are dropped.
unescape :: ByteString -> ByteString
unescape = pack . catMaybes . snd . mapAccumL step False . unpack
  where
    -- If we fail to find a decoding, ignore the escape.
    step True x = (False, Just . fromMaybe x $ lookup x decodings)

    -- Enter escape mode, this doesn't add a character to the output.
    step False 0o020 = (True, Nothing)

    step _ x = (False, Just x)

soh :: Integral i => i
soh = 0o001

esc :: Integral i => i
esc = 0o020

encodings :: Integral i => [(i, i)]
encodings =
  [ (0o000, 0o060)
  , (0o012, 0o156)
  , (0o015, 0o162)
  , (0o020, 0o020)
  ]

decodings :: Integral i => [(i, i)]
decodings = map swap encodings

-- |Check if a bytestring represents a CTCP.
--
-- This is intentionally very lenient, in particular it doesn't check
-- that there are no extra escape characters. This is because the spec
-- states that misplaced escape characters should be discarded by the
-- decoding process.
isCTCP :: ByteString -> Bool
isCTCP bs = and $ (B.length bs >= 2) : (B.head bs == soh) : (B.last bs == soh) : map (flip B.notElem bs . fst) encodings

-- |Check if a bytestring looks like a CTCP, and if so, wrap it up in
-- the 'CTCPByteString' type.
--
-- This uses 'isCTCP', and so is lenient with escapes.
asCTCP :: ByteString -> Maybe CTCPByteString
asCTCP bs =
  if isCTCP bs
  then Just $ CBS bs
  else Nothing

-- |Apply one of two functions depending on whether the bytestring
-- looks like a CTCP or not.
--
-- This uses 'asCTCP', and so is lenient with escapes.
orCTCP :: (ByteString -> a) -> (CTCPByteString -> a) -> ByteString -> a
orCTCP f g bs = maybe (f bs) g (asCTCP bs)