{-# LANGUAGE OverloadedStrings #-}

{- |
  This module provides Attoparsec-based parsers for the HTTP grammar
  rules as defined by RFC-2616.
-}
module Network.HTTP.Grammar (
  -- * Parsable data types.
  UserAgent(..),
  Product(..),

  -- * Productions
  userAgent,
  product,
  comment,
  ctext,
  quotedPair,
  text,
  char,
  ctl,
  token,
  separators,
  lws,
  crlf,
) where

import Prelude hiding (product)

import Control.Applicative (many, (<|>), (<$>), (<*>))
import Control.Monad (void)
import Data.Attoparsec.ByteString (Parser, word8, takeWhile1, satisfy,
  inClass, option, string, many1)
import Data.ByteString (ByteString)
import Data.Monoid (mconcat)
import Data.Word (Word8)
import qualified Data.ByteString as BS


{- |
  The User-Agent header field is defined as a list of tokens, each
  of which is either a product or a comment. Values of this data type
  represents one such token.
-}
data UserAgent
  = UAProduct Product
  | UAComment ByteString


{- |
  A representation of an HTTP User-Agent product.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.8
-}
data Product = Product {
       productName :: ByteString,
    productVersion :: Maybe ByteString
  }


{- |
  Parser for the User-Agent header, defined:
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.43
-}
userAgent :: Parser [UserAgent]
userAgent = many1 (
    (UAProduct <$> product)
    <|> (UAComment <$> comment)
  )


{- |
  product - http product token.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.8
-}
product :: Parser Product
product =
    Product
      <$> token
      <*> option Nothing versionParser 
  where
    versionParser = do
      void $ string "/"
      Just <$> token


{- |
  comment - basic http grammar rule.
  The result is the parsed comment content, rather than the raw source.

  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
comment :: Parser ByteString
comment = do
  void $ word8 40 -- "("
  segments <- many (
      takeWhile1 ctext
      <|> (BS.singleton <$> quotedPair)
      <|> comment
    )
  void $ word8 41 -- ")"
  return (mconcat segments)


{- |
  ctext - basic http grammar rule.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
ctext :: Word8 -> Bool
ctext x = text x && x /= 40 && x /= 41 -- "(", ")"


{- |
  quotedPair - basic http grammar rule.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
quotedPair :: Parser Word8
quotedPair = do
  void $ word8 92 -- <\>
  satisfy char


{- |
  TEXT - basic http grammar rule.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
text :: Word8 -> Bool
text = not . ctl


{- |
  CHAR - basic http grammar rule.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
char :: Word8 -> Bool
char x = x >= 0 && x <= 127


{- |
  CTL - basic http grammar rule.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
ctl :: Word8 -> Bool
ctl x = (x >= 0 && x <= 31) || x == 127


{- |
  token - basic http grammar rule.

  The grammar specifies that adjacent LWS should be consumed without affecting
  the meaning of the token. This parser returns the token stripped of any
  adjacent lws.

  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
token :: Parser ByteString
token = do
  option () lws
  t <- takeWhile1 (\x -> char x && not (separators x) && not (ctl x))
  option () lws
  return t

 
{- |
  separators - basic http grammar rule.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
separators :: Word8 -> Bool
separators = inClass "()<>@,;:\\\"/[]?={} \t"


{- |
  LWS - basic http grammar rule. (L)inear (W)hite(S)pace.

  Consumes all linear whitespace.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
lws :: Parser ()
lws = do
  option () crlf
  void $ takeWhile1 (\x -> x == 32 || x == 9)


{- |
  crlf - basic http grammar rule.

  Consumes one crlf.
  https://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2
-}
crlf :: Parser ()
crlf = void $ string "\r\n"