-- |
-- Copyright   : (c) 2008 Mathieu Boespflug
-- License     : LGPL
-- Maintainer  : mboes@tweag.net
-- Stability   : experimental
-- Portability : non-portable
--
-- Library for parsing and generating the text/directory mime content type.
-- This library implements all the required mechanisms in RFC 2425, which other
-- libraries may use to implement parsing and generating specific profiles,
-- such as vCard.
module Codec.MIME.ContentType.Text.Directory
    ( -- * Types
      Directory, Property(..)
    , Type(..), Parameter(..)
    , Value(..), Rfc2425Value, PrintValue(..), ValueParser
    , nakedType, (@@)
    , lookupParameter
    -- * Encoding\/decoding values
    , decodeValue, encodeValue
    , escape
    -- * Parsing
    , parseDirectory, parseDirectory', fromList, groupByBeginEnd
    -- ** Value Parsers
    , pa_URI, pa_text, pa_date, pa_time, pa_dateTime
    , pa_integer, pa_boolean, pa_float, pa_textList
    -- ** Value parser combinators
    , many
    -- * Printing
    , printDirectory, printDirectory', printProperty) where

import Data.Time
import System.Locale
import Data.Char (toLower)
import Data.Maybe (fromJust)
import Text.Regex.PCRE.ByteString.Lazy
import qualified Codec.Binary.Base64.String as Base64
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.ByteString.Lazy.Char8.Caseless as I
import qualified Data.Map as Map
import Control.Monad (liftM)
import System.IO.Unsafe


-- | A directory is a list of groups of semantically related entities. These
-- entities are grouped together in RFC 2425 using @BEGIN ... END@ pairs.
-- Within a group properties are further grouped together by the property
-- types.
type Directory u = [Map.Map Type [Property u]]

data Property u = Prop
    { prop_type :: Type
    , prop_parameters :: [Parameter]
    , prop_value :: Value u }
                  deriving Show

data Type = Type
    { type_group :: Maybe I.ByteString
    , type_name :: I.ByteString }
            deriving (Eq, Ord, Show)

-- | Make a property type without any grouping.
nakedType :: I.ByteString -> Type
nakedType name = Type { type_group = Nothing, type_name = name }

-- | Check whether the given property is an instance of the given type.
(@@) :: Property u -> I.ByteString -> Bool
prop @@ name = prop_type prop == nakedType name

data Parameter = Param
    { param_name :: I.ByteString
    , param_values :: [B.ByteString] }
                 deriving Show

-- | Find the parameter values for a given parameter name.
lookupParameter :: I.ByteString -> [Parameter] -> Maybe [B.ByteString]
lookupParameter pname [] = Nothing
lookupParameter pname (p:ps)
    | param_name p == pname = Just (param_values p)
    | otherwise = lookupParameter pname ps

type URI = B.ByteString

-- | This is sufficient to represent values whose specification is defined in
-- RFC 2425. Values with other specifications can be represented via the
-- 'IANAValue' constructor.
data Value u = URI URI
             | Text B.ByteString
             | Date Day
             | Time DiffTime
             | DateTime UTCTime
             | Integer Integer
             | Boolean Bool
             | Float Float
-- Decode a list of values as a list of properties, since rfc2425
-- considers them to be semantically equivalent.
--           | List (Value u)
             | IANAValue u -- ^ An IANA defined type not part of rfc2425
               deriving (Eq, Show)

-- | Instantiate Value with this phantom type to indicate that property types
-- should be none other than those defined in rfc2425.
data Rfc2425Value

instance Show Rfc2425Value where
    show _ = undefined

-- | The type of parsers for property values, for instance to read an integer
-- property, text property, etc.
type ValueParser u = (Type, [Parameter]) -> B.ByteString -> [Value u]

-- | Break the input into logical lines, unfolding lines that span multiple
-- physical lines.
unfoldLines :: B.ByteString -> [B.ByteString]
unfoldLines s | B.null s = []
              | otherwise = B.foldr f [B.empty] s where
    f '\r' (xs:xss) | Just (h1, xs') <- B.uncons xs,
                      Just (h2, xs'') <- B.uncons xs' =
                      case (h1, h2) of
                        ('\n', ' ') -> xs'':xss
                        ('\n', '\t') -> xs'':xss
                        ('\n', _) -> "":xs':xss
                        _ -> error "Malformed input: no LF after a CR."
                    | otherwise = "":xss
    f x ~(xs:xss) = B.cons x xs : xss

newtype P a = P { unP :: B.ByteString -> (a, B.ByteString) }

instance Monad P where
    return x = P $ \s -> (x, s)
    m >>= k = P $ \s -> let (a, s') = unP m s in unP (k a) s'

p :: B.ByteString   -- ^ Text of the regular expression.
  -> P B.ByteString -- ^ The matching part of the input.
p pat =
    let Right r = unsafePerformIO $ compile compBlank execAnchored pat
    in P $ \s -> unsafePerformIO $ do
                   Right result <- regexec r s
                   return $ case result of
                              Just (_, match, s', _) -> (match, s')
                              Nothing -> error $ "Parse error: "
                                         ++ take 50 (show (B.unpack s)) ++ " ..."

capture :: B.ByteString     -- ^ Text of the regular expression containing capturing groups.
        -> P [B.ByteString] -- ^ The captured subparts of the input.
capture pat =
    let Right r = unsafePerformIO $ compile compBlank execAnchored pat
    in P $ \s -> unsafePerformIO $ do
                   Right result <- regexec r s
                   return $ case result of
                              Just (_, _, s', captures) -> (captures, s')
                              Nothing -> error $ "Parse error: "
                                         ++ take 50 (show (B.unpack s)) ++ " ..."

-- | Parse one character in the string.
nextChar :: P Char
nextChar = P $ \s -> (B.head s, B.tail s)

-- | Produces a map where properties are grouped together using their type as key.
parseDirectory :: ValueParser u
                -- ^ Given a Property Type and a list of parameters,
                -- parse a string representation into a Value.
                -> B.ByteString
                -> Directory u
parseDirectory valparse = fromList . parseDirectory' valparse

-- | An alternative version of 'parseDirectory' that produces a list
-- of properties rather than a mapping from property types to
-- properties. Note that here properties in the list are in the same
-- order as in the input string.
parseDirectory' :: ValueParser u
               -> B.ByteString
               -> [Property u]
parseDirectory' valparse = concatMap (fst . unP (pa_property valparse)) . unfoldLines

-- | Group properties into blocks delimited by @begin..end@ pairs.
groupByBeginEnd :: [Property u] -> [[Property u]]
groupByBeginEnd [] = []
groupByBeginEnd xs = tail $ foldr f [[]] xs
    where f p (ps:pss) | p @@ "begin" =
                           [] : (p:ps) : pss
          f p (ps:pss) = (p:ps):pss

-- | Build a directory from a list of properties.
fromList :: [Property u] -> Directory u
fromList = map (Map.fromListWith (\x y -> x ++ y) . map (\p -> (prop_type p, [p])))
           . groupByBeginEnd

-- | Parse a string representation into a property. Note that the return type
-- here is actually a list of properties, because we desugar properties whose
-- values are lists into a list of properties, one for each element of the
-- value list.
pa_property :: ValueParser u
              -- ^ Given a Property Type and a list of parameters, parse a
              -- string representation into a (list of) Value.
              -> P [Property u]
pa_property valparse = do
  [groupt, typt, sept] <-
      capture "(?U)(?:((?:[[:alnum:]]|-)+).)?((?:[[:alnum:]]|-)+)(:|;)"
  params <- case sept of
              ";" -> pa_parameterList
              ":" -> return []
  rest <- p ".*$"
  let group = if B.null groupt then Nothing else Just (I.unsensitize groupt)
  let typ = Type { type_group = group, type_name = I.unsensitize typt }
      mkprop v = Prop { prop_type = typ
                      , prop_parameters = params
                      , prop_value = v }
  return $ map mkprop $ valparse (typ, params) (decodeValue params rest)

pa_parameterList :: P [Parameter]
pa_parameterList = aux where
    paramName  = capture "((?:[[:alnum:]]|-)+)="
    paramValue = capture "(?:([^,;:\"]*)|\"([^\"]*)\")(,?)"
    paramValues = do
      [val,qval,sep] <- paramValue
      vs <- case sep of
              "," -> paramValues
              _ -> return []
      return $ if B.null qval then val:vs else qval:vs
    aux = do [name] <- paramName
             vs <- paramValues
             sep <- nextChar
             ps <- case sep of
                     ';' -> aux
                     ':' -> return []
             return $ Param { param_name = I.unsensitize name, param_values = vs } : ps

-- | Properties may indicate an encoding, so this decodes the value
-- if need be before parsing.
decodeValue = codec Base64.decode

-- | Properties may indicate an encoding, so this encodes the value if need be
-- after printing.
encodeValue = codec Base64.encode

codec :: (String -> String) -> [Parameter] -> B.ByteString -> B.ByteString
codec f params input =
    case lookupParameter "encoding" params of
      Nothing -> input
      Just ["b"] -> B.pack $ f $ B.unpack input
      Just ["B"] -> B.pack $ f $ B.unpack input
      _ -> error "Unknown encoding."

-- A few canned parsers for value types defined in rfc2425

pa_URI :: ValueParser u
pa_URI _ = (:[]) . Text

-- | Unescape slashes, newlines and commas.
pa_text :: ValueParser u
pa_text tps = take 1 . pa_textList tps

pa_date :: ValueParser u
pa_date _ =
    (:[]) . Date . readTime defaultTimeLocale (iso8601DateFormat Nothing) . B.unpack

pa_time :: ValueParser u
pa_time _ =
    (:[]) . Time . utctDayTime . readTime defaultTimeLocale "%T" . B.unpack

pa_dateTime :: ValueParser u
pa_dateTime _ =
    (:[]) . DateTime .
    readTime defaultTimeLocale (iso8601DateFormat (Just "T%T")) .
    B.unpack

pa_integer :: ValueParser u
pa_integer _ = (:[]) . Integer . fst . fromJust . B.readInteger

pa_boolean :: ValueParser u
pa_boolean _ "TRUE" = [Boolean True]
pa_boolean _ "FALSE" = [Boolean False]
pa_boolean _ _ = error "Not a valid boolean."

pa_float :: ValueParser u
pa_float _ = (:[]) . Float . read . B.unpack

pa_textList :: ValueParser u
pa_textList _ "" = []
pa_textList _ s = map (Text . B.pack . B.unpack) $ B.foldr f [B.empty] s
    where f ','  (xs:xss) = B.empty : xs : xss
          f '\\' ("":xs:xss) = B.cons ',' xs : xss
          f '\\' (xs:xss) | Just ('n',_)  <- B.uncons xs =
                            B.append "\r\n" xs : xss
          f '\\' (xs:xss) | Just ('N',_)  <- B.uncons xs =
                            B.append "\r\n" xs : xss
          f '\\' (xs:xss) | Just ('\\',_) <- B.uncons xs = B.cons '\\' xs : xss
          f x (xs:xss) = B.cons x xs : xss

-- | Take a parser for single values to a parser for a list of values. This
-- assumes that the separator between values is the "," character, and that
-- values do not contain commas themselves.
many :: ValueParser u -> ValueParser u
many pa tps input = map (head . pa tps) $ breakAll input
    where breakAll "" = []
          breakAll xs = ys : breakAll (B.drop 1 zs)
              where (ys, zs) = B.span (/= ',') xs

-- Printing

showBS :: Show a => a -> B.ByteString
showBS = B.pack . show

-- | Escape any occurrence of the characters given as first argument with a
-- backslash. Newlines are always replaced by the two character sequence
-- @"\\n"@. The backslash character is always escaped.
escape :: B.ByteString -> B.ByteString -> B.ByteString
escape chars = B.foldr f "" where
    f '\r' xs | Just ('\n', xs') <- B.uncons xs = B.append "\\n" xs'
              | otherwise = error "CR not followed by LF."
    f x xs | x `B.elem` B.cons '\\' chars = B.cons '\\' (B.cons x xs)
           | otherwise = B.cons x xs

-- Pretty printing of values
class PrintValue a where
    printValue :: a -> B.ByteString

instance PrintValue u => PrintValue (Value u) where
    printValue (URI v) = v
    printValue (Text v) = escape "," $ v
    printValue (Date v) = showBS v
    printValue (Time v) = showBS v
    printValue (DateTime v) = showBS v
    printValue (Integer v) = showBS v
    printValue (Boolean True) = "TRUE"
    printValue (Boolean False) = "FALSE"
    printValue (Float v) = showBS v
    printValue (IANAValue v) = printValue v

instance PrintValue Rfc2425Value where
    printValue _ = error "No other types in RFC 2425."

printDirectory :: PrintValue u => Directory u -> B.ByteString
printDirectory = printDirectory' . concat . concat . map Map.elems

printDirectory' :: PrintValue u => [Property u] -> B.ByteString
printDirectory' props = B.intercalate "\r\n" $ map printProperty props

printProperty :: PrintValue u => Property u -> B.ByteString
printProperty prop =
    if null params
    then B.concat [ printType (prop_type prop), ":"
                  , encodeValue params $ printValue $ prop_value prop ]
    else B.concat [ printType (prop_type prop), ";"
                  , B.concat $ map printParameter $ prop_parameters prop, ":"
                  , printValue $ prop_value prop ]
    where params = prop_parameters prop

printType :: Type -> B.ByteString
printType typ =
    I.sensitize $ case type_group typ of
                    Just group -> I.concat [group, ".", type_name typ]
                    Nothing -> type_name typ

printParameter :: Parameter -> B.ByteString
printParameter param = B.concat [I.sensitize $ param_name param, "="
                                , B.intercalate "," $ param_values param]