{-|

Parser for the iCalendar format (version 2.0, RFC2445).
  
Features:

  * Unfolds folded lines (see RFC)
 
  * Supports all IANA iCalendar tokens

/Warning!/  This is version 0.0, important things are missing
   
  * Property and parameter values are not parsed, but kept as in the file.

  * There are no restrictions on the properties components can have.
  
  * Line unfolding causes error messages to report the wrong line.
 
This package is meant to grow to a full-fledged data definition, printer and
parser for the iCalendar format.  But that will probably take some time.  So,
if you've come here because you need this, please e-mail me (Eelco Lempsink).

-}

module Text.ICalendar.Parser 
    (
    -- * Tokens from RFC2445
      ComponentName(..)
    , PropertyName(..)
    , ParamName(..)
    , ParamValue(..)
    -- * Data definitions
    , Property
    , Value
    , ICalendar(..)
    , Component(..)
    , Param(..)
    -- * Parser
    , parser
    ) where

------------------------------------------------------------
-- Imports
------------------------------------------------------------

import Data.Char
import Control.Monad

import Control.Applicative
import Control.Monad (MonadPlus(..), ap)
-- Hide Parsec's definitions of some Applicative functions.
import Text.ParserCombinators.Parsec hiding (many, optional, (<|>))

-- Every Monad is an Applicative.
instance Applicative (GenParser s a) where
    pure = return
    (<*>) = ap

-- Every MonadPlus is an Alternative.
instance Alternative (GenParser s a) where
    empty = mzero
    (<|>) = mplus

------------------------------------------------------------
-- Tokens, as defined in RFC2445.
------------------------------------------------------------

-- | Calendar Component names (4.6)
data ComponentName =
              VEVENT     -- ^ Event     Component (4.6.1)
            | VTODO      -- ^ To-do     Component (4.6.2)
            | VJOURNAL   -- ^ Journal   Component (4.6.3)
            | VFREEBUSY  -- ^ Free/Busy Component (4.6.4)
            | VTIMEZONE  -- ^ Time Zone Component (4.6.5)
            | X_c String -- ^ Non-standard/Experimental Component
            -- Time Zone subcomponents
            | STANDARD   -- ^ Standard Time        Sub-component (4.6.5)
            | DAYLIGHT   -- ^ Daylight Saving Time Sub-component (4.6.5)
            -- Event and To-do subcomponent
            | VALARM     -- ^ Alarm                Sub-component (4.6.6)
            deriving (Eq, Show)

-- | Property names for Calendar Properties (4.7) and Component Properties (4.8)
data PropertyName = 
                -- Calendar properties (4.7)
                  CALSCALE          -- ^ Calendar Scale (4.7.1)
                | METHOD            -- ^ Method (4.7.2)
                | PRODID            -- ^ Product Identifier (4.7.3)
                | VERSION           -- ^ Version (4.7.4)
                
                -- Component properties (4.8.1)
                | ATTACH            -- ^ Attachment (4.8.1.1)
                | CATEGORIES        -- ^ Categories (4.8.1.2)
                | CLASS             -- ^ Classification (4.8.1.3)
                | COMMENT           -- ^ Comment (4.8.1.4)
                | DESCRIPTION       -- ^ Description (4.8.1.5)
                | GEO               -- ^ Geographic Position (4.8.1.6)
                | LOCATION          -- ^ Location (4.8.1.7)
                | PERCENT_COMPLETE  -- ^ Percent Complete (4.8.1.8)
                | PRIORITY          -- ^ Priority (4.8.1.9)
                | RESOURCES         -- ^ Resources (4.8.1.10)
                | STATUS            -- ^ Status (4.8.1.11)
                | SUMMARY           -- ^ Summary (4.8.1.12)
               
                -- Date and Time Component properties (4.8.2)
                | COMPLETED         -- ^ Date/Time Completed (4.8.2.1) 
                | DTEND             -- ^ Date/Time End (4.8.2.2)         
                | DUE               -- ^ Date/Time Due (4.8.2.3)
                | DTSTART           -- ^ Date/Time Start (4.8.2.4) 
                | DURATION          -- ^ Duration (4.8.2.5)   
                | FREEBUSY          -- ^ Free/Busy Time (4.8.2.6)    
                | TRANSP            -- ^ Time Transparency (4.8.2.7)  
               
                -- Time Zone Component properties (4.8.3)
                | TZID              -- ^ Time Zone Identifier (4.8.3.1) 
                | TZNAME            -- ^ Time Zone Name (4.8.3.2)      
                | TZOFFSETFROM      -- ^ Time Zone Offset From (4.8.3.3)     
                | TZOFFSETTO        -- ^ Time Zone Offset To (4.8.3.4)  
                | TZURL             -- ^ Time Zone URL (4.8.3.5)     
                
                -- Relationship Component properties (4.8.4)
                | ATTENDEE          -- ^ Attendee (4.8.4.1) 
                | CONTACT           -- ^ Contact (4.8.4.2)
                | ORGANIZER         -- ^ Organizer (4.8.4.3)  
                | RECURRENCE_ID     -- ^ Recurrence ID (4.8.4.4)    
                | RELATED_TO        -- ^ Related To (4.8.4.5)   
                | URL               -- ^ Uniform Resource Locator (4.8.4.6) 
                | UID               -- ^ Unique Identfier (4.8.4.7)  
                
                -- Recurrence Component properties (4.8.5)
                | EXDATE            -- ^ Exception Date/Times (4.8.5.1) 
                | EXRULE            -- ^ Exception Rule (4.8.5.2)  
                | RDATE             -- ^ Recurrence Date/Times (4.8.5.3) 
                | RRULE             -- ^ Recurrence Rule (4.8.5.4) 
                
                -- Alarm Component properties (4.8.6)
                | ACTION            -- ^ Action (4.8.6.1)    
                | REPEAT            -- ^ Repeat Count (4.8.6.2)  
                | TRIGGER           -- ^ Trigger (4.8.6.3)    
                
                -- Change Management Component properties (4.8.7)
                | CREATED           -- ^ Date/Time Created (4.8.7.1)   
                | DTSTAMP           -- ^ Date/Time Stamp (4.8.7.2)     
                | LAST_MODIFIED     -- ^ Last Modified (4.8.7.3)      
                | SEQUENCE          -- ^ Sequence Number (4.8.7.4)   
                
                -- Miscellaneous Component properties (4.8.8)
                | X_ String         -- ^ Non-standard Properties (4.8.8.1)
                | REQUEST_STATUS    -- ^ Request Status (4.8.8.2)
                deriving (Eq, Show)

-- | Parameter names (4.2)
data ParamName = ALTREP          -- ^ Alternate Text Represenation (4.2.1)           
               | CN              -- ^ Common Name (4.2.2)   
               | CUTYPE          -- ^ Calendar User Type (4.2.3)       
               | DELEGATED_FROM  -- ^ Delegators (4.2.4)               
               | DELEGATED_TO    -- ^ Delegatees (4.2.5)             
               | DIR             -- ^ Directory Entry Reference (4.2.6)    
               | ENCODING        -- ^ Inline Encoding (4.2.7)         
               | FMTYPE          -- ^ Format Type (4.2.8)       
               | FBTYPE          -- ^ Free/Busy Time Type (4.2.9)       
               | LANGUAGE        -- ^ Language (4.2.10)         
               | MEMBER          -- ^ Group or List Membership (4.2.11)       
               | PARTSTAT        -- ^ Participation Status (4.2.12)         
               | RANGE           -- ^ Recurrence Identfier Range (4.2.13)      
               | RELATED         -- ^ Alarm Trigger Relationship (4.2.14)        
               | RELTYPE         -- ^ Relationship Type (4.2.15)        
               | ROLE            -- ^ Participation Role (4.2.16)     
               | RSVP            -- ^ RSVP Expectation (4.2.17)     
               | SENT_BY         -- ^ Sent By (4.2.18)        
               | TZIDp           -- ^ Time Zone Identifier (4.2.19)      
               | VALUE           -- ^ Value Data Types (4.2.20)      
               | X_p String      -- ^ Non-standard Parameter (4.2.21)           
               deriving (Eq, Show)                 

data ParamValue = Paramtext    String
                | QuotedString String
                deriving (Eq, Show)

------------------------------------------------------------
-- Parsed datastructers and helper functions
------------------------------------------------------------

type Value      = String
type Property   = (PropertyName, [Param], Value)

-- | iCalendar data
data ICalendar  = ICalendar { calendarProperties :: [Property]
                            , components         :: [Component]
                            } deriving (Eq, Show)

mkICalendar     = ICalendar [] []                          

addCalendarProperty prop calendar = calendar { calendarProperties = prop : calendarProperties calendar }
addComponent        comp calendar = calendar { components         = comp : components calendar }

-- | Component data
data Component   = Component { cName         :: ComponentName
                             , properties    :: [Property]
                             , subcomponents :: [Component]
                             } deriving (Eq, Show)

mkComponent name = Component name [] []

addProperty     prop component = component { properties    = prop : properties component }
addSubcomponent comp component = component { subcomponents = comp : subcomponents component }

-- | Parameter data
data Param = Param { pName   :: ParamName
                   , pValues :: [ParamValue]
                   } deriving (Eq, Show)

------------------------------------------------------------
-- Parser helpers
------------------------------------------------------------

-- | Case insensitive string matching.  Also wrapped in a 'try' so the matching
-- will fail as a whole and not mid-string.
ciString = try . sequence . (map (\c -> char (toUpper c) <|> char (toLower c)))

crlf = string "\r\n"

-- | Parses any character except controls and double quotation marks
pQsafeChar = oneOf $ wsp ++ map chr (0x21 : [0x23..0x7e]) ++ nonUsAscii

-- | Parses any character except controls, double quotation marks, ";", ":", ","
pSafeChar = oneOf $ wsp ++ map chr (0x21 : [0x23..0x2b] ++ [0x2d..0x39] ++ [0x3c..0x7e]) ++ nonUsAscii

-- | Parses any textual character
pValueChar = oneOf $ wsp ++ map chr [0x21..0x7e] ++ nonUsAscii

-- | Non US-ASCII characters
nonUsAscii = map chr [0x80..0xF8]

-- | Linear white space
wsp = " \t"

------------------------------------------------------------
-- Scanner, creating a tokenstream
------------------------------------------------------------

-- TODO folding the lines should keep the original positions
-- | Parses a "String" to an "ICalendar".  Will unfold folded lines, so error messages might be off.
parser = do input <- getInput
            setInput (foldLines input)
            pICalendar

-- | Remove crlf's followed by space or tab, this is called "line folding".
foldLines [] = []
foldLines ('\r':'\n':' ' :lines) = foldLines lines
foldLines ('\r':'\n':'\t':lines) = foldLines lines
foldLines (l:lines)              = l : foldLines lines

pICalendar =  pBegin <* pVCalendar 
           *> pCalendarPropertiesAndComponents
          <*  eof

pVCalendar = ciString "VCALENDAR" <* crlf

pCalendarPropertiesAndComponents =
         addCalendarProperty <$> pProperty  <*> pCalendarPropertiesAndComponents
     <|> addComponent        <$> pComponent <*> pCalendarPropertiesAndComponents
     <|> mkICalendar         <$  pEnd       <*  pVCalendar

pComponent = do component <- pComponentBegin
                pPropertiesAndSubcomponents component

pComponentBegin = mkComponent <$ pBegin <*> pComponentName

pPropertiesAndSubcomponents component =
       addProperty     <$> pProperty  <*> pPropertiesAndSubcomponents component
  <|>  addSubcomponent <$> pComponent <*> pPropertiesAndSubcomponents component
  <|>  component       <$  pComponentEnd component

pProperty = (,,) <$> pPropertyName <*> many pParam <*> pValue

pParam = Param <$ char ';' <*> pParamName <*> sepBy pParamValue (char ',')

-- NOTE: pQuotedString has to go first, because the value may be empty
pParamValue   = pQuotedString <|> pParamtext 
pParamtext    = Paramtext    <$> many pSafeChar
pQuotedString = QuotedString <$  char '"' <*> many pQsafeChar <* char '"'

pValue = char ':' *> many pValueChar <* crlf

-- TODO: This will disappear once every component has its own parser
pComponentEnd c = do pEnd
                     component <- pComponentName
                     if component == cName c
                           then return component
                           else unexpected $ "END:" ++ show (cName c)

pBegin  = ciString "BEGIN:"
pEnd    = ciString "END:"

pComponentName = ( VEVENT           <$ ciString "VEVENT"
               <|> VTODO            <$ ciString "VTODO"
               <|> VJOURNAL         <$ ciString "VJOURNAL"
               <|> VFREEBUSY        <$ ciString "VFREEBUSY"
               <|> VTIMEZONE        <$ ciString "VTIMEZONE"
               <|> X_c              <$ ciString "X-" <*> many1 (alphaNum <|> char '-')
               <|> STANDARD         <$ ciString "STANDARD"
               <|> DAYLIGHT         <$ ciString "DAYLIGHT"
               <|> VALARM           <$ ciString "VALARM"
               ) <* crlf
       
pPropertyName  =   CALSCALE         <$ ciString "CALSCALE"
               <|> METHOD           <$ ciString "METHOD"
               <|> PRODID           <$ ciString "PRODID"
               <|> VERSION          <$ ciString "VERSION"
               <|> ATTACH           <$ ciString "ATTACH"
               <|> CATEGORIES       <$ ciString "CATEGORIES"
               <|> CLASS            <$ ciString "CLASS"
               <|> COMMENT          <$ ciString "COMMENT"
               <|> DESCRIPTION      <$ ciString "DESCRIPTION"
               <|> GEO              <$ ciString "GEO"
               <|> LOCATION         <$ ciString "LOCATION"
               <|> PERCENT_COMPLETE <$ ciString "PERCENT-COMPLETE"
               <|> PRIORITY         <$ ciString "PRIORITY"
               <|> RESOURCES        <$ ciString "RESOURCES"
               <|> STATUS           <$ ciString "STATUS"
               <|> SUMMARY          <$ ciString "SUMMARY"
               <|> COMPLETED        <$ ciString "COMPLETED"
               <|> DTEND            <$ ciString "DTEND"
               <|> DUE              <$ ciString "DUE"
               <|> DTSTART          <$ ciString "DTSTART"
               <|> DURATION         <$ ciString "DURATION"
               <|> FREEBUSY         <$ ciString "FREEBUSY"
               <|> TRANSP           <$ ciString "TRANSP"
               <|> TZID             <$ ciString "TZID"
               <|> TZNAME           <$ ciString "TZNAME"
               <|> TZOFFSETFROM     <$ ciString "TZOFFSETFROM"
               <|> TZOFFSETTO       <$ ciString "TZOFFSETTO"
               <|> TZURL            <$ ciString "TZURL"
               <|> ATTENDEE         <$ ciString "ATTENDEE"
               <|> CONTACT          <$ ciString "CONTACT"
               <|> ORGANIZER        <$ ciString "ORGANIZER"
               <|> RECURRENCE_ID    <$ ciString "RECURRENCE-ID"
               <|> RELATED_TO       <$ ciString "RELATED-TO"
               <|> URL              <$ ciString "URL"
               <|> UID              <$ ciString "UID"
               <|> EXDATE           <$ ciString "EXDATE"
               <|> EXRULE           <$ ciString "EXRULE"
               <|> RDATE            <$ ciString "RDATE"
               <|> RRULE            <$ ciString "RRULE"
               <|> ACTION           <$ ciString "ACTION"
               <|> REPEAT           <$ ciString "REPEAT"
               <|> TRIGGER          <$ ciString "TRIGGER"
               <|> CREATED          <$ ciString "CREATED"
               <|> DTSTAMP          <$ ciString "DTSTAMP"
               <|> LAST_MODIFIED    <$ ciString "LAST-MODIFIED"
               <|> SEQUENCE         <$ ciString "SEQUENCE"
               <|> X_               <$ ciString "X-" <*> many1 (alphaNum <|> char '-')
               <|> REQUEST_STATUS   <$ ciString "REQUEST-STATUS"

pParamName     = ( ALTREP           <$ ciString "ALTREP"
               <|> CN               <$ ciString "CN"
               <|> CUTYPE           <$ ciString "CUTYPE"
               <|> DELEGATED_FROM   <$ ciString "DELEGATED-FROM"
               <|> DELEGATED_TO     <$ ciString "DELEGATED-TO"
               <|> DIR              <$ ciString "DIR"
               <|> ENCODING         <$ ciString "ENCODING"
               <|> FMTYPE           <$ ciString "FMTYPE"
               <|> FBTYPE           <$ ciString "FBTYPE"
               <|> LANGUAGE         <$ ciString "LANGUAGE"
               <|> MEMBER           <$ ciString "MEMBER"
               <|> PARTSTAT         <$ ciString "PARTSTAT"
               <|> RANGE            <$ ciString "RANGE"
               <|> RELATED          <$ ciString "RELATED"
               <|> RELTYPE          <$ ciString "RELTYPE"
               <|> ROLE             <$ ciString "ROLE"
               <|> RSVP             <$ ciString "RSVP"
               <|> SENT_BY          <$ ciString "SENT-BY"
               <|> TZIDp            <$ ciString "TZID"
               <|> VALUE            <$ ciString "VALUE"
               <|> X_p              <$ ciString "X-" <*> many1 (alphaNum <|> char '-')
               ) <* char '='