{-| Parser for the iCalendar format (version 2.0, RFC2445). Features: * Unfolds folded lines (see RFC) * Supports all IANA iCalendar tokens /Warning!/ This is version 0.0, important things are missing * Property and parameter values are not parsed, but kept as in the file. * There are no restrictions on the properties components can have. * Line unfolding causes error messages to report the wrong line. This package is meant to grow to a full-fledged data definition, printer and parser for the iCalendar format. But that will probably take some time. So, if you've come here because you need this, please e-mail me (Eelco Lempsink). -} module Text.ICalendar.Parser ( -- * Tokens from RFC2445 ComponentName(..) , PropertyName(..) , ParamName(..) , ParamValue(..) -- * Data definitions , Property , Value , ICalendar(..) , Component(..) , Param(..) -- * Parser , parser ) where ------------------------------------------------------------ -- Imports ------------------------------------------------------------ import Data.Char import Control.Monad import Control.Applicative import Control.Monad (MonadPlus(..), ap) -- Hide Parsec's definitions of some Applicative functions. import Text.ParserCombinators.Parsec hiding (many, optional, (<|>)) -- Every Monad is an Applicative. instance Applicative (GenParser s a) where pure = return (<*>) = ap -- Every MonadPlus is an Alternative. instance Alternative (GenParser s a) where empty = mzero (<|>) = mplus ------------------------------------------------------------ -- Tokens, as defined in RFC2445. ------------------------------------------------------------ -- | Calendar Component names (4.6) data ComponentName = VEVENT -- ^ Event Component (4.6.1) | VTODO -- ^ To-do Component (4.6.2) | VJOURNAL -- ^ Journal Component (4.6.3) | VFREEBUSY -- ^ Free/Busy Component (4.6.4) | VTIMEZONE -- ^ Time Zone Component (4.6.5) | X_c String -- ^ Non-standard/Experimental Component -- Time Zone subcomponents | STANDARD -- ^ Standard Time Sub-component (4.6.5) | DAYLIGHT -- ^ Daylight Saving Time Sub-component (4.6.5) -- Event and To-do subcomponent | VALARM -- ^ Alarm Sub-component (4.6.6) deriving (Eq, Show) -- | Property names for Calendar Properties (4.7) and Component Properties (4.8) data PropertyName = -- Calendar properties (4.7) CALSCALE -- ^ Calendar Scale (4.7.1) | METHOD -- ^ Method (4.7.2) | PRODID -- ^ Product Identifier (4.7.3) | VERSION -- ^ Version (4.7.4) -- Component properties (4.8.1) | ATTACH -- ^ Attachment (4.8.1.1) | CATEGORIES -- ^ Categories (4.8.1.2) | CLASS -- ^ Classification (4.8.1.3) | COMMENT -- ^ Comment (4.8.1.4) | DESCRIPTION -- ^ Description (4.8.1.5) | GEO -- ^ Geographic Position (4.8.1.6) | LOCATION -- ^ Location (4.8.1.7) | PERCENT_COMPLETE -- ^ Percent Complete (4.8.1.8) | PRIORITY -- ^ Priority (4.8.1.9) | RESOURCES -- ^ Resources (4.8.1.10) | STATUS -- ^ Status (4.8.1.11) | SUMMARY -- ^ Summary (4.8.1.12) -- Date and Time Component properties (4.8.2) | COMPLETED -- ^ Date/Time Completed (4.8.2.1) | DTEND -- ^ Date/Time End (4.8.2.2) | DUE -- ^ Date/Time Due (4.8.2.3) | DTSTART -- ^ Date/Time Start (4.8.2.4) | DURATION -- ^ Duration (4.8.2.5) | FREEBUSY -- ^ Free/Busy Time (4.8.2.6) | TRANSP -- ^ Time Transparency (4.8.2.7) -- Time Zone Component properties (4.8.3) | TZID -- ^ Time Zone Identifier (4.8.3.1) | TZNAME -- ^ Time Zone Name (4.8.3.2) | TZOFFSETFROM -- ^ Time Zone Offset From (4.8.3.3) | TZOFFSETTO -- ^ Time Zone Offset To (4.8.3.4) | TZURL -- ^ Time Zone URL (4.8.3.5) -- Relationship Component properties (4.8.4) | ATTENDEE -- ^ Attendee (4.8.4.1) | CONTACT -- ^ Contact (4.8.4.2) | ORGANIZER -- ^ Organizer (4.8.4.3) | RECURRENCE_ID -- ^ Recurrence ID (4.8.4.4) | RELATED_TO -- ^ Related To (4.8.4.5) | URL -- ^ Uniform Resource Locator (4.8.4.6) | UID -- ^ Unique Identfier (4.8.4.7) -- Recurrence Component properties (4.8.5) | EXDATE -- ^ Exception Date/Times (4.8.5.1) | EXRULE -- ^ Exception Rule (4.8.5.2) | RDATE -- ^ Recurrence Date/Times (4.8.5.3) | RRULE -- ^ Recurrence Rule (4.8.5.4) -- Alarm Component properties (4.8.6) | ACTION -- ^ Action (4.8.6.1) | REPEAT -- ^ Repeat Count (4.8.6.2) | TRIGGER -- ^ Trigger (4.8.6.3) -- Change Management Component properties (4.8.7) | CREATED -- ^ Date/Time Created (4.8.7.1) | DTSTAMP -- ^ Date/Time Stamp (4.8.7.2) | LAST_MODIFIED -- ^ Last Modified (4.8.7.3) | SEQUENCE -- ^ Sequence Number (4.8.7.4) -- Miscellaneous Component properties (4.8.8) | X_ String -- ^ Non-standard Properties (4.8.8.1) | REQUEST_STATUS -- ^ Request Status (4.8.8.2) deriving (Eq, Show) -- | Parameter names (4.2) data ParamName = ALTREP -- ^ Alternate Text Represenation (4.2.1) | CN -- ^ Common Name (4.2.2) | CUTYPE -- ^ Calendar User Type (4.2.3) | DELEGATED_FROM -- ^ Delegators (4.2.4) | DELEGATED_TO -- ^ Delegatees (4.2.5) | DIR -- ^ Directory Entry Reference (4.2.6) | ENCODING -- ^ Inline Encoding (4.2.7) | FMTYPE -- ^ Format Type (4.2.8) | FBTYPE -- ^ Free/Busy Time Type (4.2.9) | LANGUAGE -- ^ Language (4.2.10) | MEMBER -- ^ Group or List Membership (4.2.11) | PARTSTAT -- ^ Participation Status (4.2.12) | RANGE -- ^ Recurrence Identfier Range (4.2.13) | RELATED -- ^ Alarm Trigger Relationship (4.2.14) | RELTYPE -- ^ Relationship Type (4.2.15) | ROLE -- ^ Participation Role (4.2.16) | RSVP -- ^ RSVP Expectation (4.2.17) | SENT_BY -- ^ Sent By (4.2.18) | TZIDp -- ^ Time Zone Identifier (4.2.19) | VALUE -- ^ Value Data Types (4.2.20) | X_p String -- ^ Non-standard Parameter (4.2.21) deriving (Eq, Show) data ParamValue = Paramtext String | QuotedString String deriving (Eq, Show) ------------------------------------------------------------ -- Parsed datastructers and helper functions ------------------------------------------------------------ type Value = String type Property = (PropertyName, [Param], Value) -- | iCalendar data data ICalendar = ICalendar { calendarProperties :: [Property] , components :: [Component] } deriving (Eq, Show) mkICalendar = ICalendar [] [] addCalendarProperty prop calendar = calendar { calendarProperties = prop : calendarProperties calendar } addComponent comp calendar = calendar { components = comp : components calendar } -- | Component data data Component = Component { cName :: ComponentName , properties :: [Property] , subcomponents :: [Component] } deriving (Eq, Show) mkComponent name = Component name [] [] addProperty prop component = component { properties = prop : properties component } addSubcomponent comp component = component { subcomponents = comp : subcomponents component } -- | Parameter data data Param = Param { pName :: ParamName , pValues :: [ParamValue] } deriving (Eq, Show) ------------------------------------------------------------ -- Parser helpers ------------------------------------------------------------ -- | Case insensitive string matching. Also wrapped in a 'try' so the matching -- will fail as a whole and not mid-string. ciString = try . sequence . (map (\c -> char (toUpper c) <|> char (toLower c))) crlf = string "\r\n" -- | Parses any character except controls and double quotation marks pQsafeChar = oneOf $ wsp ++ map chr (0x21 : [0x23..0x7e]) ++ nonUsAscii -- | Parses any character except controls, double quotation marks, ";", ":", "," pSafeChar = oneOf $ wsp ++ map chr (0x21 : [0x23..0x2b] ++ [0x2d..0x39] ++ [0x3c..0x7e]) ++ nonUsAscii -- | Parses any textual character pValueChar = oneOf $ wsp ++ map chr [0x21..0x7e] ++ nonUsAscii -- | Non US-ASCII characters nonUsAscii = map chr [0x80..0xF8] -- | Linear white space wsp = " \t" ------------------------------------------------------------ -- Scanner, creating a tokenstream ------------------------------------------------------------ -- TODO folding the lines should keep the original positions -- | Parses a "String" to an "ICalendar". Will unfold folded lines, so error messages might be off. parser = do input <- getInput setInput (foldLines input) pICalendar -- | Remove crlf's followed by space or tab, this is called "line folding". foldLines [] = [] foldLines ('\r':'\n':' ' :lines) = foldLines lines foldLines ('\r':'\n':'\t':lines) = foldLines lines foldLines (l:lines) = l : foldLines lines pICalendar = pBegin <* pVCalendar *> pCalendarPropertiesAndComponents <* eof pVCalendar = ciString "VCALENDAR" <* crlf pCalendarPropertiesAndComponents = addCalendarProperty <$> pProperty <*> pCalendarPropertiesAndComponents <|> addComponent <$> pComponent <*> pCalendarPropertiesAndComponents <|> mkICalendar <$ pEnd <* pVCalendar pComponent = do component <- pComponentBegin pPropertiesAndSubcomponents component pComponentBegin = mkComponent <$ pBegin <*> pComponentName pPropertiesAndSubcomponents component = addProperty <$> pProperty <*> pPropertiesAndSubcomponents component <|> addSubcomponent <$> pComponent <*> pPropertiesAndSubcomponents component <|> component <$ pComponentEnd component pProperty = (,,) <$> pPropertyName <*> many pParam <*> pValue pParam = Param <$ char ';' <*> pParamName <*> sepBy pParamValue (char ',') -- NOTE: pQuotedString has to go first, because the value may be empty pParamValue = pQuotedString <|> pParamtext pParamtext = Paramtext <$> many pSafeChar pQuotedString = QuotedString <$ char '"' <*> many pQsafeChar <* char '"' pValue = char ':' *> many pValueChar <* crlf -- TODO: This will disappear once every component has its own parser pComponentEnd c = do pEnd component <- pComponentName if component == cName c then return component else unexpected $ "END:" ++ show (cName c) pBegin = ciString "BEGIN:" pEnd = ciString "END:" pComponentName = ( VEVENT <$ ciString "VEVENT" <|> VTODO <$ ciString "VTODO" <|> VJOURNAL <$ ciString "VJOURNAL" <|> VFREEBUSY <$ ciString "VFREEBUSY" <|> VTIMEZONE <$ ciString "VTIMEZONE" <|> X_c <$ ciString "X-" <*> many1 (alphaNum <|> char '-') <|> STANDARD <$ ciString "STANDARD" <|> DAYLIGHT <$ ciString "DAYLIGHT" <|> VALARM <$ ciString "VALARM" ) <* crlf pPropertyName = CALSCALE <$ ciString "CALSCALE" <|> METHOD <$ ciString "METHOD" <|> PRODID <$ ciString "PRODID" <|> VERSION <$ ciString "VERSION" <|> ATTACH <$ ciString "ATTACH" <|> CATEGORIES <$ ciString "CATEGORIES" <|> CLASS <$ ciString "CLASS" <|> COMMENT <$ ciString "COMMENT" <|> DESCRIPTION <$ ciString "DESCRIPTION" <|> GEO <$ ciString "GEO" <|> LOCATION <$ ciString "LOCATION" <|> PERCENT_COMPLETE <$ ciString "PERCENT-COMPLETE" <|> PRIORITY <$ ciString "PRIORITY" <|> RESOURCES <$ ciString "RESOURCES" <|> STATUS <$ ciString "STATUS" <|> SUMMARY <$ ciString "SUMMARY" <|> COMPLETED <$ ciString "COMPLETED" <|> DTEND <$ ciString "DTEND" <|> DUE <$ ciString "DUE" <|> DTSTART <$ ciString "DTSTART" <|> DURATION <$ ciString "DURATION" <|> FREEBUSY <$ ciString "FREEBUSY" <|> TRANSP <$ ciString "TRANSP" <|> TZID <$ ciString "TZID" <|> TZNAME <$ ciString "TZNAME" <|> TZOFFSETFROM <$ ciString "TZOFFSETFROM" <|> TZOFFSETTO <$ ciString "TZOFFSETTO" <|> TZURL <$ ciString "TZURL" <|> ATTENDEE <$ ciString "ATTENDEE" <|> CONTACT <$ ciString "CONTACT" <|> ORGANIZER <$ ciString "ORGANIZER" <|> RECURRENCE_ID <$ ciString "RECURRENCE-ID" <|> RELATED_TO <$ ciString "RELATED-TO" <|> URL <$ ciString "URL" <|> UID <$ ciString "UID" <|> EXDATE <$ ciString "EXDATE" <|> EXRULE <$ ciString "EXRULE" <|> RDATE <$ ciString "RDATE" <|> RRULE <$ ciString "RRULE" <|> ACTION <$ ciString "ACTION" <|> REPEAT <$ ciString "REPEAT" <|> TRIGGER <$ ciString "TRIGGER" <|> CREATED <$ ciString "CREATED" <|> DTSTAMP <$ ciString "DTSTAMP" <|> LAST_MODIFIED <$ ciString "LAST-MODIFIED" <|> SEQUENCE <$ ciString "SEQUENCE" <|> X_ <$ ciString "X-" <*> many1 (alphaNum <|> char '-') <|> REQUEST_STATUS <$ ciString "REQUEST-STATUS" pParamName = ( ALTREP <$ ciString "ALTREP" <|> CN <$ ciString "CN" <|> CUTYPE <$ ciString "CUTYPE" <|> DELEGATED_FROM <$ ciString "DELEGATED-FROM" <|> DELEGATED_TO <$ ciString "DELEGATED-TO" <|> DIR <$ ciString "DIR" <|> ENCODING <$ ciString "ENCODING" <|> FMTYPE <$ ciString "FMTYPE" <|> FBTYPE <$ ciString "FBTYPE" <|> LANGUAGE <$ ciString "LANGUAGE" <|> MEMBER <$ ciString "MEMBER" <|> PARTSTAT <$ ciString "PARTSTAT" <|> RANGE <$ ciString "RANGE" <|> RELATED <$ ciString "RELATED" <|> RELTYPE <$ ciString "RELTYPE" <|> ROLE <$ ciString "ROLE" <|> RSVP <$ ciString "RSVP" <|> SENT_BY <$ ciString "SENT-BY" <|> TZIDp <$ ciString "TZID" <|> VALUE <$ ciString "VALUE" <|> X_p <$ ciString "X-" <*> many1 (alphaNum <|> char '-') ) <* char '='