module System.Time.Parse (parseCalendarTime,parsecCalendarTime) where import Control.Monad import Data.Char (isSpace) import System.Locale import System.Time import Text.ParserCombinators.Parsec -- | Parse a date string as formatted by 'formatCalendarTime'. -- -- The resulting 'CalendarTime' will only have those fields set that -- are represented by a format specifier in the format string, and those -- fields will be set to the values given in the date string. -- If the same field is specified multiple times, the rightmost -- occurence takes precedence. -- -- The resulting date is not neccessarily a valid date. For example, -- if there is no day of the week specifier in the format string, -- the value of 'ctWDay' will most likely be invalid. -- -- Format specifiers are % followed by some character. All other -- characters are treated literally. Whitespace in the format string -- matches zero or more arbitrary whitespace characters. -- -- Format specifiers marked with * are matched, but do not set any -- field in the output. -- -- Some of the format specifiers are marked as space-padded or -- zero-padded. Regardless of this, space-padded, zero-padded -- or unpadded inputs are accepted. Note that strings using -- unpadded fields without separating the fields may cause -- strange parsing. -- -- Supported format specfiers: -- -- [%%] a % character. -- -- [%a] locale's abbreviated weekday name (Sun ... Sat) -- -- [%A] locale's full weekday name (Sunday .. Saturday) -- -- [%b] locale's abbreviated month name (Jan..Dec) -- -- [%B] locale's full month name (January..December) -- -- [%c] locale's date and time format (Thu Mar 25 17:47:03 CET 2004) -- -- [%C] century [00-99] -- -- [%d] day of month, zero padded (01..31) -- -- [%D] date (%m\/%d\/%y) -- -- [%e] day of month, space padded ( 1..31) -- -- [%h] same as %b -- -- [%H] hour, 24-hour clock, zero padded (00..23) -- -- [%I] hour, 12-hour clock, zero padded (01..12) -- -- [%j] day of the year, zero padded (001..366) -- -- [%k] hour, 24-hour clock, space padded ( 0..23) -- -- [%l] hour, 12-hour clock, space padded ( 1..12) -- -- [%m] month, zero padded (01..12) -- -- [%M] minute, zero padded (00..59) -- -- [%n] a newline character -- -- [%p] locale's AM or PM indicator -- -- [%r] locale's 12-hour time format (hh:mm:ss AM\/PM) -- -- [%R] hours and minutes, 24-hour clock (hh:mm) -- -- [%s] * seconds since '00:00:00 1970-01-01 UTC' -- -- [%S] seconds, zero padded (00..59) -- -- [%t] a horizontal tab character -- -- [%T] time, 24-hour clock (hh:mm:ss) -- -- [%u] numeric day of the week (1=Monday, 7=Sunday) -- -- [%U] * week number, weeks starting on Sunday, zero padded (01-53) -- -- [%V] * week number (as per ISO-8601), -- week 1 is the first week with a Thursday, -- zero padded, (01-53) -- -- [%w] numeric day of the week, (0=Sunday, 6=Monday) -- -- [%W] * week number, weeks starting on Monday, zero padded (01-53) -- -- [%x] locale's preferred way of printing dates (%m\/%d\/%y) -- -- [%X] locale's preferred way of printing time. (%H:%M:%S) -- -- [%y] year, within century, zero padded (00..99) -- -- [%Y] year, including century. Not padded -- (this is probably a bug, but formatCalendarTime does -- it this way). (0-9999) -- -- [%Z] time zone abbreviation (e.g. CET) or RFC-822 style numeric -- timezone (-0500) parseCalendarTime :: TimeLocale -- ^ Time locale -> String -- ^ Date format -> String -- ^ String to parse -> Maybe CalendarTime -- ^ 'Nothing' if parsing failed. parseCalendarTime l fmt s = case runParser parser epoch "" s of Left err -> Nothing Right p -> Just p where parser = pCalendarTime l fmt >> getState -- | A Parsec combinator version of 'parseCalendarTime'. parsecCalendarTime :: TimeLocale -- ^ Time locale -> String -- ^ Date format, see 'parseCalendarTime'. -> Parser CalendarTime parsecCalendarTime l fmt = parserWithState epoch (pCalendarTime l fmt) -- FIXME: hackety-hack-hack parserWithState :: st -> GenParser tok st a -> GenParser tok st' st parserWithState st p = do pst <- getParserState let pos = statePos pst let p' = setPosition pos >> p >> getParserState r = runParser p' st (sourceName pos) (stateInput pst) case r of Left err -> fail $ show err -- FIXME: hack, there is no way -- to fail with a given ParseError Right pst' -> do setParserState (pst { stateInput = stateInput pst', statePos = statePos pst' }) return (stateUser pst') -- FIXME: verify input -- FIXME: years outside 1000-9999 probably don't work -- FIXME: set ctYDay -- FIXME: set ctIsDST -- FIXME: missing formats from GNU date(1): -- %F same as %Y-%m-%d -- %g the 2-digit year corresponding to the %V week number -- %G the 4-digit year corresponding to the %V week number -- %N nanoseconds (000000000..999999999) -- %P locale's lower case am or pm indicator (blank in many locales) -- %z RFC-822 style numeric timezone (-0500) (a nonstandard extension) -- | The Parsec parser used by 'parseCalendarTime'. pCalendarTime :: TimeLocale -- ^ Time locale -> String -- ^ Date format, see 'parseCalendarTime'. -> CharParser CalendarTime () pCalendarTime l fmt = doFmt fmt where -- not padded -- FIXME: implement doFmt ('%':'-':cs) = doFmt ('%':cs) -- space padded -- FIXME: implement doFmt ('%':'_':cs) = doFmt ('%':cs) doFmt ('%':c:cs) = decode c >> doFmt cs doFmt (c:cs) | isSpace c = whitespace >> doFmt cs doFmt (c:cs) = char c >> doFmt cs doFmt "" = return () decode '%' = char '%' >> return () decode 'a' = (parseEnum $ map snd $ wDays l) >>= setWDay decode 'A' = (parseEnum $ map fst $ wDays l) >>= setWDay decode 'b' = (parseEnum $ map snd $ months l) >>= setMonth decode 'B' = (parseEnum $ map fst $ months l) >>= setMonth decode 'c' = doFmt (dateTimeFmt l) decode 'C' = read2 >>= \c -> updateYear (\y -> c * 100 + y `rem` 100) decode 'd' = read2 >>= setDay decode 'D' = doFmt "%m/%d/%y" decode 'e' = read2 >>= setDay decode 'h' = decode 'b' decode 'H' = read2 >>= setHour decode 'I' = read2 >>= setHour12 decode 'j' = read3 >>= setYDay decode 'k' = read2 >>= setHour decode 'l' = read2 >>= setHour12 decode 'm' = read2 >>= \mon -> setMonth (toEnum (mon-1)) decode 'M' = read2 >>= setMin -- strptime(3) accepts "arbitrary whitespace" for %n decode 'n' = whitespace >> return () decode 'p' = do x <- (string am >> return 0) <|> (string pm >> return 12) updateHour (\h -> x + h `rem` 12) where (am,pm) = amPm l decode 'r' = doFmt (time12Fmt l) decode 'R' = doFmt "%H:%M" -- FIXME: implement %s. -- FIXME: implement %s in formatCalendarTime decode 's' = int >> return () decode 'S' = read2 >>= setSec -- FIXME: strptime(3) accepts "arbitrary whitespace" for %t decode 't' = char '\t' >> return () decode 'T' = doFmt "%H:%M:%S" decode 'u' = readN 1 >>= setWDay . toEnum . (\w -> if w == 7 then 0 else w) -- FIXME: implement %U. decode 'U' = read2 >> return () -- FIXME: implement %V. decode 'V' = read2 >> return () decode 'w' = readN 1 >>= setWDay . toEnum -- FIXME: implement %W. decode 'W' = read2 >> return () decode 'x' = doFmt (dateFmt l) decode 'X' = doFmt (timeFmt l) -- FIXME: should probably be zero padded, -- need to change formatCalendarTime too decode 'Y' = readN 4 >>= setYear -- FIXME: maybe 04 should be 2004, not 1904? decode 'y' = read2 >>= \c -> updateYear (\y -> (y `quot` 100) * 100 + c) -- FIXME: are timezone names always [A-Z]+ ? -- FIXME: set ctTZ when parsing timezone name and -- ctTZName when parsing offset decode 'Z' = tzname <|> tzoffset where tzname = many1 (oneOf ['A'..'Z']) >>= setTZName tzoffset = do s <- sign h <- read2 m <- read2 setTZ (s * (h * 3600 + m * 60)) -- following the example of strptime(3), -- whitespace matches zero or more whitespace -- characters in the input string decode c | isSpace c = spaces >> return () decode c = char c >> return () epoch :: CalendarTime epoch = CalendarTime { ctYear = 1970, ctMonth = January, ctDay = 1, ctHour = 0, ctMin = 0, ctSec = 0, ctPicosec = 0, ctWDay = Thursday, ctYDay = 1, ctTZName = "UTC", ctTZ = 0, ctIsDST = False } parseEnum :: Enum a => [String] -> CharParser st a parseEnum ss = choice (zipWith tryString ss (enumFrom (toEnum 0))) where tryString s x = try (string s) >> return x setYear x = updateState (\t -> t{ ctYear = x }) setMonth x = updateState (\t -> t{ ctMonth = x }) setDay x = updateState (\t -> t{ ctDay = x }) setHour x = updateState (\t -> t{ ctHour = x }) setMin x = updateState (\t -> t{ ctMin = x }) setSec x = updateState (\t -> t{ ctSec = x }) setWDay x = updateState (\t -> t{ ctWDay = x }) setYDay x = updateState (\t -> t{ ctYDay = x }) setTZName x = updateState (\t -> t{ ctTZName = x }) setTZ x = updateState (\t -> t{ ctTZ = x }) updateYear f = updateState (\t -> t{ ctYear = f (ctYear t) }) updateHour f = updateState (\t -> t{ ctHour = f (ctHour t) }) setHour12 x = updateHour (\h -> (h `quot` 12) * 12 + from12 x) where from12 h = if h == 12 then 0 else h read2, read3 :: CharParser st Int read2 = readN 2 read3 = readN 3 -- | Read up to a given number of digits, optionally left-padded -- with whitespace and interpret them as an 'Int'. readN :: Int -> CharParser st Int readN n = liftM read (spaces >> choice [try (count m digit) | m <- [n,n-1..1]]) int :: CharParser st Int int = liftM read (many1 digit) sign :: CharParser st Int sign = (char '+' >> return 1) <|> (char '-' >> return (-1)) -- | Matches zero or more whitespace characters. whitespace :: CharParser st String whitespace = many (satisfy isSpace)