{-# LANGUAGE DeriveDataTypeable #-} {-# OPTIONS_HADDOCK hide #-} ----------------------------------------------------------------------------- -- | -- Module : Data.Time.LocalTime.TimeZone.Olson.Parse -- Copyright : Yitzchak Gale 2010 -- -- Maintainer : Yitzchak Gale -- Portability : portable -- -- A parser for binary Olson timezone files whose format is specified -- by the tzfile(5) man page on Unix-like systems. For more -- information about this format, see -- . Functions are provided for -- converting the parsed data into 'TimeZoneSeries' objects. {- Copyright (c) 2010 Yitzchak Gale. All rights reserved. For licensing information, see the BSD3-style license in the file LICENSE that was originally distributed by the author together with this file. -} module Data.Time.LocalTime.TimeZone.Olson.Parse ( -- * Parsing Olson timezone files getTimeZoneSeriesFromOlsonFile, getOlsonFromFile, olsonToTimeZoneSeries, getOlson, OlsonError ) where import Data.Time.LocalTime.TimeZone.Olson.Types import Data.Time.LocalTime.TimeZone.Series (TimeZoneSeries(..)) import Data.Time (TimeZone(TimeZone)) import Data.Time.Clock.POSIX (posixSecondsToUTCTime) import Data.Binary.Get (Get, runGet, getWord8, getWord32be, getWord64be, getByteString, getRemainingLazyByteString) import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import Data.Monoid (mappend) import Data.List (sortBy, groupBy) import Data.Maybe (listToMaybe, fromMaybe) import Data.Word (Word8) import Data.Int (Int32, Int64) import Data.Ord (comparing) import Data.Function (on) import Data.Typeable (Typeable) import Control.Monad (guard, replicateM, replicateM_, when) import Control.Exception.Extensible (try, throw, Exception, ErrorCall) -- | An exception indicating that the binary data being parsed was not -- valid Olson timezone data data OlsonError = OlsonError String deriving Typeable instance Show OlsonError where show (OlsonError msg) = msg instance Exception OlsonError -- | Convert parsed Olson timezone data into a @TimeZoneSeries@. olsonToTimeZoneSeries :: OlsonData -> Maybe TimeZoneSeries olsonToTimeZoneSeries (OlsonData ttimes ttinfos@(dflt0:_) _ _) = fmap (TimeZoneSeries $ mkTZ dflt) . mapM (lookupTZ ttinfos) . uniqTimes . sortBy futureToPast $ ttimes where dflt = fromMaybe dflt0 . listToMaybe $ filter isStd ttinfos isStd (TtInfo _ isdst _ _) = not isdst mkTZ (TtInfo gmtoff isdst _ abbr) = TimeZone ((gmtoff + 30) `div` 60) isdst abbr lookupTZ ttinfos ttime = fmap (((,) $ toUTC ttime) . mkTZ) . listToMaybe $ drop (transIndex ttime) ttinfos toUTC = posixSecondsToUTCTime . fromIntegral . transTime uniqTimes = map last . groupBy ((==) `on` transTime) futureToPast = comparing $ negate . transTime olsonToTimeZoneSeries _ = Nothing -- | Read timezone data from a binary Olson timezone file and convert -- it into a @TimeZoneSeries@ for use together with the types and -- fucntions of "Data.Time". This is the function from this module -- for which you are most likely to have use. -- -- If the values in the Olson timezone file exceed the standard size -- limits (see 'defaultLimits'), this function throws an -- exception. For other behavior, use 'getOlson' and -- 'Data.Binary.Get.runGet' directly. getTimeZoneSeriesFromOlsonFile :: FilePath -> IO TimeZoneSeries getTimeZoneSeriesFromOlsonFile fp = getOlsonFromFile fp >>= maybe (throwOlson fp "no timezone found in OlsonData") return . olsonToTimeZoneSeries -- | Parse a binary Olson timezone file. -- -- If the values in the Olson timezone file exceed the standard size -- limits (see 'defaultLimits'), this function throws an -- exception. For other behavior, use 'getOlson' and -- 'Data.Binary.Get.runGet' directly. getOlsonFromFile :: FilePath -> IO OlsonData getOlsonFromFile fp = do e <- try . fmap (runGet $ getOlson defaultLimits) $ L.readFile fp either (formatError fp) return e formatError :: FilePath -> ErrorCall -> IO a formatError fp e = throwOlson fp $ show e -- | A binary parser for binary Olson timezone files getOlson :: SizeLimits -> Get OlsonData getOlson limits = do (version, part1) <- getOlsonPart True limits get32bitInteger -- There is one part for Version 1 format data, and two parts and a POSIX -- TZ string for Version 2 format data case version of 0 -> return part1 50 -> do (_, part2) <- getOlsonPart False limits get64bitInteger posixTZ <- getPosixTZ return $ part1 `mappend` part2 `mappend` posixTZ _ -> verify (const False) "invalid version number" undefined -- Parse the part of an Olson file that contains timezone data getOlsonPart :: Integral a => Bool -> SizeLimits -> Get a -> Get (Word8, OlsonData) getOlsonPart verifyMagic limits getTime = do magic <- fmap (toASCII . B.unpack) $ getByteString 4 when verifyMagic $ verify_ (== "TZif") "missing magic number" magic version <- getWord8 replicateM_ 15 getWord8 -- padding nulls tzh_ttisgmtcnt <- get32bitInt tzh_ttisstdcnt <- get32bitInt tzh_leapcnt <- get32bitInt >>= verify (withinLimit maxLeaps) "too many leap second specifications" tzh_timecnt <- get32bitInt >>= verify (withinLimit maxTimes) "too many timezone transitions" tzh_typecnt <- get32bitInt >>= verify (withinLimit maxTypes) "too many timezone type specifications" verify (withinLimit maxTypes) "too many isgmt specifiers" tzh_ttisgmtcnt verify (withinLimit maxTypes) "too many isstd specifiers" tzh_ttisstdcnt tzh_charcnt <- get32bitInt >>= verify (withinLimit maxAbbrChars) "too many tilezone specifiers" times <- fmap (map toInteger) $ replicateM tzh_timecnt getTime indexes <- replicateM tzh_timecnt get8bitInt ttinfos <- replicateM tzh_typecnt getTtInfo abbr_chars <- fmap (toASCII . B.unpack) $ getByteString tzh_charcnt leaps <- replicateM tzh_leapcnt $ getLeapInfo getTime isstds <- replicateM tzh_ttisstdcnt getBool isgmts <- replicateM tzh_ttisgmtcnt getBool return (version, OlsonData (zipWith Transition times indexes) (map (flip lookupAbbr abbr_chars) . zipWith setTtype ttinfos $ zipWithExtend boolsToTType False False isstds isgmts) leaps Nothing ) where withinLimit limit value = maybe True (value <=) $ limit limits lookupAbbr (TtInfo gmtoff isdst ttype abbrind) = TtInfo gmtoff isdst ttype . takeWhile (/= '\NUL') . drop abbrind setTtype ttinfo ttype = ttinfo {tt_ttype = ttype} boolsToTType _ isgmt | isgmt = UTC boolsToTType isstd _ | isstd = Std | otherwise = Wall -- A variant of zipWith whose result is the length of the longer -- rather than the shorter list, by extending the shorter list with -- a default value zipWithExtend :: (a -> b -> c) -> a -> b -> [a] -> [b] -> [c] zipWithExtend f x0 y0 (x:xs) (y:ys) = f x y : zipWithExtend f x0 y0 xs ys zipWithExtend f x0 _ [] ys = map (f x0) ys zipWithExtend f _ y0 xs _ = map (flip f y0) xs -- Parse a POSIX-style TZ string. -- We don't try to understand the TZ string, we just pass it along whole. getPosixTZ :: Get OlsonData getPosixTZ = do getWord8 >>= verify (== 10) "POSIX TZ string not preceded by newline" posixTZ <- fmap (L.takeWhile (/= 10)) getRemainingLazyByteString -- We don't check for the trailing newline, in order to keep it lazy return . OlsonData [] [] [] $ do guard (not $ L.null posixTZ) Just . toASCII $ L.unpack posixTZ -- Parse a ttinfo struct. Each ttinfo struct corresponds to a single -- Data.Time.TimeZone object. getTtInfo :: Get (TtInfo Int) getTtInfo = do gmtoff <- get32bitInt isdst <- getBool abbrind <- get8bitInt return $ TtInfo gmtoff isdst Wall abbrind -- Parse leap second info. (usually not used) getLeapInfo :: Integral a => Get a -> Get LeapInfo getLeapInfo getTime = do lTime <- fmap toInteger getTime lOffset <- get32bitInt return $ LeapInfo lTime lOffset -- Our 8-bit ints are unsigned, so we can convert them directly get8bitInt :: Get Int get8bitInt = fmap fromIntegral getWord8 getInt32 :: Get Int32 getInt32 = fmap fromIntegral getWord32be get32bitInt :: Get Int get32bitInt = fmap fromIntegral getInt32 -- via Int32 to get the sign right -- in case we are on a 64-bit platform get32bitInteger :: Get Integer get32bitInteger = fmap fromIntegral getInt32 -- via Int32 to get the sign right getInt64 :: Get Int64 getInt64 = fmap fromIntegral getWord64be get64bitInteger :: Get Integer get64bitInteger = fmap fromIntegral getInt64 -- via Int64 to get the sign right getBool :: Get Bool getBool = fmap (/= 0) getWord8 toASCII :: [Word8] -> String toASCII = map (toEnum . fromIntegral) verify :: Monad m => (a -> Bool) -> String -> a -> m a verify pred msg val | pred val = return val | otherwise = error msg verify_ :: Monad m => (a -> Bool) -> String -> a -> m () verify_ pred msg val | pred val = return () | otherwise = error msg throwOlson :: FilePath -> String -> IO a throwOlson fp msg = throw . OlsonError $ fp ++ ": invalid timezone file: " ++ msg