{-# LANGUAGE OverloadedStrings #-} -- ---------------------------------------------------------------------------- {- | Normalization and validation for date. Days are represented here as in ISO 8601:2000 Second Edition: ISO (International Organization for Standardization). Representations of dates and times, second edition, 2000-12-15. NOT as in ISO 8601 ISO (International Organization for Standardization). Representations of dates and times, 1988-06-15. The main difference is dealing with year 0. in the older ISO standard, this is excluded and "-0001" is the representation of year 1 Before Common Era "-1 BCE". In the latter standard "0000" represents "-1 BCE" and "-0001" represents "-2 BCE" -} -- source hxt-xmlschema/src/Text/XML/HXT/XMLSchema/W3CDataTypeCheck.hs -- ---------------------------------------------------------------------------- module Hunt.Index.Schema.Normalize.Date ( normalize, denormalize , isAnyDate , isAnyDate' , isTime , nullDay , showGYearMonth , showGYear , showGMonthDay , showGMonth , showGDay , showTime ) where import Control.Applicative import Control.Monad import Data.List import Data.Maybe import Data.Text (Text) import qualified Data.Text as T import Data.Char (isDigit) import Data.Function (on) import Data.Ratio ((%)) import Data.Time (Day, DiffTime, UTCTime (..), addUTCTime, fromGregorian) import Text.Regex.XMLSchema.String import Hunt.Utility -- ------------------------------------------------------------ -- | Normalize a date representation to store in the index or search for. normalize :: Text -> Text normalize t = fromMaybe t (T.pack . normDateRep . showDateTime . toUTC <$> (readAnyDateM . T.unpack $ t)) where -- XXX: no proper support for dates before year 0 (1 BCE) this way normDateRep :: String -> String normDateRep s = if head' s == Just '-' then ('-':) . fil . tail $ s else fil s where fil = filter (not . (`elem` "-T:")) -- | Function takes normalized Date and transforms it back a readable form. -- We don't transform it back to the original representation, since that -- is never used, but to a general readable date format denormalize :: Text -> Text denormalize t = T.concat [y1, y2, "-", m, "-", d, " ", h, ":", i, ":", s] where [y1,y2,m,d,h,i,s] = T.chunksOf 2 t -- ------------------------------------------------------------ -- | Checks if the string is a date representation (syntactically). isAnyDate :: String -> Bool isAnyDate s = any ($ s) $ map fst safeDateReaders -- XXX: generally (showDate . readAnyDate) /= id -- | Same as 'isAnyDate' but also checks if @(showDate . readAnyDate)@ produces the same result. -- /NOTE/: excludes dates before year 0 (1 BCE). isAnyDate' :: String -> Bool isAnyDate' s = fromMaybe False $ do d <- readAnyDateM s guard $ ((==s) . showDate . readAnyDate) s return . not . isPrefixOf "-" . showDate . toUTC $ d -- | Unsafe 'readAnyDateM'. readAnyDate :: String -> Date readAnyDate = fromJust . readAnyDateM -- | Try to read a date. readAnyDateM :: String -> Maybe Date readAnyDateM s = head' . catMaybes . map readDateM $ safeDateReaders where readDateM :: (String -> Bool, String -> Date) -> Maybe Date readDateM (v, r) = guard (v s) >> return (r s) -- | Tuples of date validator and reader. safeDateReaders :: [(String -> Bool, String -> Date)] safeDateReaders = zip validators readers where validators :: [String -> Bool] validators = [ isDateTime, isDate, isGYearMonth, isGYear, isGMonthDay, isGMonth, isGDay] readers :: [String -> Date] readers = [readDateTime, readDate, readGYearMonth, readGYear, readGMonthDay, readGMonth, readGDay] -- ------------------------------------------------------------ -- source hxt-xmlschema/src/Text/XML/HXT/XMLSchema/W3CDataTypeCheck.hs -- ---------------------------------------- -- -- Days are represented here -- as in ISO 8601:2000 Second Edition: -- ISO (International Organization for Standardization). -- Representations of dates and times, second edition, 2000-12-15. -- -- NOT as in -- ISO 8601 -- ISO (International Organization for Standardization). -- Representations of dates and times, 1988-06-15. -- -- The main difference is dealing with year 0. -- in the older ISO standard, this is excluded and -- "-0001" is the representation of year 1 Before Common Era "-1 BCE". -- In the latter standard "0000" represents "-1 BCE" and "-0001" represents "-2 BCE" data Date = Date { _dUTCTime :: UTCTime , _dTZ :: MaybeTimeZone } deriving (Show) type MaybeTimeZone = Maybe Seconds type Seconds = Int instance Eq Date where (==) = (==) `on` toUTCTime instance Ord Date where compare = compare `on` toUTCTime mkDateTime :: Day -> DiffTime -> MaybeTimeZone -> Date mkDateTime d t z = Date (UTCTime d t) z toUTCTime :: Date -> UTCTime toUTCTime (Date d Nothing) = d toUTCTime (Date d (Just tz)) = addUTCTime (fromInteger . toInteger $ tz) d -- | Convert to UTC time. Eliminates the timezone offset. toUTC :: Date -> Date toUTC d = Date (toUTCTime d) Nothing -- to UTC -- ------------------------------------------------------------ isDateTime, isDate, isTime, isGYearMonth, isGYear, isGMonthDay, isGMonth, isGDay :: String -> Bool [isDateTime, isDate, isTime, isGYearMonth, isGYear, isGMonthDay, isGMonth, isGDay] = map matchRE rexDates rexDates :: [Regex] rexDates = map rex [dateTime, date, time, gYearMonth, gYear, gMonthDay, gMonth, gDay] where dateTime = ymd ++ "T" ++ hms ++ tz time = hms ++ tz date = ymd ++ tz gYearMonth = ym ++ tz gYear = y ++ tz gMonthDay = "--" ++ m2 ++ "-" ++ t2 ++ tz gMonth = "--" ++ m2 ++ tz gDay = "--" ++ "-" ++ t2 ++ tz y = "-?" ++ y4' ym = y ++ "-" ++ m2 ymd = ym ++ "-" ++ t2 hms = alt (h2 ++ ":" ++ i2 ++ ":" ++ s2 ++ fr) ("24:00:00" ++ opt ".0+") -- 24:00 is legal tz = opt (alt tz0 "Z") tz0 = (alt "\\-" "\\+") ++ tz1 tz1 = alt (h13 ++ ":" ++ i2) "14:00:00" m2 = alt "0[1-9]" "1[0-2]" -- Month t2 = alt "0[1-9]" (alt "[12][0-9]" "3[01]") -- Tag h2 = alt "[01][0-9]" "2[0-3]" -- Hour i2 = "[0-5][0-9]" -- mInute s2 = i2 -- Seconds {- -- this conforms to ISO 8601 from 1988 y1 = "000[1-9]" -- "0000" isn't a year, "-0001" represents "-1 BCE" y2 = "00[1-9][0-9]" -- leading 0-s are only allowd for year < 1000 y3 = "0[1-9][0-9]{2}" y4 = "[1-9][0-9]{3,}" y4' = alt y4 $ alt y3 $ alt y2 y1 -- -} -- {- -- this conforms to ISO 8601 Second Edition from 2000 y4 = "[0-9]{4}" -- year "0000" is legal and represents "-1 BCE" y4' = opt "[1-9][0-9]*" ++ y4 -- -} fr = opt ".[0-9]+" h13 = alt "0[0-9]" "1[0-3]" opt x = "(" ++ x ++ ")?" alt x1 x2 = "((" ++ x1 ++ ")|(" ++ x2 ++ "))" -- ------------------------------------------------------------ readDate , readGYearMonth , readGYear , readGMonthDay , readGMonth , readGDay :: String -> Date readDate = readDate' readYearMonthDayS readGYearMonth = readDate' readYearMonthS readGYear = readDate' readYearS readGMonthDay = readDate' readMonthDayS readGMonth = readDate' readMonthS readGDay = readDate' readDayS readTimeZone :: String -> MaybeTimeZone readTimeZone "" = Nothing readTimeZone "Z" = Just 0 readTimeZone (s : xs) = Just . ( if s == '-' then negate else id ) . readZone $ xs where readZone s' = 60 * (60 * read hs + read ms) where (hs, (_ : ms)) = span (/= ':') s' readYearMonthDayS :: String -> (Day, String) readYearMonthDayS s0 = (fromGregorian (sign $ read year) (read month) (read day), rest) where (sign, s ) = if head s0 == '-' then (negate, tail s0) else (id, s0) (year, (_ : rest1)) = span (/= '-') s (month, (_ : rest2)) = span (/= '-') rest1 (day, rest ) = span isDigit rest2 readYearMonthS :: String -> (Day, String) readYearMonthS s0 = (fromGregorian (sign $ read year) (read month) 1, rest) where (sign, s ) = if head s0 == '-' then (negate, tail s0) else (id, s0) (year, (_ : rest1)) = span (/= '-') s (month, rest ) = span isDigit rest1 readYearS :: String -> (Day, String) readYearS s0 = (fromGregorian (sign $ read year) 1 1, rest) where (sign, s ) = if head s0 == '-' then (negate, tail s0) else (id, s0) (year, rest ) = span isDigit s readMonthDayS :: String -> (Day, String) readMonthDayS s0 = (fromGregorian 1 (read month) (read day), rest) where (month, (_ : rest1)) = span isDigit . drop 2 $ s0 (day, rest ) = span isDigit rest1 readMonthS :: String -> (Day, String) readMonthS s0 = (fromGregorian 1 (read month) 1, rest) where (month, rest ) = span isDigit . drop 2 $ s0 readDayS :: String -> (Day, String) readDayS s0 = (fromGregorian 1 1 (read day), rest) where (day, rest ) = span isDigit . drop 3 $ s0 readHourMinSec :: String -> DiffTime readHourMinSec s = fromInteger (60 * (60 * read hours + read minutes)) + fromRational (readDecimal seconds) where (hours, (_ : rest)) = span (/= ':') s (minutes, (_ : seconds)) = span (/= ':') rest readDateTime :: String -> Date readDateTime s = mkDateTime day (readHourMinSec time) (readTimeZone zone) where (day, (_ : rest)) = readYearMonthDayS s (time, zone) = span (\ x -> isDigit x || x `elem` ":.") rest readDate' :: (String -> (Day, String)) -> String -> Date readDate' read' s = mkDateTime day nullTime (readTimeZone zone) where (day, zone) = read' s nullTime :: DiffTime nullTime = fromInteger 0 nullDay :: Day nullDay = fromGregorian 1 1 1 -- ---------------------------------------- -- | Reads a decimal from a string readDecimal :: String -> Rational readDecimal ('+':s) = readDecimal' s readDecimal ('-':s) = negate $ readDecimal' s readDecimal s = readDecimal' s -- | Helper function to read a decimal from a string readDecimal' :: String -> Rational readDecimal' s | f == 0 = (n % 1) | otherwise = (n % 1) + (f % (10 ^ (toInteger $ length fs))) where (ns, fs') = span (/= '.') s fs = drop 1 fs' f :: Integer f | null fs = 0 | otherwise = read fs n :: Integer n | null ns = 0 | otherwise = read ns -- -------------------- -- the show must go on showDateTime :: Date -> String showDateTime (Date d tz) = ymd ++ "T" ++ hms ++ showTimeZone tz where (ymd : hms : _) = words . show $ d showDate' :: (String -> String) -> Date -> String showDate' fmt (Date d tz) = fmt ymd ++ showTimeZone tz where (ymd : _) = words . show $ d dropRev :: Int -> String -> String dropRev i = reverse . drop i . reverse showDate :: Date -> String showDate = showDate' $ id showGYearMonth :: Date -> String showGYearMonth = showDate' $ dropRev 3 showGYear :: Date -> String showGYear = showDate' $ dropRev 6 showGMonthDay :: Date -> String showGMonthDay = showDate' $ ('-' :) . reverse . take 6 . reverse showGMonth :: Date -> String showGMonth = showDate' $ ('-' :) . reverse . take 3 . drop 3 . reverse showGDay :: Date -> String showGDay = showDate' $ ('-' :) . ('-' :) . reverse . take 3 . reverse -- it's showTime :: Date -> String showTime (Date d tz) = hms ++ showTimeZone tz where (_ymd : hms : _) = words . show $ d showTimeZone :: MaybeTimeZone -> String showTimeZone Nothing = "" showTimeZone (Just s) | s == 0 = "Z" | s > 0 = '+' : showHourMin s | otherwise = '-' : showHourMin (negate s) showHourMin :: Int -> String showHourMin s0 = showDec 2 (s `div` 60) ++ ":" ++ showDec 2 (s `mod` 60) where s = s0 `div` 60 showDec :: Int -> Int -> String showDec n = reverse . toStr n where toStr 0 _ = "" toStr l i = show (i `mod` 10) ++ toStr (l-1) (i `div` 10) -- ------------------------------------------------------------ -- | Creates a regex from a string rex :: String -> Regex rex regex | isZero ex = error $ "syntax error in regexp " ++ show regex ++ "." | otherwise = ex where ex = parseRegex regex -- ------------------------------------------------------------