{-# LANGUAGE ScopedTypeVariables #-}

-- | (Un)picklers for data types present in The Sports Network XML
-- feed.
--
module TSN.Picklers (
  pickler_tests,
  xp_ambiguous_time,
  xp_attr_option,
  xp_date,
  xp_date_padded,
  xp_datetime,
  xp_early_line_date,
  xp_earnings,
  xp_fracpart_only_double,
  xp_gamedate,
  xp_tba_time,
  xp_time,
  xp_time_dots,
  xp_time_stamp )
where

-- System imports.
import Data.Char ( toUpper )
import Data.List ( intercalate )
import Data.List.Split ( chunksOf )
import Data.Maybe ( catMaybes, listToMaybe )
import Data.String.Utils ( replace )
import Data.Time.Clock ( UTCTime )
import Data.Time.Format ( formatTime, parseTime )
import Data.Tree.NTree.TypeDefs ( NTree(..) )
import System.Locale (
  TimeLocale( wDays, months ),
  defaultTimeLocale )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.Read ( readMaybe )
import Text.XML.HXT.Arrow.Pickle (
  xpText,
  xpWrap,
  xpWrapMaybe )
import Text.XML.HXT.Arrow.Pickle.Xml ( PU )
import Text.XML.HXT.Core (
  XmlTree,
  XNode( XTag, XText ),
  mkName,
  pickleDoc,
  unpickleDoc )

-- Local imports.
import TSN.Parse (
  parse_time_stamp,
  time_format,
  time_stamp_format )


-- | Format string for a bare date written as m/d/yyyy, where neither
-- the month nor the day is padded. It matches, for example,
--
-- * 2\/15\/1983
--
-- * 1\/1\/0000
--
date_format :: String
date_format = "%-m/%-d/%Y"


-- | Format string for a bare date written as mm/dd/yyyy, where the
-- month and the day are both zero-padded to two characters. It
-- matches, for example,
--
-- * 02\/15\/1983
--
-- * 01\/01\/0000
--
date_format_padded :: String
date_format_padded = "%0m/%0d/%Y"


-- | (Un)pickle a UTCTime without the time portion.
--
-- /Examples/:
--
-- This should parse:
--
-- >>> let tn = text_node "2/15/1983"
-- >>> unpickleDoc xp_date tn
-- Just 1983-02-15 00:00:00 UTC
--
-- But for some reason, it can also parse a leading zero in the
-- month. Whatever. This isn't required behavior.
--
-- >>> let tn = text_node "02/15/1983"
-- >>> unpickleDoc xp_date tn
-- Just 1983-02-15 00:00:00 UTC
--
xp_date :: PU UTCTime
xp_date =
  (to_date, from_date) `xpWrapMaybe` xpText
  where
    -- Parsing can fail, hence the 'Maybe' (and 'xpWrapMaybe' above).
    to_date :: String -> Maybe UTCTime
    to_date = parseTime defaultTimeLocale date_format

    from_date :: UTCTime -> String
    from_date = formatTime defaultTimeLocale date_format


-- | (Un)pickle a UTCTime without the time portion. The day/month are
-- padded to two characters with zeros.
--
-- Examples:
--
-- >>> let tn = text_node "02/15/1983"
-- >>> unpickleDoc xp_date_padded tn
-- Just 1983-02-15 00:00:00 UTC
--
-- >>> let tn = text_node "06/07/2014"
-- >>> unpickleDoc xp_date_padded tn
-- Just 2014-06-07 00:00:00 UTC
--
xp_date_padded :: PU UTCTime
xp_date_padded =
  (to_date, from_date) `xpWrapMaybe` xpText
  where
    to_date :: String -> Maybe UTCTime
    to_date = parseTime defaultTimeLocale date_format_padded

    from_date :: UTCTime -> String
    from_date = formatTime defaultTimeLocale date_format_padded


-- | Format a number as a string using a comma as the thousands
-- separator. The digits are grouped in threes starting from the
-- right. A leading minus sign is kept out of the grouping, so it
-- is never followed directly by a comma.
--
-- Examples:
--
-- >>> format_commas 0
-- "0"
-- >>> format_commas 10
-- "10"
-- >>> format_commas 100
-- "100"
-- >>> format_commas 1000
-- "1,000"
-- >>> format_commas 10000
-- "10,000"
-- >>> format_commas 100000
-- "100,000"
-- >>> format_commas 1000000
-- "1,000,000"
--
-- Negative numbers keep their sign in front of the first group:
--
-- >>> format_commas (-100)
-- "-100"
-- >>> format_commas (-1000)
-- "-1,000"
--
format_commas :: Int -> String
format_commas x =
  case show x of
    -- Split the sign off before grouping; otherwise a negative number
    -- whose digit count is a multiple of three would come out with a
    -- comma right after the minus sign.
    ('-':digits) -> '-' : group_digits digits
    digits       -> group_digits digits
  where
    -- Group from the right by reversing, chunking, and reversing back.
    group_digits :: String -> String
    group_digits = reverse . intercalate "," . chunksOf 3 . reverse


-- | Parse the earnings text from an 'AutoRaceResultsListing'.
-- These are essentially 'Int's, but they look like,
--
-- * 336,826
--
-- * 1,000,191
--
-- * TBA
--
-- Examples:
--
-- >>> let tn = text_node "1,000,191"
-- >>> unpickleDoc xp_earnings tn
-- Just (Just 1000191)
--
-- >>> let tn = text_node "TBA"
-- >>> unpickleDoc xp_earnings tn
-- Just Nothing
--
-- Text that is neither \"TBA\" nor a number also unpickles to
-- 'Nothing', rather than blowing up when the value is forced:
--
-- >>> let tn = text_node "n/a"
-- >>> unpickleDoc xp_earnings tn
-- Just Nothing
--
xp_earnings :: PU (Maybe Int)
xp_earnings =
  (to_earnings, from_earnings) `xpWrap` xpText
  where
    strip_commas :: String -> String
    strip_commas = replace "," ""

    -- 'readMaybe' (and not the partial 'read') so that unexpected
    -- text becomes 'Nothing' instead of a runtime exception.
    to_earnings :: String -> Maybe Int
    to_earnings s
      | s == "TBA" = Nothing
      | otherwise = readMaybe (strip_commas s)

    -- 'Nothing' is always written back out as \"TBA\".
    from_earnings :: Maybe Int -> String
    from_earnings Nothing = "TBA"
    from_earnings (Just i) = format_commas i


-- | Pickle a 'Double' that can be missing its leading zero (for
-- values less than one). For example, we've seen values like
-- \".5\", which 'xpPrim' can't handle without the leading
-- zero. Unfortunately there's no way pickle/unpickle can be
-- inverses of each other here, since \"0.5\" and \".5\" should
-- unpickle to the same 'Double'.
--
-- Examples:
--
-- >>> let tn = text_node "0.5"
-- >>> unpickleDoc xp_fracpart_only_double tn
-- Just 0.5
--
-- >>> let tn = text_node ".5"
-- >>> unpickleDoc xp_fracpart_only_double tn
-- Just 0.5
--
-- >>> let tn = text_node "foo"
-- >>> unpickleDoc xp_fracpart_only_double tn
-- Nothing
--
xp_fracpart_only_double :: PU Double
xp_fracpart_only_double =
  (to_double, from_double) `xpWrapMaybe` xpText
  where
    -- Convert a 'String' to a 'Double', maybe. We always prepend a
    -- zero, since it will fix the fraction-only values, and not hurt
    -- the ones that already have a leading integer.
    --
    -- NOTE: a leading minus sign ends up behind the prepended zero
    -- (\"-.5\" becomes \"0-.5\"), so negative input does not parse.
    to_double :: String -> Maybe Double
    to_double s = readMaybe ("0" ++ s)

    from_double :: Double -> String
    from_double = show


-- | (Un)pickle an unpadded 'UTCTime'. Used for example on the
-- date\/time elements in an 'AutoRaceResults' message.
--
-- Examples:
--
-- >>> let tn = text_node "6/1/2014 1:00:00 PM"
-- >>> unpickleDoc xp_datetime tn
-- Just 2014-06-01 13:00:00 UTC
--
-- >>> let tn = text_node "5/24/2014 2:45:00 PM"
-- >>> unpickleDoc xp_datetime tn
-- Just 2014-05-24 14:45:00 UTC
--
-- Padded! For some reason it works with only one zero in front. I
-- dunno man. NOT required (or even desired?) behavior.
--
-- >>> let tn = text_node "05/24/2014 2:45:00 PM"
-- >>> unpickleDoc xp_datetime tn
-- Just 2014-05-24 14:45:00 UTC
--
xp_datetime :: PU UTCTime
xp_datetime =
  (to_datetime, from_datetime) `xpWrapMaybe` xpText
  where
    -- The unpadded date, a space, and then an unpadded 12-hour time
    -- with seconds and AM/PM.
    format = date_format ++ " " ++ "%-I:%M:%S %p"

    to_datetime :: String -> Maybe UTCTime
    to_datetime = parseTime defaultTimeLocale format

    from_datetime :: UTCTime -> String
    from_datetime = formatTime defaultTimeLocale format


-- | Takes a 'UTCTime', and returns the English suffix that would be
-- appropriate after the day of the month. For example, if we have a
-- UTCTime representing Christmas, this would return \"th\" because
-- \"th\" is the right suffix of \"December 25th\".
--
-- The suffix normally depends on the last digit of the day, except
-- that the 11th, 12th, and 13th all take \"th\".
--
-- Examples:
--
-- >>> import Data.Maybe ( fromJust )
-- >>> :{
-- let parse_date :: String -> Maybe UTCTime
--     parse_date = parseTime defaultTimeLocale date_format
-- :}
--
-- >>> let dates = [ "1/" ++ (d : "/1970") | d <- ['1'..'9'] ]
-- >>> let suffixes = map (date_suffix . fromJust . parse_date) dates
-- >>> suffixes
-- ["st","nd","rd","th","th","th","th","th","th"]
--
-- >>> let teens = [ "1/" ++ d ++ "/1970" | d <- ["11","12","13"] ]
-- >>> map (date_suffix . fromJust . parse_date) teens
-- ["th","th","th"]
--
-- >>> let twenties = [ "1/" ++ d ++ "/1970" | d <- ["21","22","23"] ]
-- >>> map (date_suffix . fromJust . parse_date) twenties
-- ["st","nd","rd"]
--
date_suffix :: UTCTime -> String
date_suffix t =
  case daystr of
    -- Days 10 through 19 always take "th"; without this case the
    -- last-digit rule below would produce "11st", "12nd" and "13rd".
    ['1', _] -> "th"
    _ -> case (reverse daystr) of
           [] -> []
           ('1':_) -> "st"
           ('2':_) -> "nd"
           ('3':_) -> "rd"
           _ -> "th"
  where
    -- The zero-padded, two-character day of the month.
    daystr = formatTime defaultTimeLocale "%d" t


-- | (Un)pickle a UTCTime from a weather forecast's gamedate. Example
-- input looks like,
--
-- * Monday, December 30th
--
-- When unpickling we get rid of the suffixes \"st\", \"nd\", \"rd\", and
-- \"th\". During pickling, we add them back based on the day of the
-- month (see 'date_suffix').
--
-- Examples:
--
-- >>> let tn = text_node "Monday, December 30th"
-- >>> let (Just gd) = unpickleDoc xp_gamedate tn
-- >>> gd
-- 1970-12-30 00:00:00 UTC
-- >>> pickleDoc xp_gamedate gd
-- NTree (XTag "/" []) [NTree (XText "Wednesday, December 30th") []]
--
xp_gamedate :: PU UTCTime
xp_gamedate = xpWrapMaybe (parse_gamedate, render_gamedate) xpText
  where
    -- Weekday name, month name, unpadded day of the month.
    gamedate_format :: String
    gamedate_format = "%A, %B %-d"

    -- Drop a trailing ordinal suffix, if there is one. Strings
    -- shorter than two characters, or ending in anything else, are
    -- passed through untouched.
    strip_suffix :: String -> String
    strip_suffix str
      | last_two `elem` ["st","nd","rd","th"] = front
      | otherwise = str
      where
        (front, last_two) = splitAt (length str - 2) str

    parse_gamedate :: String -> Maybe UTCTime
    parse_gamedate = parseTime defaultTimeLocale gamedate_format . strip_suffix

    -- Format the date, then tack the ordinal suffix back on.
    render_gamedate :: UTCTime -> String
    render_gamedate d =
      formatTime defaultTimeLocale gamedate_format d ++ date_suffix d


-- | (Un)pickle a UTCTime that has no date portion. The fields must
-- be zero-padded to two characters, or the parse fails.
--
-- /Examples/:
--
-- Padded, should work:
--
-- >>> let tn = text_node "04:35 PM"
-- >>> unpickleDoc xp_time tn
-- Just 1970-01-01 16:35:00 UTC
--
-- Unpadded, should fail:
--
-- >>> let tn = text_node "4:35 PM"
-- >>> unpickleDoc xp_time tn
-- Nothing
--
xp_time :: PU UTCTime
xp_time = xpWrapMaybe (parse_clock, render_clock) xpText
  where
    parse_clock :: String -> Maybe UTCTime
    parse_clock str = parseTime defaultTimeLocale time_format str

    render_clock :: UTCTime -> String
    render_clock utc = formatTime defaultTimeLocale time_format utc


-- | (Un)pickle a UTCTime without the date portion. This differs from
-- 'xp_time' in that it uses periods in the AM/PM part, i.e. \"A.M.\"
-- and \"P.M.\" It also doesn't use padding for the \"hours\" part.
--
-- /Examples/:
--
-- A standard example of the correct form:
--
-- >>> let tn = text_node "11:30 A.M."
-- >>> let (Just result) = unpickleDoc xp_time_dots tn
-- >>> result
-- 1970-01-01 11:30:00 UTC
-- >>> pickleDoc xp_time_dots result
-- NTree (XTag "/" []) [NTree (XText "11:30 A.M.") []]
--
-- Another miracle, it still parses with a leading zero!
--
-- >>> let tn = text_node "01:30 A.M."
-- >>> unpickleDoc xp_time_dots tn
-- Just 1970-01-01 01:30:00 UTC
--
xp_time_dots :: PU UTCTime
xp_time_dots = xpWrapMaybe (parse_dotted, render_dotted) xpText
  where
    -- Same idea as the usual time format, but the hours aren't
    -- padded with zeros.
    unpadded_format :: String
    unpadded_format = "%-I:%M %p"

    -- Throw away every period first, so that \"A.M.\" reads as \"AM\".
    parse_dotted :: String -> Maybe UTCTime
    parse_dotted str =
      parseTime defaultTimeLocale unpadded_format (replace "." "" str)

    -- Format normally, then put the periods back into AM/PM.
    render_dotted :: UTCTime -> String
    render_dotted =
      replace "AM" "A.M."
        . replace "PM" "P.M."
        . formatTime defaultTimeLocale unpadded_format


-- | (Un)pickle a UTCTime that has no date portion, where the text
-- may also be \"TBA\" (which is translated to 'Nothing').
--
-- /Examples/:
--
-- A failed parse will return 'Nothing':
--
-- >>> let tn = text_node "YO"
-- >>> unpickleDoc xp_tba_time tn
-- Just Nothing
--
-- And so will parsing a \"TBA\":
--
-- >>> let tn = text_node "TBA"
-- >>> unpickleDoc xp_tba_time tn
-- Just Nothing
--
-- But re-pickling 'Nothing' gives only \"TBA\":
--
-- >>> pickleDoc xp_tba_time Nothing
-- NTree (XTag "/" []) [NTree (XText "TBA") []]
--
-- A normal time is also parsed successfully, of course:
--
-- >>> let tn = text_node "08:10 PM"
-- >>> unpickleDoc xp_tba_time tn
-- Just (Just 1970-01-01 20:10:00 UTC)
--
xp_tba_time :: PU (Maybe UTCTime)
xp_tba_time = xpWrap (parse_tba, render_tba) xpText
  where
    parse_tba :: String -> Maybe UTCTime
    parse_tba "TBA" = Nothing
    parse_tba str = parseTime defaultTimeLocale time_format str

    render_tba :: Maybe UTCTime -> String
    render_tba = maybe "TBA" (formatTime defaultTimeLocale time_format)


-- | (Un)pickle the \<time_stamp\> element format to/from a 'UTCTime'.
-- The time_stamp elements look something like,
--
-- \<time_stamp\> January 6, 2014, at 10:11 PM ET \</time_stamp\>
--
-- TSN doesn't provide a proper time zone name, only \"ET\" for
-- \"Eastern Time\". But \"Eastern Time\" changes throughout the
-- year, depending on one's location, for daylight-savings
-- time. It's really not any more useful to be off by one hour than
-- it is to be off by 5 hours, so rather than guess at EDT/EST, we
-- just store the timestamp as UTC.
--
-- Examples:
--
-- >>> let tn = text_node " January 6, 2014, at 10:11 PM ET "
-- >>> let (Just tstamp) = unpickleDoc xp_time_stamp tn
-- >>> tstamp
-- 2014-01-06 22:11:00 UTC
-- >>> pickleDoc xp_time_stamp tstamp
-- NTree (XTag "/" []) [NTree (XText " January 6, 2014, at 10:11 PM ET ") []]
--
xp_time_stamp :: PU UTCTime
xp_time_stamp = xpWrapMaybe (parse_time_stamp, render_time_stamp) xpText
  where
    -- The time_stamp_format has to be re-padded with a leading and
    -- a trailing space; see the documentation of
    -- 'time_stamp_format' for more information.
    padded_format :: String
    padded_format = " " ++ time_stamp_format ++ " "

    render_time_stamp :: UTCTime -> String
    render_time_stamp utc = formatTime defaultTimeLocale padded_format utc


-- | (Un)pickle a 12-hour time that is ambiguous because its AM/PM
-- part is missing.
--
-- Examples:
--
-- >>> let tn = text_node "8:00"
-- >>> unpickleDoc xp_ambiguous_time tn
-- Just 1970-01-01 08:00:00 UTC
--
xp_ambiguous_time :: PU UTCTime
xp_ambiguous_time = xpWrapMaybe (parse_ambiguous, render_ambiguous) xpText
  where
    -- Unpadded 12-hour hours and the minutes, nothing else.
    hours_and_minutes :: String
    hours_and_minutes = "%-I:%M"

    parse_ambiguous :: String -> Maybe UTCTime
    parse_ambiguous str = parseTime defaultTimeLocale hours_and_minutes str

    render_ambiguous :: UTCTime -> String
    render_ambiguous utc = formatTime defaultTimeLocale hours_and_minutes utc


-- | Pickle a date value from an element as it appears in the
-- early lines. This is a particularly wacky format, but then so is
-- the associated time (see 'xp_ambiguous_time').
--
-- Examples:
--
-- >>> let tn = text_node "SUNDAY, MAY 25TH (05/25/2014)"
-- >>> let (Just result) = unpickleDoc xp_early_line_date tn
-- >>> result
-- 2014-05-25 00:00:00 UTC
-- >>> pickleDoc xp_early_line_date result
-- NTree (XTag "/" []) [NTree (XText "SUNDAY, MAY 25TH (05/25/2014)") []]
--
-- >>> let tn = text_node "SATURDAY, JUNE 7TH (06/07/2014)"
-- >>> let (Just result) = unpickleDoc xp_early_line_date tn
-- >>> result
-- 2014-06-07 00:00:00 UTC
-- >>> pickleDoc xp_early_line_date result
-- NTree (XTag "/" []) [NTree (XText "SATURDAY, JUNE 7TH (06/07/2014)") []]
--
xp_early_line_date :: PU UTCTime
xp_early_line_date = xpWrapMaybe (parse_early, render_early) xpText
  where
    shout :: String -> String
    shout = map toUpper

    shout_both :: (String,String) -> (String,String)
    shout_both (name1, name2) = (shout name1, shout name2)

    -- A time locale of our own that talks IN ALL CAPS. 'parseTime'
    -- doesn't seem to care about the case, but when we spit the
    -- date back out again with 'formatTime', we want all caps.
    caps_time_locale :: TimeLocale
    caps_time_locale =
      defaultTimeLocale {
        wDays = map shout_both (wDays defaultTimeLocale),
        months = map shout_both (months defaultTimeLocale) }

    -- The complete format string for one (upper-case) day suffix.
    format_for :: String -> String
    format_for suffix =
      "%A, %B %-d" ++ suffix ++ " (" ++ date_format_padded ++ ")"

    -- Try each of the four suffixes in turn and keep the first
    -- format that parses.
    parse_early :: String -> Maybe UTCTime
    parse_early str = listToMaybe (catMaybes attempts)
      where
        attempts = [ parseTime caps_time_locale (format_for suffix) str
                   | suffix <- ["ST", "ND", "RD","TH"] ]

    -- Only the suffix that belongs to this particular day is used
    -- when writing the date back out.
    render_early :: UTCTime -> String
    render_early utc =
      formatTime caps_time_locale (format_for (shout (date_suffix utc))) utc


-- | This is a replacement for @xpOption xpFoo@ within an 'xpAttr'.
-- There's a bug in newer versions of HXT that prevents us from
-- using the usual 'xpOption' solution, so this is our stopgap. It
-- should work on any type that can be unpickled with a plain
-- read/show.
--
xp_attr_option :: forall a. (Read a, Show a) => PU (Maybe a)
xp_attr_option =
  (to_a, from_a) `xpWrap` xpText
  where
    -- Anything that can't be read (the empty string included)
    -- becomes 'Nothing'.
    to_a :: String -> Maybe a
    to_a = readMaybe

    -- 'Nothing' is written out as the empty string.
    from_a :: Maybe a -> String
    from_a Nothing = ""
    from_a (Just x) = show x


-- | Create an 'XmlTree' containing only the given text. This is
-- useful for testing (un)picklers, where we don't want to have to
-- bother to create a dummy XML document.
--
-- Examples:
--
-- >>> text_node "8:00"
-- NTree (XText "8:00") []
--
text_node :: String -> XmlTree
text_node s = NTree (XText s) []


--
-- * Tasty Tests
--

-- | A list of all tests for this module. This primarily exists to
-- eliminate the unused import/export warnings for 'unpickleDoc' and
-- 'text_node' which are otherwise only used in the doctests.
--
pickler_tests :: TestTree
pickler_tests =
  testGroup
    "Pickler tests"
    [ test_pickle_of_unpickle_is_identity ]


-- | If we unpickle something and then pickle it, we should wind up
-- with the same thing we started with (plus an additional root
-- element).
--
-- The comparison is made on 'Maybe' values, so a failed unpickle
-- shows up as an ordinary assertion failure and not as a
-- pattern-match exception.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity =
  testCase "pickle composed with unpickle is (almost) the identity" $ do
    let tn = text_node "8:00"
    let expected = NTree (XTag (mkName "/") []) [tn]
    let actual = fmap (pickleDoc xp_ambiguous_time)
                      (unpickleDoc xp_ambiguous_time tn)
    actual @?= Just expected