module TSN.Parse ( ParseError, format_parse_error, parse_game_id, parse_message, parse_schedule_id, parse_tests, parse_time_stamp, parse_xml_time_stamp, parse_xmlfid, time_format, time_stamp_format ) where import Data.Either.Utils ( maybeToEither ) import Data.Time.Clock ( UTCTime ) import Data.Time.Format ( parseTime ) import System.Locale ( defaultTimeLocale ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.Read ( readMaybe ) import Text.XML.HXT.Core ( XmlTree, (>>>), (/>), getChildren, getText, hasName, runLA ) -- Local imports import Xml ( unsafe_read_document ) -- | When parsing an element from an XML document (like the -- XML_File_ID), there are a few things that can happen. First of -- all, it can work. Good for you. -- -- Or, you may find nothing. Like, the element is missing. We -- represent that with a 'ParseNotFound' containing the name of -- thing thing not-found as a 'String'. -- -- Finally, you could find something, but be unable to interpret it -- as the type you were expecting. For example, if you parse -- \"WHATSUP\" out of a \ which is supposed to contain -- integers. We represent this case with a 'ParseMismatch' -- containing the name of the thing that you were looking for, the -- value that had the unexpected type, and finally the name of the -- expected type (used in error messages). -- data ParseError = ParseNotFound String | ParseMismatch String String String deriving (Eq, Show) -- | Take a 'ParseError' and turn it into a human-readable description -- of the problem. -- format_parse_error :: ParseError -> String format_parse_error (ParseNotFound item) = "No " ++ item ++ " elements found." format_parse_error (ParseMismatch item val expected_type) = "Could not parse " ++ item ++ " " ++ val ++ " as " ++ expected_type ++ "." -- | Parse the \"message\" element out of a document tree and return -- it as an 'XmlTree'. We use an 'Either' for consistency. -- parse_message :: XmlTree -> Either ParseError XmlTree parse_message xmltree = case elements of [] -> Left $ ParseNotFound "message" (x:_) -> Right x where parse :: XmlTree -> [XmlTree] parse = runLA $ hasName "/" /> hasName "message" elements = parse xmltree -- | Parse an 'Int' from a direct descendent of the (top-level) -- \ element in an XmlTree. This is used to implement the -- XML_File_ID, game_id, and schedule_id (the last two are specific -- to "TSN.XML.GameInfo") parsers. -- -- If the parse fails, we return the corresponding 'ParseError' -- wrapped in a 'Left'. Otherwise the parsed value is returned in a -- 'Right'. -- parse_message_int :: String -> XmlTree -> Either ParseError Int parse_message_int child xmltree = case parse_results of [] -> Left $ ParseNotFound child (x:_) -> x where parse :: XmlTree -> [String] parse = runLA $ hasName "/" /> hasName "message" /> hasName child >>> getChildren >>> getText read_either_int :: String -> Either ParseError Int read_either_int s = maybeToEither (ParseMismatch child s "integer") (readMaybe s) elements = parse xmltree parse_results = map read_either_int elements -- | Parse an optional 'Int' from a direct descendent of the -- (top-level) \ element in an XmlTree. This is just like -- 'parse_message_int', except we expect the element/value to be -- missing sometimes. -- -- To handle the fact that the element/value is optional, we pattern -- match on the 'ParseError' that comes back in case of failure. If -- we didn't find anything, we turn that into a \"successful -- nothing\". But if we find a value and it can't be parsed, we let -- the error propagate, because that shouldn't happen. Of course, if -- the parse worked, that's nice too: we wrap the parsed value in a -- 'Just' and return that wrapped in a 'Right' -- parse_message_int_optional :: String -> XmlTree -> Either ParseError (Maybe Int) parse_message_int_optional child xmltree = case (parse_message_int child xmltree) of Left (ParseNotFound _) -> Right Nothing Left pm@(ParseMismatch {}) -> Left pm Right whatever -> Right (Just whatever) -- | Extract the \"XML_File_ID\" element from a document. If we fail -- to parse an XML_File_ID, we return an appropriate 'ParseError' -- wrapped in a 'Left' constructor. The reason should be one of two -- things: -- -- 1. No XML_File_ID elements were found. -- -- 2. An XML_File_ID element was found, but it could not be read -- into an Int. -- -- In general we expect some non-integer XML_File_IDs, because they -- appear on the feed. But the htsn daemon refuses to save them at -- the moment, so if we ever see an XML_File_ID that we can't parse, -- it's truly an error. -- parse_xmlfid :: XmlTree -> Either ParseError Int parse_xmlfid = parse_message_int "XML_File_ID" -- | Extract the \ element from within the top-level -- \ of a document. These appear in the "TSN.XML.GameInfo" -- documents. Unlike the \ elements, the \ -- can be missing from GameInfo documents, so for our implementation -- we use 'parse_message_int_optional' instead. -- parse_game_id :: XmlTree -> Either ParseError (Maybe Int) parse_game_id = parse_message_int_optional "game_id" -- | Extract the \ element from within the top-level -- \ of a document. Identical to 'parse_game_id' except -- for the element name. -- parse_schedule_id :: XmlTree -> Either ParseError (Maybe Int) parse_schedule_id = parse_message_int_optional "schedule_id" -- | The format string for times appearing in the feed. -- time_format :: String time_format = "%I:%M %p" -- | The format string for a time_stamp. We have removed the -- leading/trailing space so that parseTime and formatTime are NOT -- inverses of one another. We should be able to rectify this once -- everything is updated to support time-1.5. See, -- -- -- time_stamp_format :: String time_stamp_format = "%B %-d, %Y, at " ++ time_format ++ " ET" -- | Parse a time stamp from a 'String' (maybe). TSN doesn't provide a -- proper time zone name, so we parse it as UTC, and maybe our -- eventual consumer can figure out a way to deduce the time zone. -- parse_time_stamp :: String -> Maybe UTCTime parse_time_stamp = parseTime defaultTimeLocale time_stamp_format -- | Extract the \"time_stamp\" element from a document. If we fail to -- parse a time_stamp, we return an appropriate 'ParseError'. The -- reason should be one of two things: -- -- 1. No time_Stamp elements were found. -- -- 2. A time_stamp element was found, but it could not be read -- into a UTCTime. -- -- We don't expect to run into any time_stamps that we can't parse, -- and they can never be missing, so both conditions are truly -- errors. -- parse_xml_time_stamp :: XmlTree -> Either ParseError UTCTime parse_xml_time_stamp xmltree = case parse_results of [] -> Left $ ParseNotFound "time_stamp" (x:_) -> x where parse :: XmlTree -> [String] parse = runLA $ hasName "/" /> hasName "message" /> hasName "time_stamp" >>> getChildren >>> getText read_either_utctime :: String -> Either ParseError UTCTime read_either_utctime s = maybeToEither (ParseMismatch "time_stamp" s "date/time") (parse_time_stamp s) elements = parse xmltree parse_results = map read_either_utctime elements -- -- * Tests -- -- | A list of all tests for this module. -- parse_tests :: TestTree parse_tests = testGroup "TSN.Parse tests" [ test_parse_game_id, test_parse_missing_game_id, test_parse_missing_schedule_id, test_parse_schedule_id, test_parse_xmlfid ] where sample_path :: String sample_path = "test/xml/gameinfo/CBASK_Lineup_XML.xml" desc :: String -> String desc child = "a known " ++ child ++ " is parsed correctly" -- | Actual implementation of the test for parse_xmlfid, -- parse_game_id, and parse_schedule_id. -- test_child :: String -> Int -> TestTree test_child child expected = testCase (desc child) $ do xmltree <- unsafe_read_document sample_path let actual = parse_message_int child xmltree actual @?= (Right expected) -- | Make sure we can parse a game_id into the expected value. -- test_parse_game_id :: TestTree test_parse_game_id = test_child "game_id" 97865 -- | Make sure we can parse a schedule_id (different from the -- game_id) into the expected value. -- test_parse_schedule_id :: TestTree test_parse_schedule_id = test_child "schedule_id" 10199 -- | Make sure we can parse an XML_File_ID into the expected value. -- test_parse_xmlfid :: TestTree test_parse_xmlfid = test_child "XML_File_ID" 17 -- | The game_id element can be missing, so we test that too. -- test_parse_missing_game_id :: TestTree test_parse_missing_game_id = testCase "missing game_id is not an error" $ do xmltree <- unsafe_read_document "test/xml/gameinfo/MLB_Matchup_XML.xml" let actual = parse_game_id xmltree let expected = Right Nothing actual @?= expected -- | The schedule_id element can be missing, so we test that too. -- test_parse_missing_schedule_id :: TestTree test_parse_missing_schedule_id = testCase "missing schedule_id is not an error" $ do let path = "test/xml/gameinfo/recapxml-no-game-schedule-ids.xml" xmltree <- unsafe_read_document path let actual = parse_schedule_id xmltree let expected = Right Nothing actual @?= expected