-- | Minimal XML functionality needed to parse each document's
--   XML_File_ID.
--
module TSN.Xml (
  parse_xmlfid,
  xml_tests )
where

import Data.Either.Utils ( maybeToEither )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), Assertion, testCase )
import Text.Read ( readMaybe )
import Text.XML.HXT.Core (
  (>>>),
  (/>),
  arr,
  getChildren,
  getText,
  hasName,
  listA,
  runLA,
  xreadDoc )


-- | A tiny parser written in HXT to extract the \"XML_File_ID\"
--   element from a document. If we fail to parse an XML_File_ID, we
--   return the reason wrapped in a 'Left' constructor. The reason
--   will be one of two things:
--
--     1. No XML_File_ID elements were found directly beneath the
--        top-level \"message\" element.
--
--     2. An XML_File_ID element was found, but its text could not be
--        read into an 'Integer'. This includes an element with no
--        text at all.
--
--   Only the first XML_File_ID element is considered; any later ones
--   are ignored.
--
--   We use an 'Either' rather than a 'Maybe' because we do expect
--   some non-integer XML_File_IDs. In the examples, you will see
--   NHL_DepthChart_XML.XML with an XML_File_ID of \"49618.61\" and
--   CFL_Boxscore_XML1.xml with an XML_File_ID of \"R28916\". According
--   to Brijesh Patel of TSN, these are special category files and
--   not part of the usual feed.
--
--   We want to report them differently, \"just in case.\"
--
parse_xmlfid :: String -- ^ The XML Document
             -> Either String Integer
parse_xmlfid doc =
  case map read_either_integer (parse doc) of
    []    -> Left "No XML_File_ID elements found."
    (x:_) -> x
  where
    -- Produce exactly one string per XML_File_ID element: the
    -- concatenation of that element's text children. Collecting the
    -- text with 'listA' (instead of letting each text node become its
    -- own result) means that an empty element still yields a result,
    -- namely the empty string, and is therefore reported as
    -- unparseable rather than as missing.
    parse :: String -> [String]
    parse = runLA (xreadDoc
                   >>> hasName "message"
                   /> hasName "XML_File_ID"
                   >>> listA (getChildren >>> getText)
                   >>> arr concat)

    -- Read the element text as an Integer, or explain why we couldn't.
    read_either_integer :: String -> Either String Integer
    read_either_integer s =
      maybeToEither msg (readMaybe s)
      where
        msg = "Could not parse XML_File_ID " ++ s ++ " as an integer."


-- * Tasty Tests

-- | All of the tests for this module, collected into one group.
--
xml_tests :: TestTree
xml_tests =
  testGroup
    "XML tests"
    [ xml_file_id_tests ]


-- | For each sample id, read @test\/xml\/\<id\>.xml@ and check that
--   'parse_xmlfid' returns that same id as an 'Integer'.
--
xml_file_id_tests :: TestTree
xml_file_id_tests =
  testCase "XML_File_ID is parsed correctly" $ do
    let xmlfids = ["19908216", "19908245", "19908246", "19908247"]
    mapM_ check xmlfids
  where
    check :: String -> Assertion
    check xmlfid = do
      xml <- readFile ("test/xml/" ++ xmlfid ++ ".xml")
      let actual = parse_xmlfid xml
      -- The maybeToEither should always succeed here, so the error
      -- message goes unused.
      let expected = maybeToEither "derp" (readMaybe xmlfid)
      actual @?= expected