{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TypeFamilies #-} -- | Parse TSN XML for the DTD \"earlylineXML.dtd\". For that DTD, -- each \ element contains a bunch of \s, and those -- \s contain a single \. In the database, we merge -- the date info into the games, and key the games to the messages. -- -- Real life is not so simple, however. There is another module, -- "TSN.XML.MLBEarlyLine" that is something of a subclass of this -- one. It contains early lines, but only for MLB games. The data -- types and XML schema are /almost/ the same, but TSN like to make -- things difficult. -- -- A full list of the differences is given in that module. In this -- one, we mention where data types have been twerked a little to -- support the second document type. -- module TSN.XML.EarlyLine ( EarlyLine, -- Used in TSN.XML.MLBEarlyLine EarlyLineGame, -- Used in TSN.XML.MLBEarlyLine dtd, pickle_message, -- * Tests early_line_tests, -- * WARNING: these are private but exported to silence warnings EarlyLineConstructor(..), EarlyLineGameConstructor(..) ) where -- System imports. import Control.Monad ( join ) import Data.Time ( UTCTime(..) ) import Data.Tuple.Curry ( uncurryN ) import qualified Data.Vector.HFixed as H ( HVector, convert ) import Database.Groundhog ( countAll, deleteAll, insert_, migrate ) import Database.Groundhog.Core ( DefaultKey ) import Database.Groundhog.Generic ( runDbConn, runMigrationSilent ) import Database.Groundhog.Sqlite ( withSqliteConn ) import Database.Groundhog.TH ( groundhog, mkPersist ) import qualified GHC.Generics as GHC ( Generic ) import Test.Tasty ( TestTree, testGroup ) import Test.Tasty.HUnit ( (@?=), testCase ) import Text.XML.HXT.Core ( PU, xp4Tuple, xp6Tuple, xp7Tuple, xpAttr, xpElem, xpInt, xpList, xpOption, xpPair, xpText, xpWrap ) -- Local imports. import Misc ( double_just ) import TSN.Codegen ( tsn_codegen_config ) import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate ) import TSN.Picklers ( xp_ambiguous_time, xp_attr_option, xp_early_line_date, xp_time_stamp ) import TSN.XmlImport ( XmlImport(..) ) import Xml ( FromXml(..), ToDb(..), pickle_unpickle, unpickleable, unsafe_unpickle ) -- | The DTD to which this module corresponds. Used to invoke dbimport. -- dtd :: String dtd = "earlylineXML.dtd" -- -- * DB/XML data types -- -- * EarlyLine/Message -- | Database representation of a 'Message'. It lacks the \ -- elements since they're really properties of the games that they -- contain. -- data EarlyLine = EarlyLine { db_xml_file_id :: Int, db_heading :: String, db_category :: String, db_sport :: String, db_title :: String, db_time_stamp :: UTCTime } deriving (Eq, Show) -- | XML Representation of an 'EarlyLine'. It has the same -- fields, but in addition contains the 'xml_dates'. -- data Message = Message { xml_xml_file_id :: Int, xml_heading :: String, xml_category :: String, xml_sport :: String, xml_title :: String, xml_dates :: [EarlyLineDate], xml_time_stamp :: UTCTime } deriving (Eq, GHC.Generic, Show) -- | For 'H.convert'. -- instance H.HVector Message instance ToDb Message where -- | The database analogue of a 'Message' is an 'EarlyLine'. -- type Db Message = EarlyLine -- | The 'FromXml' instance for 'Message' is required for the -- 'XmlImport' instance. -- instance FromXml Message where -- | To convert a 'Message' to an 'EarlyLine', we just drop -- the 'xml_dates'. -- from_xml Message{..} = EarlyLine { db_xml_file_id = xml_xml_file_id, db_heading = xml_heading, db_category = xml_category, db_sport = xml_sport, db_title = xml_title, db_time_stamp = xml_time_stamp } -- | This allows us to insert the XML representation 'Message' -- directly. -- instance XmlImport Message -- * EarlyLineDate / EarlyLineGameWithNote -- | This is a very sad data type. It exists so that we can -- successfully unpickle/pickle the MLB_earlylineXML.dtd documents -- and get back what we started with. In that document type, the -- dates all have multiple \s associated with them (as -- children). But the dates also have multiple \s as -- children, and we're supposed to figure out which notes go with -- which games based on the order that they appear in the XML -- file. Yeah, right. -- -- In any case, instead of expecting the games and notes in some -- nice order, we use this data type to expect \"a game and maybe a -- note\" multiple times. This will pair the notes with only one -- game, rather than all of the games that TSN think it should go -- with. But it allows us to pickle and unpickle correctly at least. -- data EarlyLineGameWithNote = EarlyLineGameWithNote (Maybe String) -- date_note, unused EarlyLineGameXml -- date_game deriving (Eq, GHC.Generic, Show) -- | Accessor for the game within a 'EarlyLineGameWithNote'. We define -- this ourselves to avoid an unused field warning for date_note. -- date_game :: EarlyLineGameWithNote -> EarlyLineGameXml date_game (EarlyLineGameWithNote _ g) = g -- | For 'H.convert'. -- instance H.HVector EarlyLineGameWithNote -- | XML representation of a \. It has a \"value\" attribute -- containing the actual date string. As children it contains a -- (non-optional) note, and a game. The note and date value are -- properties of the game as far as I can tell. -- data EarlyLineDate = EarlyLineDate { date_value :: UTCTime, date_games_with_notes :: [EarlyLineGameWithNote] } deriving (Eq, GHC.Generic, Show) -- | For 'H.convert'. -- instance H.HVector EarlyLineDate -- * EarlyLineGame / EarlyLineGameXml -- | Database representation of a \ in earlylineXML.dtd and -- MLB_earlylineXML.dtd. We've had to make a sacrifice here to -- support both document types. Since it's not possible to pair the -- \s with \s reliably in MLB_earlylineXML.dtd, we -- have omitted the notes entirely. This is sad, but totally not our -- fault. -- -- In earlylineXML.dtd, each \ and thus each \ is -- paired with exactly one \, so if we only cared about that -- document type, we could have retained the notes. -- -- In earlylinexml.DTD, the over/under is required, but in -- MLB_earlylinexml.DTD it is not. So another compromise is to have -- it optional here. -- -- The 'db_game_time' should be the combined date/time using the -- date value from the \ element's containing -- \. That's why EarlyLineGame isn't an instance of -- 'FromXmlFk': the foreign key isn't enough to construct one, we -- also need the date. -- data EarlyLineGame = EarlyLineGame { db_early_lines_id :: DefaultKey EarlyLine, db_game_time :: UTCTime, -- ^ Combined date/time db_away_team :: EarlyLineGameTeam, db_home_team :: EarlyLineGameTeam, db_over_under :: Maybe String } -- | XML representation of a 'EarlyLineGame'. Comparatively, it lacks -- only the foreign key to the parent message. -- data EarlyLineGameXml = EarlyLineGameXml { xml_game_time :: Maybe UTCTime, -- ^ Only an ambiguous time string, -- e.g. \"8:30\". Can be empty. xml_away_team :: EarlyLineGameTeamXml, xml_home_team :: EarlyLineGameTeamXml, xml_over_under :: Maybe String } deriving (Eq, GHC.Generic, Show) -- | For 'H.convert'. -- instance H.HVector EarlyLineGameXml -- * EarlyLineGameTeam / EarlyLineGameTeamXml -- | Database representation of an EarlyLine team, used in both -- earlylineXML.dtd and MLB_earlylineXML.dtd. It doubles as an -- embedded type within the DB representation 'EarlyLineGame'. -- -- The team name is /not/ optional. However, since we're overloading -- the XML representation, we're constructing 'db_team_name' name -- from two Maybes, 'xml_team_name_attr' and -- 'xml_team_name_text'. To ensure type safety (and avoid a runtime -- crash), we allow the database field to be optional as well. -- data EarlyLineGameTeam = EarlyLineGameTeam { db_rotation_number :: Maybe Int, -- ^ Usually there but sometimes empty. db_line :: Maybe String, -- ^ Can be blank, a Double, or \"off\". db_team_name :: Maybe String, -- ^ NOT optional, see the data type docs. db_pitcher :: Maybe String -- ^ Optional in MLB_earlylineXML.dtd, -- always absent in earlylineXML.dtd. } -- | This here is an abomination. What we've got is an XML -- representation, not for either earlylineXML.dtd or -- MLB_earlylineXML.dtd, but one that will work for /both/. Even -- though they represent the teams totally differently! Argh! -- -- The earlylineXML.dtd teams look like, -- -- \Miami\ -- -- While the MLB_earlylineXML.dtd teams look like, -- -- -- D.Haren -- -130 -- -- -- So that's cool. This data type has placeholders that should allow -- the name/line to appear either as an attribute or as a text -- node. We'll sort it all out in the conversion to -- EarlyLineGameTeam. -- data EarlyLineGameTeamXml = EarlyLineGameTeamXml { xml_rotation_number :: Maybe Int, xml_line_attr :: Maybe String, xml_team_name_attr :: Maybe String, xml_team_name_text :: Maybe String, xml_pitcher :: Maybe String, xml_line_elem :: Maybe String } deriving (Eq, Show) instance ToDb EarlyLineGameTeamXml where -- | The database analogue of a 'EarlyLineGameTeamXml' is an -- 'EarlyLineGameTeam', although the DB type is merely embedded -- in another type. -- type Db EarlyLineGameTeamXml = EarlyLineGameTeam -- | The 'FromXml' instance for 'EarlyLineGameTeamXml' lets us convert -- it to a 'EarlyLineGameTeam' easily. -- instance FromXml EarlyLineGameTeamXml where -- | To convert a 'EarlyLineGameTeamXml' to an 'EarlyLineGameTeam', -- we figure how its fields were represented and choose the ones -- that are populated. For example if the \"line\" attribute was -- there, we'll use it, but if now, we'll use the \ -- element. -- from_xml EarlyLineGameTeamXml{..} = EarlyLineGameTeam { db_rotation_number = xml_rotation_number, db_line = merge xml_line_attr xml_line_elem, db_team_name = merge xml_team_name_attr xml_team_name_text, db_pitcher = xml_pitcher } where merge :: Maybe String -> Maybe String -> Maybe String merge Nothing y = y merge x Nothing = x merge _ _ = Nothing -- | Convert an 'EarlyLineDate' into a list of 'EarlyLineGame's. Each -- date has one or more games, and the fields that belong to the date -- should really be in the game anyway. So the database -- representation of a game has the combined fields of the XML -- date/game. -- -- This function gets the games out of a date, and then sticks the -- date value inside the games. It also adds the foreign key -- reference to the games' parent message, and returns the result. -- -- This would convert a single date to a single game if we only -- needed to support earlylineXML.dtd and not MLB_earlylineXML.dtd. -- date_to_games :: (DefaultKey EarlyLine) -> EarlyLineDate -> [EarlyLineGame] date_to_games fk date = map convert_game games_only where -- | Get the list of games out of a date (i.e. drop the notes). -- games_only :: [EarlyLineGameXml] games_only = (map date_game (date_games_with_notes date)) -- | Stick the date value into the given game. If our -- 'EarlyLineGameXml' has an 'xml_game_time', then we combine it -- with the day portion of the supplied @date@. If not, then we -- just use @date as-is. -- combine_date_time :: Maybe UTCTime -> UTCTime combine_date_time (Just t) = UTCTime (utctDay $ date_value date) (utctDayTime t) combine_date_time Nothing = date_value date -- | Convert an XML game to a database one. -- convert_game :: EarlyLineGameXml -> EarlyLineGame convert_game EarlyLineGameXml{..} = EarlyLineGame { db_early_lines_id = fk, db_game_time = combine_date_time xml_game_time, db_away_team = from_xml xml_away_team, db_home_team = from_xml xml_home_team, db_over_under = xml_over_under } -- -- * Database stuff -- instance DbImport Message where dbmigrate _ = run_dbmigrate $ do migrate (undefined :: EarlyLine) migrate (undefined :: EarlyLineGame) dbimport m = do -- Insert the message and obtain its ID. msg_id <- insert_xml m -- Create a function that will turn a list of dates into a list of -- games by converting each date to its own list of games, and -- then concatenating all of the game lists together. let convert_dates_to_games = concatMap (date_to_games msg_id) -- Now use it to make dem games. let games = convert_dates_to_games (xml_dates m) -- And insert all of them mapM_ insert_ games return ImportSucceeded mkPersist tsn_codegen_config [groundhog| - entity: EarlyLine dbName: early_lines constructors: - name: EarlyLine uniques: - name: unique_early_lines type: constraint # Prevent multiple imports of the same message. fields: [db_xml_file_id] - entity: EarlyLineGame dbName: early_lines_games constructors: - name: EarlyLineGame fields: - name: db_early_lines_id reference: onDelete: cascade - name: db_away_team embeddedType: - {name: rotation_number, dbName: away_team_rotation_number} - {name: line, dbName: away_team_line} - {name: team_name, dbName: away_team_name} - {name: pitcher, dbName: away_team_pitcher} - name: db_home_team embeddedType: - {name: rotation_number, dbName: home_team_rotation_number} - {name: line, dbName: home_team_line} - {name: team_name, dbName: home_team_name} - {name: pitcher, dbName: home_team_pitcher} - embedded: EarlyLineGameTeam fields: - name: db_rotation_number dbName: rotation_number - name: db_line dbName: line - name: db_team_name dbName: team_name - name: db_pitcher dbName: pitcher |] -- -- * Pickling -- -- | Pickler for the top-level 'Message'. -- pickle_message :: PU Message pickle_message = xpElem "message" $ xpWrap (from_tuple, H.convert) $ xp7Tuple (xpElem "XML_File_ID" xpInt) (xpElem "heading" xpText) (xpElem "category" xpText) (xpElem "sport" xpText) (xpElem "title" xpText) (xpList pickle_date) (xpElem "time_stamp" xp_time_stamp) where from_tuple = uncurryN Message -- | Pickler for a '\ followed by a \. We turn them into -- a 'EarlyLineGameWithNote'. -- pickle_game_with_note :: PU EarlyLineGameWithNote pickle_game_with_note = xpWrap (from_tuple, H.convert) $ xpPair (xpOption $ xpElem "note" xpText) pickle_game where from_tuple = uncurry EarlyLineGameWithNote -- | Pickler for the \ elements within each \. -- pickle_date :: PU EarlyLineDate pickle_date = xpElem "date" $ xpWrap (from_tuple, H.convert) $ xpPair (xpAttr "value" xp_early_line_date) (xpList pickle_game_with_note) where from_tuple = uncurry EarlyLineDate -- | Pickler for the \ elements within each \. -- pickle_game :: PU EarlyLineGameXml pickle_game = xpElem "game" $ xpWrap (from_tuple, H.convert) $ xp4Tuple (xpElem "time" (xpOption xp_ambiguous_time)) pickle_away_team pickle_home_team (xpElem "over_under" (xpOption xpText)) where from_tuple = uncurryN EarlyLineGameXml -- | Pickle an away team (\) element within a \. Most -- of the work (common with the home team pickler) is done by -- 'pickle_team'. -- pickle_away_team :: PU EarlyLineGameTeamXml pickle_away_team = xpElem "teamA" pickle_team -- | Pickle a home team (\) element within a \. Most -- of the work (common with theaway team pickler) is done by -- 'pickle_team'. -- pickle_home_team :: PU EarlyLineGameTeamXml pickle_home_team = xpElem "teamH" pickle_team -- | Team pickling common to both 'pickle_away_team' and -- 'pickle_home_team'. Handles everything inside the \ and -- \ elements. We try to parse the line/name as both an -- attribute and an element in order to accomodate -- MLB_earlylineXML.dtd. -- -- The \"line\" and \"pitcher\" fields wind up being double-Maybes, -- since they can be empty even if they exist. -- pickle_team :: PU EarlyLineGameTeamXml pickle_team = xpWrap (from_tuple, to_tuple') $ xp6Tuple (xpAttr "rotation" xp_attr_option) (xpOption $ xpAttr "line" (xpOption xpText)) (xpOption $ xpAttr "name" xpText) (xpOption xpText) (xpOption $ xpElem "pitcher" (xpOption xpText)) (xpOption $ xpElem "line" (xpOption xpText)) where from_tuple (u,v,w,x,y,z) = EarlyLineGameTeamXml u (join v) w x (join y) (join z) to_tuple' (EarlyLineGameTeamXml u v w x y z) = (u, double_just v, w, x, double_just y, double_just z) -- -- * Tasty Tests -- -- | A list of all tests for this module. -- early_line_tests :: TestTree early_line_tests = testGroup "EarlyLine tests" [ test_on_delete_cascade, test_pickle_of_unpickle_is_identity, test_unpickle_succeeds ] -- | If we unpickle something and then pickle it, we should wind up -- with the same thing we started with. WARNING: success of this -- test does not mean that unpickling succeeded. -- test_pickle_of_unpickle_is_identity :: TestTree test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests" [ check "pickle composed with unpickle is the identity" "test/xml/earlylineXML.xml", check "pickle composed with unpickle is the identity (empty game time)" "test/xml/earlylineXML-empty-game-time.xml" ] where check desc path = testCase desc $ do (expected, actual) <- pickle_unpickle pickle_message path actual @?= expected -- | Make sure we can actually unpickle these things. -- test_unpickle_succeeds :: TestTree test_unpickle_succeeds = testGroup "unpickle tests" [ check "unpickling succeeds" "test/xml/earlylineXML.xml", check "unpickling succeeds (empty game time)" "test/xml/earlylineXML-empty-game-time.xml" ] where check desc path = testCase desc $ do actual <- unpickleable path pickle_message let expected = True actual @?= expected -- | Make sure everything gets deleted when we delete the top-level -- record. -- test_on_delete_cascade :: TestTree test_on_delete_cascade = testGroup "cascading delete tests" [ check "deleting early_lines deletes its children" "test/xml/earlylineXML.xml", check "deleting early_lines deletes its children (empty game time)" "test/xml/earlylineXML-empty-game-time.xml" ] where check desc path = testCase desc $ do results <- unsafe_unpickle path pickle_message let a = undefined :: EarlyLine let b = undefined :: EarlyLineGame actual <- withSqliteConn ":memory:" $ runDbConn $ do runMigrationSilent $ do migrate a migrate b _ <- dbimport results deleteAll a count_a <- countAll a count_b <- countAll b return $ sum [count_a, count_b] let expected = 0 actual @?= expected