-- Package description for htsn-import: one executable, three test
-- suites, and the source repository location.
name:           htsn-import
version:        0.2.0
cabal-version:  >= 1.8
author:         Michael Orlitzky
maintainer:     Michael Orlitzky
category:       Utils
license:        GPL-3
license-file:   doc/LICENSE
build-type:     Simple
extra-source-files:
  doc/dbschema/*.png
  doc/htsn-importrc.example
  doc/man1/htsn-import.1
  doc/CHANGES.database
  doc/README.development
  doc/TODO
  makefile
  schema/*.dtd
  schemagen/AutoRacingResultsXML/*.xml
  schemagen/Auto_Racing_Schedule_XML/*.xml
  schemagen/CBASK_3PPctXML/*.xml
  schemagen/Cbask_All_Tourn_Teams_XML/*.xml
  schemagen/CBASK_AssistsXML/*.xml
  schemagen/Cbask_Awards_XML/*.xml
  schemagen/CBASK_BlocksXML/*.xml
  schemagen/Cbask_Conf_Standings_XML/*.xml
  schemagen/Cbask_DivII_III_Indv_Stats_XML/*.xml
  schemagen/Cbask_DivIII_Team_Stats_XML/*.xml
  schemagen/Cbask_DivII_Team_Stats_XML/*.xml
  schemagen/CBASK_FGPctXML/*.xml
  schemagen/CBASK_FoulsXML/*.xml
  schemagen/CBASK_FTPctXML/*.xml
  schemagen/Cbask_Indv_Scoring_XML/*.xml
  schemagen/CBASK_Lineup_XML/*.xml
  schemagen/CBASK_MinutesXML/*.xml
  schemagen/Cbask_Polls_XML/*.xml
  schemagen/cbaskpreviewxml/*.xml
  schemagen/CBASK_ReboundsXML/*.xml
  schemagen/CBASK_ScoringLeadersXML/*.xml
  schemagen/Cbask_Team_ThreePT_Made_XML/*.xml
  schemagen/Cbask_Team_ThreePT_PCT_XML/*.xml
  schemagen/Cbask_Team_Win_Pct_XML/*.xml
  schemagen/CBASK_TopTwentyFiveResult_XML/*.xml
  schemagen/Cbask_Top_Twenty_Five_XML/*.xml
  schemagen/Cbask_Tourn_Awards_XML/*.xml
  schemagen/Cbask_Tourn_Champs_XML/*.xml
  schemagen/Cbask_Tourn_Indiv_XML/*.xml
  schemagen/Cbask_Tourn_Leaders_XML/*.xml
  schemagen/Cbask_Tourn_MVP_XML/*.xml
  schemagen/Cbask_Tourn_Records_XML/*.xml
  schemagen/cflpreviewxml/*.xml
  schemagen/earlylineXML/*.xml
  schemagen/Heartbeat/*.xml
  schemagen/Injuries_Detail_XML/*.xml
  schemagen/injuriesxml/*.xml
  schemagen/jfilexml/*.xml
  schemagen/LeagueScheduleXML/*.xml
  schemagen/Matchup_NBA_NHL_XML/*.xml
  schemagen/Minor_Baseball_League_Leaders_XML/*.xml
  schemagen/Minor_Baseball_Standings_XML/*.xml
  schemagen/Minor_Baseball_Transactions_XML/*.xml
  schemagen/minorscoresxml/*.xml
  schemagen/mlbbattingavgxml/*.xml
  schemagen/mlbdoublesleadersxml/*.xml
  schemagen/MLB_ERA_Leaders/*.xml
  schemagen/MLB_Fielding_XML/*.xml
  schemagen/MLBGamesPlayedXML/*.xml
  schemagen/MLB_Gaming_Matchup_XML/*.xml
  schemagen/MLBGIDPXML/*.xml
  schemagen/MLBHitByPitchXML/*.xml
  schemagen/mlbhitsleadersxml/*.xml
  schemagen/mlbhomerunsxml/*.xml
  schemagen/MLBHRFreqXML/*.xml
  schemagen/MLBIntWalksXML/*.xml
  schemagen/MLBKORateXML/*.xml
  schemagen/MLB_Lineup_XML/*.xml
  schemagen/MLB_Matchup_XML/*.xml
  schemagen/mlbonbasepctxml/*.xml
  schemagen/MLBOPSXML/*.xml
  schemagen/MLB_earlylineXML/*.xml
  schemagen/MLB_Pitching_Appearances_Leaders/*.xml
  schemagen/MLB_Pitching_Balks_Leaders/*.xml
  schemagen/MLB_Pitching_CG_Leaders/*.xml
  schemagen/MLB_Pitching_ER_Allowed_Leaders/*.xml
  schemagen/MLB_Pitching_Hit_Batters_Leaders/*.xml
  schemagen/MLB_Pitching_Hits_Allowed_Leaders/*.xml
  schemagen/MLB_Pitching_HR_Allowed_Leaders/*.xml
  schemagen/MLB_Pitching_IP_Leaders/*.xml
  schemagen/MLB_Pitching_Runs_Allowed_Leaders/*.xml
  schemagen/MLB_Pitching_Saves_Leaders/*.xml
  schemagen/MLB_Pitching_Shut_Outs_Leaders/*.xml
  schemagen/MLB_Pitching_Starts_Leaders/*.xml
  schemagen/MLB_Pitching_Strike_Outs_Leaders/*.xml
  schemagen/MLB_Pitching_Walks_Leaders/*.xml
  schemagen/MLB_Pitching_WHIP_Leaders/*.xml
  schemagen/MLB_Pitching_Wild_Pitches_Leaders/*.xml
  schemagen/MLB_Pitching_Win_Percentage_Leaders/*.xml
  schemagen/MLB_Pitching_WL_Leaders/*.xml
  schemagen/MLBPlateAppsXML/*.xml
  schemagen/mlbpreviewxml/*.xml
  schemagen/mlbrbisxml/*.xml
  schemagen/mlbrunsleadersxml/*.xml
  schemagen/MLBSacFliesXML/*.xml
  schemagen/MLBSacrificesXML/*.xml
  schemagen/MLBSBSuccessXML/*.xml
  schemagen/mlbsluggingpctxml/*.xml
  schemagen/mlbstandxml/*.xml
  schemagen/mlbstandxml_preseason/*.xml
  schemagen/mlbstolenbasexml/*.xml
  schemagen/mlbtotalbasesleadersxml/*.xml
  schemagen/mlbtriplesleadersxml/*.xml
  schemagen/MLBWalkRateXML/*.xml
  schemagen/mlbwalksleadersxml/*.xml
  schemagen/MLBXtraBaseHitsXML/*.xml
  schemagen/MLS_Preview_XML/*.xml
  schemagen/NBA3PPctXML/*.xml
  schemagen/NBAAssistsXML/*.xml
  schemagen/NBABlocksXML/*.xml
  schemagen/nbaconfrecxml/*.xml
  schemagen/nbadaysxml/*.xml
  schemagen/nbadivisionsxml/*.xml
  schemagen/NBAFGPctXML/*.xml
  schemagen/NBAFoulsXML/*.xml
  schemagen/NBAFTPctXML/*.xml
  schemagen/NBA_Gaming_Matchup_XML/*.xml
  schemagen/NBALineupXML/*.xml
  schemagen/NBAMinutesXML/*.xml
  schemagen/NBA_Playoff_Matchup_XML/*.xml
  schemagen/nbapreviewxml/*.xml
  schemagen/NBAReboundsXML/*.xml
  schemagen/NBAScorersXML/*.xml
  schemagen/nbastandxml/*.xml
  schemagen/NBAStealsXML/*.xml
  schemagen/nbateamleadersxml/*.xml
  schemagen/NBA_Team_Stats_XML/*.xml
  schemagen/nbatripledoublexml/*.xml
  schemagen/NBATurnoversXML/*.xml
  schemagen/NCAA_Conference_Schedule_XML/*.xml
  schemagen/NCAA_FB_Preview_XML/*.xml
  schemagen/newsxml/*.xml
  schemagen/nflfirstdownxml/*.xml
  schemagen/NFLFumbleLeaderXML/*.xml
  schemagen/NFLGiveTakeXML/*.xml
  schemagen/NFLInside20XML/*.xml
  schemagen/NFL_KickingLeaders_XML/*.xml
  schemagen/NFLKickoffsXML/*.xml
  schemagen/NFLMondayNightXML/*.xml
  schemagen/NFL_NBA_Draft_XML/*.xml
  schemagen/NFL_NCAA_FB_Matchup_XML/*.xml
  schemagen/NFLPassLeadXML/*.xml
  schemagen/nflpreviewxml/*.xml
  schemagen/NFLQBStartsXML/*.xml
  schemagen/NFL_Roster_XML/*.xml
  schemagen/NFLSackLeadersXML/*.xml
  schemagen/nflstandxml/*.xml
  schemagen/NFLTeamRankingsXML/*.xml
  schemagen/NFL_Team_Stats_XML/*.xml
  schemagen/NFLTopPerformanceXML/*.xml
  schemagen/NFLTotalYardageXML/*.xml
  schemagen/nhlpreviewxml/*.xml
  schemagen/Odds_XML/*.xml
  schemagen/recapxml/*.xml
  schemagen/Schedule_Changes_XML/*.xml
  schemagen/scoresxml/*.xml
  schemagen/Transactions_XML/*.xml
  schemagen/weatherxml/*.xml
  schemagen/Weekly_Sched_XML/*.xml
  schemagen/WNBA3PPctXML/*.xml
  schemagen/WNBAAssistsXML/*.xml
  schemagen/WNBABlocksXML/*.xml
  schemagen/WNBAFGPctXML/*.xml
  schemagen/WNBAFoulsXML/*.xml
  schemagen/WNBAFTPctXML/*.xml
  schemagen/WNBAMinutesXML/*.xml
  schemagen/WNBAReboundsXML/*.xml
  schemagen/WNBAScorersXML/*.xml
  schemagen/wnbastandxml/*.xml
  schemagen/WNBAStealsXML/*.xml
  schemagen/WNBA_Team_Leaders_XML/*.xml
  schemagen/WNBATurnoversXML/*.xml
  schemagen/WorldBaseballPreviewXML/*.xml
  test/shell/*.test
  test/xml/*.xml
  test/xml/*.dtd
  test/xml/gameinfo/*.xml
  test/xml/gameinfo/*.dtd
  test/xml/sportinfo/*.xml
  test/xml/sportinfo/*.dtd
synopsis:
  Import XML files from The Sports Network into an RDBMS.
description:
  /Usage/:
  .
  @
  htsn-import [OPTIONS] [FILES]
  @
  .
  The Sports Network offers an XML feed containing various sports news
  and statistics. Our sister program /htsn/ is capable of retrieving
  the feed and saving the individual XML documents contained
  therein. But what to do with them?
  .
  The purpose of /htsn-import/ is to take these XML documents and get
  them into something we can use, a relational database management
  system (RDBMS), i.e. \"a SQL database\". The structure of a
  relational database is, well, relational, and the feed XML is
  not. So there is some work to do before the data can be inserted.
  .
  First, we must parse the XML. Each supported document type (see
  below) has a full pickle/unpickle implementation (\"pickle\" is
  simply a synonym for serialize here). That means that we parse the
  entire document into a data structure, and if we pickle (serialize)
  that data structure, we get the exact same XML document that we
  started with.
  .
  This is important for two reasons. First, it serves as a second
  level of validation. The first validation is performed by the XML
  parser, but if that succeeds and unpickling fails, we know that
  something is fishy. Second, we don't ever want to be surprised by
  some new element or attribute showing up in the XML. The fact that
  we can unpickle the whole thing now means that we won't be surprised
  in the future.
  .
  The aforementioned feature is especially important because we
  automatically migrate the database schema every time we import a
  document. If you attempt to import a \"newsxml.dtd\" document, all
  database objects relating to the news will be created if they do not
  exist. We don't want the schema to change out from under us without
  warning, so it's important that no XML be parsed that would result
  in a different schema than we had previously. Since we can
  pickle/unpickle everything already, this should be impossible.
  .
  Examples and usage documentation are available in the man page.

-- The htsn-import program itself, built from src/Main.hs.
executable htsn-import
  build-depends:
    base                        >= 4.6 && < 5,
    cmdargs                     >= 0.10.6,
    configurator                >= 0.2,
    directory                   >= 1.2,
    filepath                    >= 1.3,
    hslogger                    >= 1.2,
    htsn-common                 >= 0.0.1,
    hxt                         >= 9.3,
    groundhog                   >= 0.5,
    groundhog-postgresql        >= 0.5,
    groundhog-sqlite            >= 0.5,
    groundhog-th                >= 0.5,
    MissingH                    >= 1.2,
    old-locale                  >= 1,
    split                       >= 0.2,
    tasty                       >= 0.8,
    tasty-hunit                 >= 0.8,
    time                        >= 1.4,
    transformers                >= 0.3,
    tuple                       >= 0.2

  main-is:
    Main.hs

  hs-source-dirs:
    src/

  other-modules:
    Backend
    CommandLine
    Configuration
    ConnectionString
    ExitCodes
    OptionalConfiguration
    TSN.Codegen
    TSN.Database
    TSN.DbImport
    TSN.Location
    TSN.Parse
    TSN.Picklers
    TSN.Team
    TSN.XmlImport
    TSN.XML.AutoRacingResults
    TSN.XML.AutoRacingSchedule
    TSN.XML.EarlyLine
    TSN.XML.GameInfo
    TSN.XML.Heartbeat
    TSN.XML.Injuries
    TSN.XML.InjuriesDetail
    TSN.XML.JFile
    TSN.XML.MLBEarlyLine
    TSN.XML.News
    TSN.XML.Odds
    TSN.XML.ScheduleChanges
    TSN.XML.Scores
    TSN.XML.SportInfo
    TSN.XML.Weather
    Xml

  ghc-options:
    -Wall
    -fwarn-hi-shadowing
    -fwarn-missing-signatures
    -fwarn-name-shadowing
    -fwarn-orphans
    -fwarn-type-defaults
    -fwarn-tabs
    -fwarn-incomplete-record-updates
    -fwarn-monomorphism-restriction
    -fwarn-unused-do-bind
    -O2

  ghc-prof-options:
    -prof
    -fprof-auto
    -fprof-cafs
    -- The following unbreak profiling with template haskell. We have
    -- to build the program twice; once without profile and again with
    -- these flags.
    -hisuf hi_p
    -osuf o_p


-- Test suite driven by test/TestSuite.hs; it compiles against the
-- program sources in src as well.
test-suite testsuite
  type: exitcode-stdio-1.0
  hs-source-dirs: src test
  main-is: TestSuite.hs
  build-depends:
    base                        >= 4.6 && < 5,
    cmdargs                     >= 0.10.6,
    configurator                >= 0.2,
    directory                   >= 1.2,
    filepath                    >= 1.3,
    hslogger                    >= 1.2,
    htsn-common                 >= 0.0.1,
    hxt                         >= 9.3,
    groundhog                   >= 0.5,
    groundhog-postgresql        >= 0.5,
    groundhog-sqlite            >= 0.5,
    groundhog-th                >= 0.5,
    MissingH                    >= 1.2,
    old-locale                  >= 1,
    split                       >= 0.2,
    tasty                       >= 0.8,
    tasty-hunit                 >= 0.8,
    time                        >= 1.4,
    transformers                >= 0.3,
    tuple                       >= 0.2

  -- It's not entirely clear to me why I have to reproduce all of this.
  ghc-options:
    -Wall
    -fwarn-hi-shadowing
    -fwarn-missing-signatures
    -fwarn-name-shadowing
    -fwarn-orphans
    -fwarn-type-defaults
    -fwarn-tabs
    -fwarn-incomplete-record-updates
    -fwarn-monomorphism-restriction
    -fwarn-unused-do-bind
    -O2


test-suite doctests
  type: exitcode-stdio-1.0
  hs-source-dirs: test
  main-is: Doctests.hs
  build-depends:
    base                        >= 4.6 && < 5,
    -- Additional test dependencies.
    doctest                     >= 0.9

  -- It's not entirely clear to me why I have to reproduce all of this.
  ghc-options:
    -Wall
    -fwarn-hi-shadowing
    -fwarn-missing-signatures
    -fwarn-name-shadowing
    -fwarn-orphans
    -fwarn-type-defaults
    -fwarn-tabs
    -fwarn-incomplete-record-updates
    -fwarn-monomorphism-restriction
    -fwarn-unused-do-bind
    -rtsopts
    -threaded
    -optc-O3
    -optc-march=native
    -O2


-- These won't work without shelltestrunner installed in your
-- $PATH. Maybe there is some way to tell Cabal that.
test-suite shelltests
  type: exitcode-stdio-1.0
  hs-source-dirs: test
  main-is: ShellTests.hs
  build-depends:
    base                        >= 4.6 && < 5,
    cmdargs                     >= 0.10.6,
    configurator                >= 0.2,
    directory                   >= 1.2,
    filepath                    >= 1.3,
    hslogger                    >= 1.2,
    htsn-common                 >= 0.0.1,
    hxt                         >= 9.3,
    groundhog                   >= 0.5,
    groundhog-postgresql        >= 0.5,
    groundhog-sqlite            >= 0.5,
    groundhog-th                >= 0.5,
    MissingH                    >= 1.2,
    old-locale                  >= 1,
    split                       >= 0.2,
    process                     >= 1.1,
    tasty                       >= 0.8,
    tasty-hunit                 >= 0.8,
    time                        >= 1.4,
    transformers                >= 0.3,
    tuple                       >= 0.2


source-repository head
  type: git
  location: http://michael.orlitzky.com/git/htsn-import.git
  branch: master