{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}

-- | Parse TSN XML for the DTD \"newsxml.dtd\". Each document contains
--   a root element \<message\> that contains an entire news item.
--
module TSN.XML.News (
  dtd,
  has_only_single_sms,
  pickle_message,
  -- * Tests
  news_tests,
  -- * WARNING: these are private but exported to silence warnings
  News_LocationConstructor(..),
  News_TeamConstructor(..),
  NewsConstructor(..) )
where

-- System imports.
import Data.Data ( Data, constrFields, dataTypeConstrs, dataTypeOf )
import Data.Time.Clock ( UTCTime )
import Data.List.Utils ( join, split )
import Data.Tuple.Curry ( uncurryN )
import Data.Typeable ( Typeable )
import qualified Data.Vector.HFixed as H ( HVector, convert )
import Database.Groundhog (
  countAll,
  deleteAll,
  insert_,
  migrate )
import Database.Groundhog.Core ( DefaultKey )
import Database.Groundhog.Generic ( runDbConn, runMigrationSilent )
import Database.Groundhog.Sqlite ( withSqliteConn )
import Database.Groundhog.TH (
  groundhog,
  mkPersist )
import qualified GHC.Generics as GHC ( Generic )
import Test.Tasty ( TestTree, testGroup )
import Test.Tasty.HUnit ( (@?=), testCase )
import Text.XML.HXT.Core (
  PU,
  XmlTree,
  (/>),
  (>>>),
  addNav,
  descendantAxis,
  filterAxis,
  followingSiblingAxis,
  hasName,
  remNav,
  runLA,
  xp13Tuple,
  xpAttr,
  xpElem,
  xpInt,
  xpList,
  xpOption,
  xpPair,
  xpText,
  xpWrap )

-- Local imports.
import TSN.Codegen (
  tsn_codegen_config,
  tsn_db_field_namer ) -- Used in a test
import TSN.Database ( insert_or_select )
import TSN.DbImport ( DbImport(..), ImportResult(..), run_dbmigrate )
import TSN.Location ( Location(..), pickle_location )
import TSN.Picklers ( xp_attr_option, xp_time_stamp )
import TSN.Team ( Team(..) )
import TSN.XmlImport ( XmlImport(..) )
import Xml (
  FromXml(..),
  ToDb(..),
  pickle_unpickle,
  unpickleable,
  unsafe_read_invalid_document,
  unsafe_unpickle )


-- | The DTD to which this module corresponds. Used to invoke dbimport.
--
dtd :: String
dtd = "newsxml.dtd"


--
-- DB/XML Data types
--

-- * News/Message

-- | The msg_id child of \<message\> contains an event_id attribute; we
--   embed it into the 'News' type. We (pointlessly) use the \"db_\"
--   prefix here so that the two names don't collide on \"id\" when
--   Groundhog is creating its fields using our field namer.
--
--   The leading underscores prevent unused field warnings.
--
data MsgId =
  MsgId {
    _db_msg_id       :: Int,
    _db_event_id     :: Maybe Int }
  deriving (Data, Eq, GHC.Generic, Show, Typeable)

-- | For 'H.convert'.
--
instance H.HVector MsgId


-- | The XML representation of a news item (\<message\>).
--
data Message =
  Message {
    xml_xml_file_id :: Int,
    xml_heading :: String,
    xml_mid :: MsgId,
    xml_category :: String,
    xml_sport :: String,
    xml_url :: Maybe String,
    xml_teams :: [NewsTeamXml],
    xml_locations :: [Location],
    xml_sms :: Maybe String,
    xml_editor :: Maybe String,
    xml_text :: Maybe String,     -- Text and continue seem to show up in pairs,
    xml_continue :: Maybe String, -- either both present or both missing.
    xml_time_stamp :: UTCTime }
  deriving (Eq, GHC.Generic, Show)


-- | For 'H.convert'.
--
instance H.HVector Message


-- | The database representation of a news item. We drop several
--   uninteresting fields from 'Message', and omit the list fields which
--   will be represented as join tables.
--
data News =
  News {
    db_xml_file_id :: Int,
    db_mid :: MsgId,
    db_sport :: String,
    db_url :: Maybe String,
    db_sms :: Maybe String,
    db_editor :: Maybe String,
    db_text :: Maybe String,
    db_continue :: Maybe String,
    db_time_stamp :: UTCTime }
  deriving (Data, Eq, Show, Typeable)


instance ToDb Message where
  -- | The database representation of 'Message' is 'News'.
  type Db Message = News

-- | Convert the XML representation 'Message' to the database
--   representation 'News'.
--
instance FromXml Message where
  -- | We use a record wildcard so GHC doesn't complain that we never
  --   used the field names.
  --
  --   To convert, we drop some fields.
  --
  from_xml Message{..} = News { db_xml_file_id = xml_xml_file_id,
                                db_mid = xml_mid,
                                db_sport = xml_sport,
                                db_url = xml_url,
                                db_sms = xml_sms,
                                db_editor = xml_editor,
                                db_text = xml_text,
                                db_continue = xml_continue,
                                db_time_stamp = xml_time_stamp }

-- | This lets us insert the XML representation 'Message' directly.
--
instance XmlImport Message


-- * NewsTeamXml

-- | The XML type for teams as they show up in the news. We can't
--   reuse the representation from "TSN.Team" because our name doesn't
--   appear optional and we have no abbreviation.
--
data NewsTeamXml =
  NewsTeamXml { xml_team_id :: String,
                xml_team_name :: String }
  deriving (Eq, GHC.Generic, Show)


-- | For 'H.convert'.
--
instance H.HVector NewsTeamXml


instance ToDb NewsTeamXml where
  -- | The database representation of 'NewsTeamXml' is 'Team'.
  type Db NewsTeamXml = Team

-- | Convert the XML representation 'NewsTeamXml' to the database
--   representation 'Team'.
--
instance FromXml NewsTeamXml where
  from_xml NewsTeamXml{..} =
    Team { team_id = xml_team_id,
           abbreviation = Nothing,
           name = Just xml_team_name }

-- | Allow us to import 'NewsTeamXml' directly.
--
instance XmlImport NewsTeamXml


-- * News_Team

-- | Mapping between News records and Team records in the database. We
--   don't name the fields because we don't use the names explicitly;
--   that means we have to give them nice database names via
--   groundhog.
--
data News_Team = News_Team (DefaultKey News) (DefaultKey Team)


-- * News_Location

-- | Mapping between 'News' records and 'Location' records in the
--   database. We don't name the fields because we don't use the names
--   explicitly; that means we have to give them nice database names
--   via groundhog.
--
data News_Location = News_Location
                       (DefaultKey News)
                       (DefaultKey Location)


-- | Some newsxml documents contain two \<SMS\> elements in a row,
--   violating the DTD. The second one has always been empty, but it's
--   irrelevant: we can't parse these, and would like to detect them
--   in order to report the fact that the busted document is
--   unsupported.
--
--   This function detects whether two \<SMS\> elements appear in a
--   row, as siblings.
--
has_only_single_sms :: XmlTree -> Bool
has_only_single_sms xmltree =
    case elements of
    [] -> True
    _  -> False
  where
    parse :: XmlTree -> [XmlTree]
    parse = runLA $  hasName "/"
                  /> hasName "message"
                  >>> addNav
                  >>> descendantAxis
                  >>> filterAxis (hasName "SMS")
                  >>> followingSiblingAxis
                  >>> remNav
                  >>> hasName "SMS"

    elements = parse xmltree


--
-- * Database code
--

-- | Define 'dbmigrate' and 'dbimport' for 'Message's. The import is
--   slightly non-generic because of our 'News_Team' and
--   'News_Location' join tables.
--
instance DbImport Message where
  dbmigrate _ =
    run_dbmigrate $ do
      migrate (undefined :: Location)
      migrate (undefined :: News)
      migrate (undefined :: Team)
      migrate (undefined :: News_Team)
      migrate (undefined :: News_Location)

  dbimport message = do
    -- Insert the message and acquire its primary key (unique ID)
    news_id <- insert_xml message

    -- Now insert the teams. We use insert_xml_or_select because we
    -- know that most teams will already exist, and we want to get
    -- back the id for the existing team when there's a collision.
    team_ids <- mapM insert_xml_or_select (xml_teams message)

    -- Now that the teams have been inserted, create
    -- news__team records mapping beween the two.
    let news_teams = map (News_Team news_id) team_ids
    mapM_ insert_ news_teams

    -- Do all of that over again for the Locations.
    loc_ids <- mapM insert_or_select (xml_locations message)
    let news_news_locations = map (News_Location news_id) loc_ids
    mapM_ insert_ news_news_locations

    return ImportSucceeded


-- These types have fields with e.g. db_ and xml_ prefixes, so we
-- use our own codegen to peel those off before naming the columns.
mkPersist tsn_codegen_config [groundhog|
- entity: News
  constructors:
    - name: News
      uniques:
        - name: unique_news
          type: constraint
          # Prevent multiple imports of the same message.
          fields: [db_xml_file_id]
      fields:
        - name: db_mid
          embeddedType:
            - {name: msg_id, dbName: msg_id}
            - {name: event_id, dbName: event_id}

- embedded: MsgId
  fields:
    - name: _db_msg_id
      dbName: msg_id
    - name: _db_event_id
      dbName: event_id

- entity: News_Team
  dbName: news__teams
  constructors:
    - name: News_Team
      fields:
        - name: news_Team0 # Default created by mkNormalFieldName
          dbName: news_id
          reference:
            onDelete: cascade
        - name: news_Team1 # Default created by mkNormalFieldName
          dbName: teams_id
          reference:
            onDelete: cascade

- entity: News_Location
  dbName: news__locations
  constructors:
    - name: News_Location
      fields:
        - name: news_Location0 # Default created by mkNormalFieldName
          dbName: news_id
          reference:
            onDelete: cascade
        - name: news_Location1 # Default created by mkNormalFieldName
          dbName: locations_id
          reference:
            onDelete: cascade
|]


--
-- XML Picklers
--

-- | Convert a 'NewsTeamXml' to/from XML.
--
pickle_news_team :: PU NewsTeamXml
pickle_news_team =
  xpElem "team" $
    xpWrap (from_tuple, H.convert) $
      xpPair (xpAttr "id" xpText)
             xpText -- team name
  where
    from_tuple = uncurry NewsTeamXml


-- | Convert a 'MsgId' to/from XML.
--
pickle_msg_id :: PU MsgId
pickle_msg_id =
  xpElem "msg_id" $
    xpWrap (from_tuple, H.convert) $
    xpPair xpInt (xpAttr "EventId" xp_attr_option)
  where
    from_tuple = uncurryN MsgId


-- | Convert a 'Message' to/from XML.
--
pickle_message :: PU Message
pickle_message =
  xpElem "message" $
    xpWrap (from_tuple, H.convert) $
    xp13Tuple (xpElem "XML_File_ID" xpInt)
              (xpElem "heading" xpText)
              pickle_msg_id
              (xpElem "category" xpText)
              (xpElem "sport" xpText)
              (xpElem "url" $ xpOption xpText)
              (xpList pickle_news_team)
              (xpList pickle_location)
              (xpElem "SMS" $ xpOption xpText)
              (xpOption (xpElem "Editor" xpText))
              (xpOption (xpElem "text" xpText))
              pickle_continue
              (xpElem "time_stamp" xp_time_stamp)
  where
    from_tuple = uncurryN Message


    -- | We combine all of the \<continue\> elements into one 'String'
    --   while unpickling and do the reverse while pickling.
    --
    pickle_continue :: PU (Maybe String)
    pickle_continue =
      xpOption $
        xpWrap (to_string, from_string) $
          xpElem "continue" $
            xpList (xpElem "P" xpText)
      where
        from_string :: String -> [String]
        from_string = split "\n"

        to_string :: [String] -> String
        to_string = join "\n"


--
-- Tasty Tests
--

-- | A list of all tests for this module.
--
news_tests :: TestTree
news_tests =
  testGroup
    "News tests"
    [ test_news_fields_have_correct_names,
      test_on_delete_cascade,
      test_pickle_of_unpickle_is_identity,
      test_unpickle_succeeds,
      test_sms_detected_correctly ]


-- | Make sure our codegen is producing the correct database names.
--
test_news_fields_have_correct_names :: TestTree
test_news_fields_have_correct_names =
  testCase "news fields get correct database names" $
    mapM_ check (zip actual expected)
  where
    -- This is cool, it uses the (derived) Data instance of
    -- News.News to get its constructor names.
    field_names :: [String]
    field_names =
      constrFields . head $ dataTypeConstrs $ dataTypeOf (undefined :: News)

    expected :: [String]
    expected =
      map (\x -> tsn_db_field_namer "herp" "derp" 8675309 x 90210) field_names

    actual :: [String]
    actual = ["xml_file_id",
              "mid",
              "sport",
              "url",
              "sms",
              "editor",
              "text",
              "continue"]

    check (x,y) = (x @?= y)


-- | If we unpickle something and then pickle it, we should wind up
--   with the same thing we started with. WARNING: success of this
--   test does not mean that unpickling succeeded.
--
test_pickle_of_unpickle_is_identity :: TestTree
test_pickle_of_unpickle_is_identity = testGroup "pickle-unpickle tests"
  [ check "pickle composed with unpickle is the identity"
          "test/xml/newsxml.xml",

    check "pickle composed with unpickle is the identity (with Editor)"
          "test/xml/newsxml-with-editor.xml",

    check "pickle composed with unpickle is the identity (empty SMS)"
          "test/xml/newsxml-empty-sms.xml" ]
  where
    check desc path = testCase desc $ do
      (expected, actual) <- pickle_unpickle pickle_message path
      actual @?= expected


-- | Make sure we can actually unpickle these things.
--
test_unpickle_succeeds :: TestTree
test_unpickle_succeeds = testGroup "unpickle tests"
  [ check "unpickling succeeds"
          "test/xml/newsxml.xml",

    check "unpickling succeeds (with Editor)"
          "test/xml/newsxml-with-editor.xml",

    check "unpickling succeeds (empty SMS)"
          "test/xml/newsxml-empty-sms.xml" ]
  where
    check desc path = testCase desc $ do
      actual <- unpickleable path pickle_message
      let expected = True
      actual @?= expected


-- | Make sure everything gets deleted when we delete the top-level
--   record.
--
test_on_delete_cascade :: TestTree
test_on_delete_cascade = testGroup "cascading delete tests"
  [ check "deleting news deletes its children"
          "test/xml/newsxml.xml"
          4 -- 2 news_teams and 2 news_locations that should remain.
    ,
    check "deleting news deletes its children (empty SMS)"
          "test/xml/newsxml-empty-sms.xml"
          4 -- 2 news_teams and 2 news_locations
  ]
  where
    check desc path expected = testCase desc $ do
      news <- unsafe_unpickle path pickle_message
      let a = undefined :: Location
      let b = undefined :: News
      let c = undefined :: Team
      let d = undefined :: News_Team
      let e = undefined :: News_Location
      actual <- withSqliteConn ":memory:" $ runDbConn $ do
                  runMigrationSilent $ do
                    migrate a
                    migrate b
                    migrate c
                    migrate d
                    migrate e
                  _ <- dbimport news
                  deleteAll b
                  count_a <- countAll a
                  count_b <- countAll b
                  count_c <- countAll c
                  count_d <- countAll d
                  count_e <- countAll e
                  return $ count_a + count_b + count_c + count_d + count_e
      actual @?= expected


-- | We want to make sure the single-SMS documents and the multi-SMS
--   documents are identified correctly.
--
test_sms_detected_correctly :: TestTree
test_sms_detected_correctly =
  testGroup "newsxml SMS count determined correctly"
    [ check "test/xml/newsxml.xml"
            "single SMS detected correctly"
            True,
      check "test/xml/newsxml-multiple-sms.xml"
            "multiple SMS detected correctly"
            False ]
  where
    check path desc expected = testCase desc $ do
      xmltree <- unsafe_read_invalid_document path
      let actual = has_only_single_sms xmltree
      actual @?= expected