module NLP.Antfarm.Refex where

import Data.Maybe ( mapMaybe )
import Data.Text ( Text )
import Data.Tree ( Tree )
import qualified Data.Set as Set

import NLP.Minimorph.Number

import NLP.Antfarm.Cardinality

-- ----------------------------------------------------------------------
-- * Referring expressions
-- ----------------------------------------------------------------------

-- | Input needed to realise a subunit of a referring expression
--   A subunit corresponds to 'RefGroup' (but in practice,
--   we need the whole 'DiscourseUnit', not just the 'RefGroup'
--   root)
data SubRxInput = SubRxInput
    { srxInpDet    :: SingPlu [Text] -- ^ determiner (can be empty)
    , srxInpWord   :: SingPlu Text   -- ^ main word
    , srxInpEntity :: DiscourseUnit
    }

-- | Output for a subunit of a referring expression
data SubRx = SubRx
    { srxNumber        :: Number
    , srxDiscriminator :: Discriminator
    , srxDet           :: SingPlu [Text]
    , srxWord          :: SingPlu Text
    }
  deriving (Eq, Show)

-- | A single referring expression has subunits, each of which
--   potentially having examples
type RxInput = [Tree SubRxInput]

-- | A referring expression
type Rx = [Tree SubRx]

-- ----------------------------------------------------------------------
-- * Discourse units
-- ----------------------------------------------------------------------

-- | A discourse unit includes all instances and constraints needed
--   to uniquely identify it. (see note discourse tree)
--
--   In the current implementation, a referring expression may contain
--   more than one discourse unit.  So in a referring expression “three cats
--   and at most two dogs (a poodle and a labrador)”, the “at most two dogs
--   (a poodle and a labrador)” and “three cats” would each correspond to
--   different 'DiscourseUnit's
type DiscourseUnit = Tree RefGroup

-- | A sub-unit in a referring expression, instances of and/or constraints
--   over class.  So in a referring expression “three cats and at most two
--   dogs”, the “at most two dogs” and “three cats” would each be 'RefGroup's
data RefGroup = RefGroup
    { rgClass  :: Text
    , rgIdxes  :: Set.Set Text
    , rgBounds :: Bounds
    }
  deriving (Ord, Eq)

rgIdxList :: RefGroup -> [Text]
rgIdxList = Set.toList . rgIdxes

-- | A unique object
type RefKey      = (Text, Text)

-- ----------------------------------------------------------------------
-- * Bounds
-- ----------------------------------------------------------------------

data Bounds = Bounds
    { bUnknown :: [Text]
        --  the whole idea of unknown constraints makes me very
        --  unhappy; we want to pass through any malformed
        --  constraints as is, so for now the only thing I can
        --  think to do with them is collect them in the groups
        --  which just seems wrong
    , bLower   :: Maybe Int -- ^ lower
    , bUpper   :: Maybe Int -- ^ upper
    }
  deriving (Ord, Eq)

emptyBounds :: Bounds
emptyBounds = Bounds [] Nothing Nothing

explicitBounds :: [Constraint] -> Bounds
explicitBounds cs = Bounds
    { bUnknown = [ t | Unknown t <- cs ]
    , bLower   = maximum `orNothing` mapMaybe lowerBound cs
    , bUpper   = minimum `orNothing` mapMaybe upperBound cs
    }
  where
    orNothing _ [] = Nothing
    orNothing f xs = Just (f xs)

-- | When two 'Bounds' are combined the result is narrower: the highest low
--   and the lowest high.
--
--   The unknown bounds are not really defined.  We concatenate them, for
--   what it's worth, which is at least sensible when none or only one of
--   them is defined, but not ideal when both are
narrow :: Bounds -> Bounds -> Bounds
narrow b1 b2 =
   b1 { bUnknown = bUnknown b1 ++ bUnknown b2
      , bLower   = mergeWith max (bLower b1) (bLower b2)
      , bUpper   = mergeWith min (bUpper b1) (bUpper b2)
      }
  where
    mergeWith _ Nothing    Nothing = Nothing
    mergeWith _ x@(Just _) Nothing = x
    mergeWith _ Nothing x@(Just _) = x
    mergeWith f (Just x)  (Just y) = Just (f x y)

-- ----------------------------------------------------------------------
-- * Number
-- ----------------------------------------------------------------------

-- | Fuzzy number is a variant on 'Number' that allows us the option
--   of overriding what would otherwise be singular agreement
--
--   If you don't need to, or have no idea why somebody would even want
--   to do such a thing, just 'defuzz' it
data FuzzyNumber = FN_Plural
                 | FN_MaybeSingular
                 | FN_Singular
  deriving (Eq, Show)

-- | @defuzz@ treats 'FN_MaybeSingular' as 'Singular'
defuzz :: FuzzyNumber -> Number
defuzz FN_Plural        = Plural
defuzz FN_MaybeSingular = Singular
defuzz FN_Singular      = Singular

-- | Somewhat abstract representation of subrx discriminators
--   (but in reality just based on English)
--
--   A discriminator is what we call the optional bit of text that helps
--   you distinguish one set instances of a class from another, eg,
--   “the same” or “another three”, or simply “the“.  This isn't a
--   technical term as far as I'm aware, just a made-up convenience word
data Discriminator = NilDiscriminator
                   | Bounded BoundsExpr
                   | TheSame
                   | TheOther
                   | TheOrdinal    Int
                   | NewOrdinal    Int
                   | Another       Int
                   | PlainCardinal Int
                   | CardinalOfThe Int
                   | The
  deriving (Eq, Show)

data BoundsExpr = SayAtLeast Int
                | SayAtMost  Int
                | SayBetween Int Int
                | SayExactly Int
                | SayArbitrary Text
  deriving (Eq, Show)