{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase                 #-}
{-# LANGUAGE OverloadedStrings          #-}

module Language.English.Pluralize (pluralize, testPluralization) where

import           Control.Applicative   ((<|>))
import           Data.ByteString       as BS
import           Data.ByteString.Char8 as C
import           Data.Maybe            (fromMaybe)
import           Data.Monoid           ((<>))
import           Data.Set              as S
import           Data.String           (IsString, fromString)

{-| Pluralize an english word -}
pluralize :: ByteString -> ByteString
pluralize x = unPlural . fromMaybe (Plural $ x <> "s")
   $ special raw
  <|> match' noChange
  <|> match' exceptions
  <|> match' yrule
  <|> match' compound
  <|> match' weirdEnders
  <|> match' short
  <|> match' single
  where match' = S.foldl' (<|>) Nothing . S.map (match raw)
        raw = Raw x

{-| For testing in the repl. Pass the number of things, and an input -}
testPluralization :: Int -> ByteString -> ByteString
testPluralization 1 x = "1 " <> x
testPluralization n x = fromString (show n) <> " " <> pluralize x


newtype SuffixToMatch = SuffixToMatch ByteString   deriving (Eq,Ord,IsString)
newtype SuffixToAdd = SuffixToAdd ByteString       deriving (Eq,Ord,IsString)
newtype Plural = Plural { unPlural :: ByteString } deriving (Eq,Ord,IsString)
newtype Raw = Raw ByteString                       deriving (Eq,Ord,IsString)
type Matcher = (SuffixToMatch, SuffixToAdd)


match :: Raw -> Matcher -> Maybe Plural
match (Raw raw) (SuffixToMatch toMatch, (SuffixToAdd toAdd)) =
   Plural . (<> toAdd) <$> BS.stripSuffix toMatch raw


(|>) :: SuffixToMatch -> SuffixToAdd -> Matcher
(|>) = (,)


single :: Set Matcher
single = S.fromList
  $ "s" |> "ses"
  : "x" |> "xes"
  : "o" |> "oes"
  : "z" |> "zes"
  : []


short :: Set Matcher
short = S.fromList
  $ "ex"   |> "ices"
  : "ix"   |> "ices"
  : "is"   |> "es"
  : "sh"   |> "shes"
  : "ch"   |> "ches"
  : "eaf"  |> "eaves"
  : "alf"  |> "alves"
  : "ief"  |> "ieves"
  : "oof"  |> "ooves"
  : "ife"  |> "ives"
  : "if"   |> "lves"
  : "ndum" |> "nda"
  : "um"   |> "a"
  : "ia"   |> "ium"
  : "ma"   |> "mata"
  : "na"   |> "nae"
  : "ta"   |> "tum"
  : []


yrule :: Set Matcher
yrule = S.fromList
  $ "ay"  |> "ays"
  : "ey"  |> "eys"
  : "iy"  |> "iys"
  : "oy"  |> "oys"
  : "uy"  |> "uys"
  : "quy" |> "quies"
  : []


weirdEnders :: Set Matcher
weirdEnders = S.fromList
  $ "ox"         |> "oxen"
  : "y"          |> "ies"
  : "cs"         |> "csen"
  : "alga"       |> "algae"
  : "atlas"      |> "atlases"
  : "alumna"     |> "alumnae"
  : "alumnus"    |> "alumni"
  : "ameoba"     |> "ameobae"
  : "automaton"  |> "automata"
  : "bacillus"   |> "bacilli"
  : "cactus"     |> "cacti"
  : "cannon"     |> "cannon"
  : "child"      |> "children"
  : "corpus"     |> "corpora"
  : "foot"       |> "feet"
  : "formula"    |> "formulae"
  : "graffito"   |> "graffiti"
  : "rion"       |> "ria"
  : "focus"      |> "foci"
  : "genus"      |> "genera"
  : "goose"      |> "geese"
  : "hedron"     |> "hedra"
  : "insigne"    |> "insignia"
  : "life"       |> "lives"
  : "louse"      |> "lice"
  : "man"        |> "men"
  : "mouse"      |> "mice"
  : "nucleus"    |> "nuclei"
  : "opus"       |> "opera"
  : "panino"     |> "panini"
  : "paparazzo"  |> "paparazzi"
  : "phalanx"    |> "phalanges"
  : "phenomenon" |> "phenomena"
  : "person"     |> "people"
  : "portico"    |> "porticos"
  : "polis"      |> "poleis"
  : "quarto"     |> "quartos"
  : "radius"     |> "radii"
  : "rhinoceros" |> "rhinoceri"
  : "dwarf"      |> "dwarves"
  : "syllabus"   |> "syllabi"
  : "terminus"   |> "termini"
  : "hippopotamus" |> "hippopotami"
  : "virtuoso"     |> "virtuosi"
  : "brother"      |> "brethren"
  : "beau"         |> "beaux"
  : "bureau"       |> "bureaux"
  : "tableau"      |> "tableaux"
  : "ulus"       |> "uli"
  : "ubus"       |> "ubi"
  : "ngus"       |> "ngi"
  : "ylus"       |> "yli"
  : "tooth"      |> "teeth"
  : "uterus"     |> "uteri"
  : "viscus"     |> "viscera"
  : []


exceptions :: Set Matcher
exceptions = S.fromList
  $ "proof"   |> "proofs"
  : "igloo"   |> "igloos"
  : "zero"    |> "zeros"
  : "photo"   |> "photos"
  : "hetero"  |> "heteros"
  : "schema"  |> "schemas"
  : "magma"   |> "magmas"
  : "enema"   |> "enemas"
  : "motto"   |> "mottos"
  : "dogma"   |> "dogmas"
  : "lemma"   |> "lemmas"
  : "schema"  |> "schemas"
  : "kimono"  |> "kimonos"
  : "pro"     |> "pros"
  : "solo"    |> "solos"
  : "canto"   |> "cantos"
  : "cargo"   |> "cargos"
  : "banjo"   |> "banjos"
  : "piano"   |> "pianos"

  : "octopus" |> "octopuses"
  : "box"     |> "boxes"
  : []


noChange :: Set Matcher
noChange = S.fromList $ (\x -> SuffixToMatch x |> SuffixToAdd x) <$>
  [ "bison"
  , "buffalo"
  , "cod"
  , "carp"
  , "cattle"
  , "duck"
  , "fish"
  , "moose"
  , "people"
  , "deer"
  , "offspring"
  , "aircraft"
  , "mackerel"
  , "salmon"
  , "shrimp"
  , "sheep"
  , "squid"
  , "trout"
  , "pike"
  , "police"
  , "perch"
  , "ies" ]


special :: Raw -> Maybe Plural
special (Raw x) =
  let (b,m) = C.breakEnd (== ' ') x
      as s = Just . Plural $ b <> s
  in case m of
    "die"   -> as "dice"
    "Atlas" -> as "Atlantes"
    "cello" -> as "celli"
    "Harry" -> as "Harrys"
    _       -> Nothing

compound :: Set Matcher
compound = S.fromList
  $ "coat-of-arms" |> "coats-of-arms"
  : "son-in-law" |> "sons-in-law"
  : "minister-president" |> "ministers-president"
  : "governor-general" |> "governors-general"
  : "passerby" |> "passersby"
  : []