{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.English.Pluralize (pluralize, testPluralization) where
import Control.Applicative ((<|>))
import Data.ByteString as BS
import Data.ByteString.Char8 as C
import Data.Maybe (fromMaybe)
import Data.Monoid ((<>))
import Data.Set as S
import Data.String (IsString, fromString)
pluralize :: ByteString -> ByteString
pluralize x = unPlural . fromMaybe (Plural $ x <> "s")
$ special raw
<|> match' noChange
<|> match' exceptions
<|> match' yrule
<|> match' compound
<|> match' weirdEnders
<|> match' short
<|> match' single
where match' = S.foldl' (<|>) Nothing . S.map (match raw)
raw = Raw x
testPluralization :: Int -> ByteString -> ByteString
testPluralization 1 x = "1 " <> x
testPluralization n x = fromString (show n) <> " " <> pluralize x
newtype SuffixToMatch = SuffixToMatch ByteString deriving (Eq,Ord,IsString)
newtype SuffixToAdd = SuffixToAdd ByteString deriving (Eq,Ord,IsString)
newtype Plural = Plural { unPlural :: ByteString } deriving (Eq,Ord,IsString)
newtype Raw = Raw ByteString deriving (Eq,Ord,IsString)
type Matcher = (SuffixToMatch, SuffixToAdd)
match :: Raw -> Matcher -> Maybe Plural
match (Raw raw) (SuffixToMatch toMatch, (SuffixToAdd toAdd)) =
Plural . (<> toAdd) <$> BS.stripSuffix toMatch raw
(|>) :: SuffixToMatch -> SuffixToAdd -> Matcher
(|>) = (,)
single :: Set Matcher
single = S.fromList
$ "s" |> "ses"
: "x" |> "xes"
: "o" |> "oes"
: "z" |> "zes"
: []
short :: Set Matcher
short = S.fromList
$ "ex" |> "ices"
: "ix" |> "ices"
: "is" |> "es"
: "sh" |> "shes"
: "ch" |> "ches"
: "eaf" |> "eaves"
: "alf" |> "alves"
: "ief" |> "ieves"
: "oof" |> "ooves"
: "ife" |> "ives"
: "if" |> "lves"
: "ndum" |> "nda"
: "um" |> "a"
: "ia" |> "ium"
: "ma" |> "mata"
: "na" |> "nae"
: "ta" |> "tum"
: []
yrule :: Set Matcher
yrule = S.fromList
$ "ay" |> "ays"
: "ey" |> "eys"
: "iy" |> "iys"
: "oy" |> "oys"
: "uy" |> "uys"
: "quy" |> "quies"
: []
weirdEnders :: Set Matcher
weirdEnders = S.fromList
$ "ox" |> "oxen"
: "y" |> "ies"
: "cs" |> "csen"
: "alga" |> "algae"
: "atlas" |> "atlases"
: "alumna" |> "alumnae"
: "alumnus" |> "alumni"
: "ameoba" |> "ameobae"
: "automaton" |> "automata"
: "bacillus" |> "bacilli"
: "cactus" |> "cacti"
: "cannon" |> "cannon"
: "child" |> "children"
: "corpus" |> "corpora"
: "foot" |> "feet"
: "formula" |> "formulae"
: "graffito" |> "graffiti"
: "rion" |> "ria"
: "focus" |> "foci"
: "genus" |> "genera"
: "goose" |> "geese"
: "hedron" |> "hedra"
: "insigne" |> "insignia"
: "life" |> "lives"
: "louse" |> "lice"
: "man" |> "men"
: "mouse" |> "mice"
: "nucleus" |> "nuclei"
: "opus" |> "opera"
: "panino" |> "panini"
: "paparazzo" |> "paparazzi"
: "phalanx" |> "phalanges"
: "phenomenon" |> "phenomena"
: "person" |> "people"
: "portico" |> "porticos"
: "polis" |> "poleis"
: "quarto" |> "quartos"
: "radius" |> "radii"
: "rhinoceros" |> "rhinoceri"
: "dwarf" |> "dwarves"
: "syllabus" |> "syllabi"
: "terminus" |> "termini"
: "hippopotamus" |> "hippopotami"
: "virtuoso" |> "virtuosi"
: "brother" |> "brethren"
: "beau" |> "beaux"
: "bureau" |> "bureaux"
: "tableau" |> "tableaux"
: "ulus" |> "uli"
: "ubus" |> "ubi"
: "ngus" |> "ngi"
: "ylus" |> "yli"
: "tooth" |> "teeth"
: "uterus" |> "uteri"
: "viscus" |> "viscera"
: []
exceptions :: Set Matcher
exceptions = S.fromList
$ "proof" |> "proofs"
: "igloo" |> "igloos"
: "zero" |> "zeros"
: "photo" |> "photos"
: "hetero" |> "heteros"
: "schema" |> "schemas"
: "magma" |> "magmas"
: "enema" |> "enemas"
: "motto" |> "mottos"
: "dogma" |> "dogmas"
: "lemma" |> "lemmas"
: "schema" |> "schemas"
: "kimono" |> "kimonos"
: "pro" |> "pros"
: "solo" |> "solos"
: "canto" |> "cantos"
: "cargo" |> "cargos"
: "banjo" |> "banjos"
: "piano" |> "pianos"
: "octopus" |> "octopuses"
: "box" |> "boxes"
: []
noChange :: Set Matcher
noChange = S.fromList $ (\x -> SuffixToMatch x |> SuffixToAdd x) <$>
[ "bison"
, "buffalo"
, "cod"
, "carp"
, "cattle"
, "duck"
, "fish"
, "moose"
, "people"
, "deer"
, "offspring"
, "aircraft"
, "mackerel"
, "salmon"
, "shrimp"
, "sheep"
, "squid"
, "trout"
, "pike"
, "police"
, "perch"
, "ies" ]
special :: Raw -> Maybe Plural
special (Raw x) =
let (b,m) = C.breakEnd (== ' ') x
as s = Just . Plural $ b <> s
in case m of
"die" -> as "dice"
"Atlas" -> as "Atlantes"
"cello" -> as "celli"
"Harry" -> as "Harrys"
_ -> Nothing
compound :: Set Matcher
compound = S.fromList
$ "coat-of-arms" |> "coats-of-arms"
: "son-in-law" |> "sons-in-law"
: "minister-president" |> "ministers-president"
: "governor-general" |> "governors-general"
: "passerby" |> "passersby"
: []