{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} module Language.English.Pluralize (pluralize, testPluralization) where import Control.Applicative ((<|>)) import Data.ByteString as BS import Data.ByteString.Char8 as C import Data.Maybe (fromMaybe) import Data.Monoid ((<>)) import Data.Set as S import Data.String (IsString, fromString) {-| Pluralize an english word -} pluralize :: ByteString -> ByteString pluralize x = unPlural . fromMaybe (Plural $ x <> "s") $ special raw <|> match' noChange <|> match' exceptions <|> match' yrule <|> match' compound <|> match' weirdEnders <|> match' short <|> match' single where match' = S.foldl' (<|>) Nothing . S.map (match raw) raw = Raw x {-| For testing in the repl. Pass the number of things, and an input -} testPluralization :: Int -> ByteString -> ByteString testPluralization 1 x = "1 " <> x testPluralization n x = fromString (show n) <> " " <> pluralize x newtype SuffixToMatch = SuffixToMatch ByteString deriving (Eq,Ord,IsString) newtype SuffixToAdd = SuffixToAdd ByteString deriving (Eq,Ord,IsString) newtype Plural = Plural { unPlural :: ByteString } deriving (Eq,Ord,IsString) newtype Raw = Raw ByteString deriving (Eq,Ord,IsString) type Matcher = (SuffixToMatch, SuffixToAdd) match :: Raw -> Matcher -> Maybe Plural match (Raw raw) (SuffixToMatch toMatch, (SuffixToAdd toAdd)) = Plural . (<> toAdd) <$> BS.stripSuffix toMatch raw (|>) :: SuffixToMatch -> SuffixToAdd -> Matcher (|>) = (,) single :: Set Matcher single = S.fromList $ "s" |> "ses" : "x" |> "xes" : "o" |> "oes" : "z" |> "zes" : [] short :: Set Matcher short = S.fromList $ "ex" |> "ices" : "ix" |> "ices" : "is" |> "es" : "sh" |> "shes" : "ch" |> "ches" : "eaf" |> "eaves" : "alf" |> "alves" : "ief" |> "ieves" : "oof" |> "ooves" : "ife" |> "ives" : "if" |> "lves" : "ndum" |> "nda" : "um" |> "a" : "ia" |> "ium" : "ma" |> "mata" : "na" |> "nae" : "ta" |> "tum" : [] yrule :: Set Matcher yrule = S.fromList $ "ay" |> "ays" : "ey" |> "eys" : "iy" |> "iys" : "oy" |> "oys" : "uy" |> "uys" : "quy" |> "quies" : [] weirdEnders :: Set Matcher weirdEnders = S.fromList $ "ox" |> "oxen" : "y" |> "ies" : "cs" |> "csen" : "alga" |> "algae" : "atlas" |> "atlases" : "alumna" |> "alumnae" : "alumnus" |> "alumni" : "ameoba" |> "ameobae" : "automaton" |> "automata" : "bacillus" |> "bacilli" : "cactus" |> "cacti" : "cannon" |> "cannon" : "child" |> "children" : "corpus" |> "corpora" : "foot" |> "feet" : "formula" |> "formulae" : "graffito" |> "graffiti" : "rion" |> "ria" : "focus" |> "foci" : "genus" |> "genera" : "goose" |> "geese" : "hedron" |> "hedra" : "insigne" |> "insignia" : "life" |> "lives" : "louse" |> "lice" : "man" |> "men" : "mouse" |> "mice" : "nucleus" |> "nuclei" : "opus" |> "opera" : "panino" |> "panini" : "paparazzo" |> "paparazzi" : "phalanx" |> "phalanges" : "phenomenon" |> "phenomena" : "person" |> "people" : "portico" |> "porticos" : "polis" |> "poleis" : "quarto" |> "quartos" : "radius" |> "radii" : "rhinoceros" |> "rhinoceri" : "dwarf" |> "dwarves" : "syllabus" |> "syllabi" : "terminus" |> "termini" : "hippopotamus" |> "hippopotami" : "virtuoso" |> "virtuosi" : "brother" |> "brethren" : "beau" |> "beaux" : "bureau" |> "bureaux" : "tableau" |> "tableaux" : "ulus" |> "uli" : "ubus" |> "ubi" : "ngus" |> "ngi" : "ylus" |> "yli" : "tooth" |> "teeth" : "uterus" |> "uteri" : "viscus" |> "viscera" : [] exceptions :: Set Matcher exceptions = S.fromList $ "proof" |> "proofs" : "igloo" |> "igloos" : "zero" |> "zeros" : "photo" |> "photos" : "hetero" |> "heteros" : "schema" |> "schemas" : "magma" |> "magmas" : "enema" |> "enemas" : "motto" |> "mottos" : "dogma" |> "dogmas" : "lemma" |> "lemmas" : "schema" |> "schemas" : "kimono" |> "kimonos" : "pro" |> "pros" : "solo" |> "solos" : "canto" |> "cantos" : "cargo" |> "cargos" : "banjo" |> "banjos" : "piano" |> "pianos" : "octopus" |> "octopuses" : "box" |> "boxes" : [] noChange :: Set Matcher noChange = S.fromList $ (\x -> SuffixToMatch x |> SuffixToAdd x) <$> [ "bison" , "buffalo" , "cod" , "carp" , "cattle" , "duck" , "fish" , "moose" , "people" , "deer" , "offspring" , "aircraft" , "mackerel" , "salmon" , "shrimp" , "sheep" , "squid" , "trout" , "pike" , "police" , "perch" , "ies" ] special :: Raw -> Maybe Plural special (Raw x) = let (b,m) = C.breakEnd (== ' ') x as s = Just . Plural $ b <> s in case m of "die" -> as "dice" "Atlas" -> as "Atlantes" "cello" -> as "celli" "Harry" -> as "Harrys" _ -> Nothing compound :: Set Matcher compound = S.fromList $ "coat-of-arms" |> "coats-of-arms" : "son-in-law" |> "sons-in-law" : "minister-president" |> "ministers-president" : "governor-general" |> "governors-general" : "passerby" |> "passersby" : []