minimorph: English spelling functions with an emphasis on simplicity.

Safe Haskell: None




Simple default rules for English morphology



commas :: Text -> [Text] -> Text

No Oxford commas, alas.

 commas "and" ["foo", "bar"]        == "foo and bar"
 commas "and" ["foo", "bar", "baz"] == "foo, bar and baz"


cardinal :: Int -> Text

 cardinal 1 == "one"
 cardinal 2 == "two"
 cardinal 3 == "three"
 cardinal 11 == "11"

ordinal :: Int -> Text

 ordinal 1 == "first"
 ordinal 2 == "second"
 ordinal 3 == "third"
 ordinal 11 == "11th"
 ordinal 42 == "42nd"

Nouns and verbs

defaultNounPlural :: Text -> Text

Heuristics for English plural for an unknown noun

 defaultNounPlural "egg"    == "eggs"
 defaultNounPlural "patch"  == "patches"
 defaultNounPlural "boy"    == "boys"
 defaultNounPlural "spy"    == "spies"
 defaultNounPlural "thesis" == "theses"


defaultVerbStuff :: Text -> (Text, Text)

Heuristics for 3rd person singular and past participle for an unknown regular verb

 defaultVerbStuff "walk"  == ("walks",  "walked")
 defaultVerbStuff "push"  == ("pushes", "pushed")
 defaultVerbStuff "play"  == ("plays",  "played")
 defaultVerbStuff "cry"   == ("cries",  "cried")


indefiniteDet :: Text -> Text

 indefiniteDet "dog"  == "a"
 indefiniteDet "egg"  == "an"
 indefiniteDet "ewe"  == "a"
 indefiniteDet "ewok" == "an"
 indefiniteDet "8th"  == "an"

wantsAn :: Text -> Bool

True if the indefinite determiner for a word would normally be "an" as opposed to "a".

acronymWantsAn :: Text -> Bool

Variant of wantsAn that assumes the input string is pronounced one letter at a time.

 wantsAn        "x-ray" == False
 acronymWantsAn "x-ray" == True

Note that this won't do the right thing for words like SCUBA, which are pronounced as a word. You really have to reserve it for acronyms that are pronounced as separate letters.


looksLikeAcronym :: Text -> Bool

True if every character from the second one onward is upper case (the first character may be either case)

 looksLikeAcronym "DNA"  == True
 looksLikeAcronym "tRNA" == True
 looksLikeAcronym "DnA"  == False

startsWithAcronym :: Text -> Bool

True if the first word (separating on either - or space) looks like an acronym


hasSibilantSuffix :: Text -> Bool

Ends with a sibilant sound, such as "sh"

hasSemivowelPrefix :: Text -> Bool

Starts with a semivowel

hasCySuffix :: Text -> Bool

Last two letters are a consonant and y

isVowel :: Char -> Bool

Is a vowel

isLetterWithInitialVowelSound :: Char -> Bool

Letters that when pronounced independently in English sound like they begin with vowels

 isLetterWithInitialVowelSound 'r' == True
 isLetterWithInitialVowelSound 'k' == False

(In the above, 'r' is pronounced "are", but 'k' is pronounced "kay".)

isConsonant :: Char -> Bool

Is a consonant