minimorph-0.1.6.0: English spelling functions with an emphasis on simplicity.

Copyright: 2012 Eric Kow (Computational Linguistics Ltd.)
License: BSD3
Maintainer: eric.kow@gmail.com
Stability: experimental
Portability: portable
Safe Haskell: Safe-Inferred
Language: Haskell98

NLP.Minimorph.English

Contents

Description

Simple default rules for English morphology

Synopsis

Punctuation

commas :: Text -> [Text] -> Text Source

No Oxford commas, alas.

commas "and" ["foo", "bar"]        == "foo and bar"
commas "and" ["foo", "bar", "baz"] == "foo, bar and baz"

Numbers

cardinal :: Int -> Text Source

cardinal 1 == "one"
cardinal 2 == "two"
cardinal 3 == "three"
cardinal 11 == "11"

ordinalNotSpelled :: Int -> Text Source

ordinalNotSpelled 1 == "1st"
ordinalNotSpelled 2 == "2nd"
ordinalNotSpelled 11 == "11th"

ordinal :: Int -> Text Source

ordinal 1 == "first"
ordinal 2 == "second"
ordinal 3 == "third"
ordinal 11 == "11th"
ordinal 42 == "42nd"

Nouns and verbs

defaultNounPlural :: Text -> Text Source

Heuristics for English plural for an unknown noun.

defaultNounPlural "egg"    == "eggs"
defaultNounPlural "patch"  == "patches"
defaultNounPlural "boy"    == "boys"
defaultNounPlural "spy"    == "spies"
defaultNounPlural "thesis" == "theses"

http://www.paulnoll.com/Books/Clear-English/English-plurals-1.html http://en.wikipedia.org/wiki/English_plural

defaultVerbStuff :: Text -> (Text, Text) Source

Heuristics for 3rd person singular and past participle for an unknown regular verb. Doubling of final consonants can be handled via a table of (partially) irregular verbs.

defaultVerbStuff "walk"  == ("walks",  "walked")
defaultVerbStuff "push"  == ("pushes", "pushed")
defaultVerbStuff "play"  == ("plays",  "played")
defaultVerbStuff "cry"   == ("cries",  "cried")

defaultPossesive :: Text -> Text Source

Heuristics for a possessive form for an unknown noun.

defaultPossesive "pass"        == "pass'"
defaultPossesive "SOS"         == "SOS'"
defaultPossesive "Mr Blinkin'" == "Mr Blinkin's"
defaultPossesive "cry"         == "cry's"

Determiners

indefiniteDet :: Text -> Text Source

indefiniteDet "dog"  == "a"
indefiniteDet "egg"  == "an"
indefiniteDet "ewe"  == "a"
indefiniteDet "ewok" == "an"
indefiniteDet "8th"  == "an"

wantsAn :: Text -> Bool Source

True if the indefinite determiner for a word would normally be "an" as opposed to "a".

acronymWantsAn :: Text -> Bool Source

Variant of wantsAn that assumes the input string is pronounced one letter at a time.

wantsAn        "x-ray" == False
acronymWantsAn "x-ray" == True

Note that this won't do the right thing for words like SCUBA. You really have to reserve it for those separate-letter acronyms.

Acronyms

looksLikeAcronym :: Text -> Bool Source

True if every letter from the second one onward is upper case.

looksLikeAcronym "DNA"  == True
looksLikeAcronym "tRNA" == True
looksLikeAcronym "x"    == False
looksLikeAcronym "DnA"  == False

startsWithAcronym :: Text -> Bool Source

True if the first word (separating on either - or space) looks like an acronym.

Sounds

hasSibilantSuffix :: Text -> Bool Source

Ends with a sh sound.

hasSemivowelPrefix :: Text -> Bool Source

Starts with a semivowel.

hasVowel_U_Prefix :: Text -> Bool Source

Starts with a vowel-y U sound.

hasCySuffix :: Text -> Bool Source

Last two letters are a consonant and y.

hasCoSuffix :: Text -> Bool Source

Last two letters are a consonant and o.

isVowel :: Char -> Bool Source

Is a vowel.

isLetterWithInitialVowelSound :: Char -> Bool Source

Letters that when pronounced independently in English sound like they begin with vowels.

isLetterWithInitialVowelSound 'r' == True
isLetterWithInitialVowelSound 'k' == False

(In the above, r is pronounced "are", but k is pronounced "kay".)

isConsonant :: Char -> Bool Source

Is a consonant.