minimorph-0.2.0.0: English spelling functions with an emphasis on simplicity.

Safe Haskell: Safe
Language: Haskell2010

NLP.Minimorph.English

Contents

Description

Simple default rules for English morphology

Synopsis

Punctuation

commas :: Text -> [Text] -> Text Source #

No Oxford commas, alas.

commas "and" ["foo", "bar"]        == "foo and bar"
commas "and" ["foo", "bar", "baz"] == "foo, bar and baz"

Numbers

cardinal :: Int -> Text Source #

cardinal 0 == "zero"
cardinal 1 == "one"
cardinal 2 == "two"
cardinal 10 == "ten"
cardinal 11 == "11"

ordinalNotSpelled :: Int -> Text Source #

ordinalNotSpelled 1 == "1st"
ordinalNotSpelled 2 == "2nd"
ordinalNotSpelled 11 == "11th"

ordinal :: Int -> Text Source #

ordinal 1 == "first"
ordinal 2 == "second"
ordinal 3 == "third"
ordinal 11 == "11th"
ordinal 42 == "42nd"

Nouns and verbs

defaultNounPlural :: Text -> Text Source #

Heuristics for the English plural of an unknown noun.

defaultNounPlural "egg"    == "eggs"
defaultNounPlural "patch"  == "patches"
defaultNounPlural "boy"    == "boys"
defaultNounPlural "spy"    == "spies"
defaultNounPlural "thesis" == "theses"

http://www.paulnoll.com/Books/Clear-English/English-plurals-1.html

http://en.wikipedia.org/wiki/English_plural

defaultVerbStuff :: Text -> (Text, Text) Source #

Heuristics for 3rd person singular and past participle for an unknown regular verb. Doubling of final consonants can be handled via a table of (partially) irregular verbs.

defaultVerbStuff "walk"  == ("walks",  "walked")
defaultVerbStuff "push"  == ("pushes", "pushed")
defaultVerbStuff "play"  == ("plays",  "played")
defaultVerbStuff "cry"   == ("cries",  "cried")

defaultPossesive :: Text -> Text Source #

Heuristics for a possessive form of an unknown noun.

defaultPossesive "pass"        == "pass'"
defaultPossesive "SOS"         == "SOS'"
defaultPossesive "Mr Blinkin'" == "Mr Blinkin's"
defaultPossesive "cry"         == "cry's"

Determiners

indefiniteDet :: Text -> Text Source #

indefiniteDet "dog"  == "a"
indefiniteDet "egg"  == "an"
indefiniteDet "ewe"  == "a"
indefiniteDet "ewok" == "an"
indefiniteDet "8th"  == "an"

wantsAn :: Text -> Bool Source #

True if the indefinite determiner for a word would normally be 'an' as opposed to 'a'.

acronymWantsAn :: Text -> Bool Source #

Variant of wantsAn that assumes the input string is pronounced one letter at a time.

wantsAn        "x-ray" == False
acronymWantsAn "x-ray" == True

Note that this won't do the right thing for words like "SCUBA". You really have to reserve it for those separate-letter acronyms.

Acronyms

looksLikeAcronym :: Text -> Bool Source #

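True if every letter from the second one onward is upper case; the first letter may be lower case (as in "tRNA"), and a single-letter word does not count.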

looksLikeAcronym "DNA"  == True
looksLikeAcronym "tRNA" == True
looksLikeAcronym "x"    == False
looksLikeAcronym "DnA"  == False

startsWithAcronym :: Text -> Bool Source #

True if the first word (separating on either hyphen or space) looks like an acronym.

Sounds

hasSibilantSuffix :: Text -> Bool Source #

Ends with a 'sh' sound.

hasSemivowelPrefix :: Text -> Bool Source #

Starts with a semivowel.

hasVowel_U_Prefix :: Text -> Bool Source #

Starts with a vowel-y 'U' sound.

hasCySuffix :: Text -> Bool Source #

Last two letters are a consonant and 'y'.

hasCoSuffix :: Text -> Bool Source #

Last two letters are a consonant and 'o'.

isVowel :: Char -> Bool Source #

Is a vowel.

isLetterWithInitialVowelSound :: Char -> Bool Source #

Letters that, when pronounced independently in English, sound like they begin with vowels.

isLetterWithInitialVowelSound 'r' == True
isLetterWithInitialVowelSound 'k' == False

(In the above, 'r' is pronounced "are", but 'k' is pronounced "kay".)

isConsonant :: Char -> Bool Source #

Is a consonant.