Copyright | 2012 Eric Kow (Computational Linguistics Ltd.) |
---|---|
License | BSD3 |
Maintainer | eric.kow@gmail.com |
Stability | experimental |
Portability | portable |
Safe Haskell | Safe-Inferred |
Language | Haskell98 |
Simple default rules for English morphology
- commas :: Text -> [Text] -> Text
- cardinal :: Int -> Text
- ordinalNotSpelled :: Int -> Text
- ordinal :: Int -> Text
- defaultNounPlural :: Text -> Text
- defaultVerbStuff :: Text -> (Text, Text)
- defaultPossesive :: Text -> Text
- anNumerals :: [Text]
- indefiniteDet :: Text -> Text
- wantsAn :: Text -> Bool
- acronymWantsAn :: Text -> Bool
- looksLikeAcronym :: Text -> Bool
- startsWithAcronym :: Text -> Bool
- hasSibilantSuffix :: Text -> Bool
- hasSemivowelPrefix :: Text -> Bool
- hasVowel_U_Prefix :: Text -> Bool
- hasCySuffix :: Text -> Bool
- hasCoSuffix :: Text -> Bool
- isVowel :: Char -> Bool
- isLetterWithInitialVowelSound :: Char -> Bool
- isConsonant :: Char -> Bool
Punctuation
commas :: Text -> [Text] -> Text Source
No Oxford commas, alas.
commas "and" ["foo", "bar"] == "foo and bar" commas "and" ["foo", "bar", "baz"] == "foo, bar and baz"
Numbers
cardinal :: Int -> Text Source
cardinal 1 == "one" cardinal 2 == "two" cardinal 3 == "three" cardinal 11 == "11"
ordinalNotSpelled :: Int -> Text Source
ordinalNotSpelled 1 == "1st" ordinalNotSpelled 2 == "2nd" ordinalNotSpelled 11 == "11th"
ordinal :: Int -> Text Source
ordinal 1 == "first" ordinal 2 == "second" ordinal 3 == "third" ordinal 11 == "11th" ordinal 42 == "42nd"
Nouns and verbs
defaultNounPlural :: Text -> Text Source
Heuristics for English plural for an unknown noun.
defaultNounPlural "egg" == "eggs" defaultNounPlural "patch" == "patches" defaultNounPlural "boy" == "boys" defaultNounPlural "spy" == "spies" defaultNounPlural "thesis" == "theses"
http://www.paulnoll.com/Books/Clear-English/English-plurals-1.html http://en.wikipedia.org/wiki/English_plural
defaultVerbStuff :: Text -> (Text, Text) Source
Heuristics for 3rd person singular and past participle for an unknown regular verb. Doubling of final consonants can be handled via a table of (partially) irregular verbs.
defaultVerbStuff "walk" == ("walks", "walked") defaultVerbStuff "push" == ("pushes", "pushed") defaultVerbStuff "play" == ("plays", "played") defaultVerbStuff "cry" == ("cries", "cried")
defaultPossesive :: Text -> Text Source
Heuristics for a possessive form for an unknown noun.
defaultPossesive "pass" == "pass'" defaultPossesive "SOS" == "SOS'" defaultPossesive "Mr Blinkin'" == "Mr Blinkin's" defaultPossesive "cry" == "cry's"
Determiners
anNumerals :: [Text] Source
indefiniteDet :: Text -> Text Source
indefiniteDet "dog" == "a" indefiniteDet "egg" == "an" indefiniteDet "ewe" == "a" indefiniteDet "ewok" == "an" indefiniteDet "8th" == "an"
wantsAn :: Text -> Bool Source
True if the indefinite determiner for a word would normally be "an" as opposed to "a".
acronymWantsAn :: Text -> Bool Source
Acronyms
looksLikeAcronym :: Text -> Bool Source
True if every letter from the second one onwards is upper case.
looksLikeAcronym "DNA" == True looksLikeAcronym "tRNA" == True looksLikeAcronym "x" == False looksLikeAcronym "DnA" == False
startsWithAcronym :: Text -> Bool Source
True if the first word (separating on either - or space) looks like an acronym.
Sounds
hasSibilantSuffix :: Text -> Bool Source
Ends with a sh sound.
hasSemivowelPrefix :: Text -> Bool Source
Starts with a semivowel.
hasVowel_U_Prefix :: Text -> Bool Source
Starts with a vowel-y U sound.
hasCySuffix :: Text -> Bool Source
Last two letters are a consonant and "y".
hasCoSuffix :: Text -> Bool Source
Last two letters are a consonant and "o".
isLetterWithInitialVowelSound :: Char -> Bool Source
Letters that when pronounced independently in English sound like they begin with vowels.
isLetterWithInitialVowelSound 'r' == True isLetterWithInitialVowelSound 'k' == False
(In the above, 'r' is pronounced "are", but 'k' is pronounced "kay".)
isConsonant :: Char -> Bool Source
Is a consonant.