| Safe Haskell | None |
|---|
Language.Guess
Description
Example usage:
```
>>> dat <- loadData'
>>> head $ guess dat "this is a teststring"
("en",0.49421052631578954)
>>> take 2 $ guess dat "dette er en teststreng"
[("no",0.5703030303030303),("da",0.5096969696969698)]
>>> head $ guess dat "lorem ipsum dolor sit amet"
("la",0.34199999999999997)
```
- type Trigram = (Char, Char, Char)
- type Frequency = Int
- type Rank = Int
- type Language = String
- threshold :: Int
- loadData :: FilePath -> IO (Map Language (Map Trigram Rank))
- loadData' :: IO (Map Language (Map Trigram Rank))
- guess :: Map Language (Map Trigram Rank) -> String -> [(Language, Double)]
- distance :: Map Trigram Rank -> Map Trigram Rank -> Double
- rank :: Map Trigram Frequency -> Map Trigram Rank
- parse :: String -> Map (Char, Char, Char) Frequency
- clean :: String -> String
Documentation
guess :: Map Language (Map Trigram Rank) -> String -> [(Language, Double)] Source
Guess the language of a string.
distance :: Map Trigram Rank -> Map Trigram Rank -> Double Source
Calculate the distance between ranked trigram sets (Cavnar & Trenkle, 1994).
rank :: Map Trigram Frequency -> Map Trigram Rank Source
Convert a set of trigram frequencies to ranks.
Capped at a maximum of threshold; ties are broken by alphabetical sort.