Safe Haskell | None |
---|
Example usage:
>>> dat <- loadData'
>>> head $ guess dat "this is a teststring"
("en",0.49421052631578954)
>>> take 2 $ guess dat "dette er en teststreng"
[("no",0.5703030303030303),("da",0.5096969696969698)]
>>> head $ guess dat "lorem ipsum dolor sit amet"
("la",0.34199999999999997)
- type Trigram = (Char, Char, Char)
- type Frequency = Int
- type Rank = Int
- type Language = String
- threshold :: Int
- loadData :: FilePath -> IO (Map Language (Map Trigram Rank))
- loadData' :: IO (Map Language (Map Trigram Rank))
- guess :: Map Language (Map Trigram Rank) -> String -> [(Language, Double)]
- distance :: Map Trigram Rank -> Map Trigram Rank -> Double
- rank :: Map Trigram Frequency -> Map Trigram Rank
- parse :: String -> Map (Char, Char, Char) Frequency
- clean :: String -> String
Documentation
guess :: Map Language (Map Trigram Rank) -> String -> [(Language, Double)] Source
Guess the language of a string.
distance :: Map Trigram Rank -> Map Trigram Rank -> Double Source
Calculate the distance between ranked trigram sets. See Cavnar & Trenkle (1994).
rank :: Map Trigram Frequency -> Map Trigram Rank Source
Convert a set of trigram frequencies to ranks.
Maximum of threshold; uses alphabetical sort to break ties.