-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Alphabet and word representations
--
-- Provides different encodings for characters and words in natural
-- language processing. A character will often be encoded as a Unicode
-- text string, as we deal with multi-symbol characters.
--
-- The internal encoding of IMMC symbols is as 0-based integers, which
-- allows for the use of unboxed containers.
--
-- A very simple unigram-based scoring scheme and a DSL to write such
-- schemes are also provided.
--
--
-- https://github.com/choener/NaturalLanguageAlphabets/blob/master/README.md
@package NaturalLanguageAlphabets
@version 0.0.2.0
-- | An alphabet, where each character is a short piece of Text.
module NLP.Alphabet.MultiChar
-- | Interns a MultiChar character.
internMultiChar :: MultiChar -> MultiChar
-- | Wraps a short piece of Text. The Read and Show instances behave like
-- those for normal strings.
newtype MultiChar
MultiChar :: Text -> MultiChar
[getMultiChar] :: MultiChar -> Text
-- | Interned MultiChar.
--
-- TODO Check Ord instance. We compare on
-- uninternMultiChar.
data InternedMultiChar
InternedMultiChar :: {-# UNPACK #-} !Id -> {-# UNPACK #-} !MultiChar -> InternedMultiChar
[internedMultiCharId] :: InternedMultiChar -> {-# UNPACK #-} !Id
[uninternMultiChar] :: InternedMultiChar -> {-# UNPACK #-} !MultiChar
imcCache :: Cache InternedMultiChar
instance GHC.Generics.Selector NLP.Alphabet.MultiChar.S1_0_1InternedMultiChar
instance GHC.Generics.Selector NLP.Alphabet.MultiChar.S1_0_0InternedMultiChar
instance GHC.Generics.Constructor NLP.Alphabet.MultiChar.C1_0InternedMultiChar
instance GHC.Generics.Datatype NLP.Alphabet.MultiChar.D1InternedMultiChar
instance GHC.Generics.Selector NLP.Alphabet.MultiChar.S1_0_0MultiChar
instance GHC.Generics.Constructor NLP.Alphabet.MultiChar.C1_0MultiChar
instance GHC.Generics.Datatype NLP.Alphabet.MultiChar.D1MultiChar
instance Data.Hashable.Class.Hashable (Data.Interned.Internal.Description NLP.Alphabet.MultiChar.InternedMultiChar)
instance GHC.Classes.Eq (Data.Interned.Internal.Description NLP.Alphabet.MultiChar.InternedMultiChar)
instance Data.Data.Data NLP.Alphabet.MultiChar.InternedMultiChar
instance GHC.Generics.Generic NLP.Alphabet.MultiChar.InternedMultiChar
instance Data.Data.Data NLP.Alphabet.MultiChar.MultiChar
instance GHC.Generics.Generic NLP.Alphabet.MultiChar.MultiChar
instance GHC.Classes.Ord NLP.Alphabet.MultiChar.MultiChar
instance GHC.Classes.Eq NLP.Alphabet.MultiChar.MultiChar
instance GHC.Show.Show NLP.Alphabet.MultiChar.MultiChar
instance GHC.Read.Read NLP.Alphabet.MultiChar.MultiChar
instance Data.Hashable.Class.Hashable NLP.Alphabet.MultiChar.MultiChar
instance Data.String.IsString NLP.Alphabet.MultiChar.MultiChar
instance Data.Stringable.Stringable NLP.Alphabet.MultiChar.MultiChar
instance Control.DeepSeq.NFData NLP.Alphabet.MultiChar.MultiChar
instance Data.String.IsString NLP.Alphabet.MultiChar.InternedMultiChar
instance GHC.Classes.Eq NLP.Alphabet.MultiChar.InternedMultiChar
instance GHC.Classes.Ord NLP.Alphabet.MultiChar.InternedMultiChar
instance GHC.Read.Read NLP.Alphabet.MultiChar.InternedMultiChar
instance GHC.Show.Show NLP.Alphabet.MultiChar.InternedMultiChar
instance Data.Hashable.Class.Hashable NLP.Alphabet.MultiChar.InternedMultiChar
instance Data.Interned.Internal.Interned NLP.Alphabet.MultiChar.InternedMultiChar
instance Data.Stringable.Stringable NLP.Alphabet.MultiChar.InternedMultiChar
instance Control.DeepSeq.NFData NLP.Alphabet.MultiChar.InternedMultiChar
-- | This module keeps a persistent bimap between
-- InternedMultiChars and Ints
--
-- TODO make this a bimap Text - Vector. Compare
-- performance when printing backtracking results. (Do this after the
-- Builder-based backtracking is online)
module NLP.Alphabet.IMMC.Internal
immcBimap :: IORef (Bimap InternedMultiChar Int)
-- | Add an InternedMultiChar and return its Int key. If the string is
-- already present, its existing key is returned, so this function also
-- serves as the lookup in the left-to-right direction.
immcBimapAdd :: InternedMultiChar -> Int
-- | Look up the InternedMultiChar based on an Int key.
-- Unsafe: assumes totality, i.e. that the key is present in the bimap.
immcBimapLookupInt :: Int -> InternedMultiChar
-- | An implementation of Int-mapped MultiChars with
-- internalization.
module NLP.Alphabet.IMMC
newtype IMMC
IMMC :: Int -> IMMC
[getIMMC] :: IMMC -> Int
immc :: InternedMultiChar -> IMMC
instance Data.Vector.Unboxed.Base.Unbox NLP.Alphabet.IMMC.IMMC
instance Data.Vector.Generic.Mutable.Base.MVector Data.Vector.Unboxed.Base.MVector NLP.Alphabet.IMMC.IMMC
instance Data.Vector.Generic.Base.Vector Data.Vector.Unboxed.Base.Vector NLP.Alphabet.IMMC.IMMC
instance GHC.Classes.Ord NLP.Alphabet.IMMC.IMMC
instance Data.String.IsString NLP.Alphabet.IMMC.IMMC
instance GHC.Show.Show NLP.Alphabet.IMMC.IMMC
instance GHC.Read.Read NLP.Alphabet.IMMC.IMMC
instance Data.Hashable.Class.Hashable NLP.Alphabet.IMMC.IMMC
instance Data.Stringable.Stringable NLP.Alphabet.IMMC.IMMC
instance Control.DeepSeq.NFData NLP.Alphabet.IMMC.IMMC
instance Data.Binary.Class.Binary NLP.Alphabet.IMMC.IMMC
instance Data.Serialize.Serialize NLP.Alphabet.IMMC.IMMC
instance Data.Aeson.Types.Class.FromJSON NLP.Alphabet.IMMC.IMMC
instance Data.Aeson.Types.Class.ToJSON NLP.Alphabet.IMMC.IMMC
instance GHC.Generics.Selector NLP.Alphabet.IMMC.S1_0_0IMMC
instance GHC.Generics.Constructor NLP.Alphabet.IMMC.C1_0IMMC
instance GHC.Generics.Datatype NLP.Alphabet.IMMC.D1IMMC
instance GHC.Generics.Generic NLP.Alphabet.IMMC.IMMC
instance GHC.Classes.Eq NLP.Alphabet.IMMC.IMMC
-- | This module defines a simple scoring scheme based on pairs of
-- unigrams.
module NLP.Scoring.SimpleUnigram
-- | Score the IMMC-encoded characters x and y based on the
-- simple scoring system: (i) look up (x,y) and use the score if found;
-- (ii) if (x,y) is not in the database, then return the default match
-- score defMatch if x==y, otherwise return the default
-- mismatch score defMismatch.
scoreUnigram :: SimpleScoring -> IMMC -> IMMC -> Double
-- | Collect the hashtable and scalar values for simple scoring.
data SimpleScoring
SimpleScoring :: !(BasicHashTable (IMMC, IMMC) Double) -> !Double -> !Double -> !Double -> !Double -> !Double -> SimpleScoring
[simpleScore] :: SimpleScoring -> !(BasicHashTable (IMMC, IMMC) Double)
[gapScore] :: SimpleScoring -> !Double
[gapOpen] :: SimpleScoring -> !Double
[gapExtend] :: SimpleScoring -> !Double
[defMatch] :: SimpleScoring -> !Double
[defMismatch] :: SimpleScoring -> !Double
instance GHC.Show.Show NLP.Scoring.SimpleUnigram.SimpleScoring
module NLP.Scoring.SimpleUnigram.Import
-- | Each parsed line gives a set of characters, or tells us a score.
--
-- TODO add PLimport which starts a recursive import (note:
-- start by storing the hash or whatever of the file to be imported so
-- that we can detect and report circular imports)
data ParsedLine
PLset :: Text -> [IMMC] -> ParsedLine
PLeq :: Text -> Double -> ParsedLine
PLeqset :: Text -> [IMMC] -> ParsedLine
PLinset :: Text -> Text -> Double -> ParsedLine
PLgap :: Double -> ParsedLine
PLgapopen :: Double -> ParsedLine
PLgapextend :: Double -> ParsedLine
PLdefmatch :: Double -> ParsedLine
PLdefmismatch :: Double -> ParsedLine
PLcomment :: Text -> ParsedLine
-- | Here we simply parse individual lines.
parseLine :: Text -> ParsedLine
-- | Parses a Text to create a simple scoring. We don't do much error
-- checking; many of the bindings below will easily fail.
--
-- TODO obviously: implement error-checking
genSimpleScoring :: Text -> SimpleScoring
-- | Parse a simple scoring file.
simpleScoreFromFile :: FilePath -> IO SimpleScoring
instance GHC.Classes.Ord NLP.Scoring.SimpleUnigram.Import.ParsedLine
instance GHC.Classes.Eq NLP.Scoring.SimpleUnigram.Import.ParsedLine
instance GHC.Show.Show NLP.Scoring.SimpleUnigram.Import.ParsedLine
module NLP.Scoring.SimpleUnigram.Default
-- | Default simple unigram scores for a system of consonants, liquid
-- consonants, and vowels of arbitrary scale.
clvDefaults :: SimpleScoring