-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Alphabet and word representations -- @package NaturalLanguageAlphabets @version 0.0.1.0 -- | An alphabet, where each character is a short string. -- -- Because of the overhead this incurs, each character is kept in a newtype over Text -- internally (see MultiChar). We also provide an Interned instance to further -- reduce overhead using hash-consing. module NLP.Alphabet.MultiChar -- | Interns a MultiChar character. internMultiChar :: MultiChar -> MultiChar -- | Wrap a short Text. The Read and Show instances behave like those for -- normal strings. newtype MultiChar MultiChar :: Text -> MultiChar getMultiChar :: MultiChar -> Text -- | Interned MultiChar. -- -- TODO Check Ord instance. We compare on -- uninternMultiChar. data InternedMultiChar InternedMultiChar :: {-# UNPACK #-} !Id -> {-# UNPACK #-} !MultiChar -> InternedMultiChar internedMultiCharId :: InternedMultiChar -> {-# UNPACK #-} !Id uninternMultiChar :: InternedMultiChar -> {-# UNPACK #-} !MultiChar imcCache :: Cache InternedMultiChar instance Typeable MultiChar instance Typeable InternedMultiChar instance Eq MultiChar instance Ord MultiChar instance Generic MultiChar instance Data MultiChar instance Generic InternedMultiChar instance Data InternedMultiChar instance Eq (Description InternedMultiChar) instance Hashable (Description InternedMultiChar) instance Datatype D1MultiChar instance Constructor C1_0MultiChar instance Selector S1_0_0MultiChar instance Datatype D1InternedMultiChar instance Constructor C1_0InternedMultiChar instance Selector S1_0_0InternedMultiChar instance Selector S1_0_1InternedMultiChar instance NFData InternedMultiChar instance Stringable InternedMultiChar instance Interned InternedMultiChar instance Hashable InternedMultiChar instance Show InternedMultiChar instance Read InternedMultiChar instance Ord InternedMultiChar instance Eq InternedMultiChar instance IsString InternedMultiChar instance NFData MultiChar instance Stringable 
MultiChar instance IsString MultiChar instance Hashable MultiChar instance Read MultiChar instance Show MultiChar -- | This module keeps a persistent bimap between -- InternedMultiChars and Ints. -- -- TODO make this a bimap Text - Vector. Compare -- performance when printing backtracking results. (Do this after the -- Builder-based backtracking is online) module NLP.Alphabet.IMMC.Internal immcBimap :: IORef (Bimap InternedMultiChar Int) -- | Add InternedMultiChar and return its Int key. Will -- return the key of an already existing string and thereby also serves as the lookup in the -- left-to-right direction. immcBimapAdd :: InternedMultiChar -> Int -- | Look up the InternedMultiChar based on an Int key. -- Unsafe: relies on a totality assumption. immcBimapLookupInt :: Int -> InternedMultiChar -- | An implementation of Int-mapped MultiChars with -- internalization. module NLP.Alphabet.IMMC newtype IMMC IMMC :: Int -> IMMC getIMMC :: IMMC -> Int immc :: InternedMultiChar -> IMMC instance NFData IMMC instance Stringable IMMC instance Hashable IMMC instance Read IMMC instance Show IMMC instance IsString IMMC instance Ord IMMC instance Vector Vector IMMC instance MVector MVector IMMC instance Unbox IMMC instance Eq IMMC instance Generic IMMC instance Datatype D1IMMC instance Constructor C1_0IMMC instance Selector S1_0_0IMMC -- | This module defines a simple scoring scheme based on pairs of -- unigrams. module NLP.Scoring.SimpleUnigram -- | Score MultiChars x and y based on the -- simple scoring system: (i) look up (x,y) and use the score if found; -- (ii) if (x,y) is not in the database, then return the default matching -- defMatch score if x==y, otherwise return the default -- mismatch defMismatch score. scoreUnigram :: SimpleScoring -> IMMC -> IMMC -> Double -- | Collect the hashtable and scalar values for simple scoring. 
data SimpleScoring SimpleScoring :: !(BasicHashTable (IMMC, IMMC) Double) -> !Double -> !Double -> !Double -> !Double -> !Double -> SimpleScoring simpleScore :: SimpleScoring -> !(BasicHashTable (IMMC, IMMC) Double) gapScore :: SimpleScoring -> !Double gapOpen :: SimpleScoring -> !Double gapExtend :: SimpleScoring -> !Double defMatch :: SimpleScoring -> !Double defMismatch :: SimpleScoring -> !Double instance Show SimpleScoring module NLP.Scoring.SimpleUnigram.Import -- | Each parsed line gives a set of characters, or tells us a score. -- -- TODO add PLimport which starts a recursive import (note: -- start by storing the hash or whatever of the file to be imported so -- that we can report circular imports) data ParsedLine PLset :: Text -> [IMMC] -> ParsedLine PLeq :: Text -> Double -> ParsedLine PLeqset :: Text -> [IMMC] -> ParsedLine PLinset :: Text -> Text -> Double -> ParsedLine PLgap :: Double -> ParsedLine PLgapopen :: Double -> ParsedLine PLgapextend :: Double -> ParsedLine PLdefmatch :: Double -> ParsedLine PLdefmismatch :: Double -> ParsedLine PLcomment :: Text -> ParsedLine -- | Here we simply parse individual lines. parseLine :: Text -> ParsedLine -- | Parses a Text to create a simple scoring. We don't do much error -- checking; many of the bindings below will easily fail. -- -- TODO obviously: implement error-checking genSimpleScoring :: Text -> SimpleScoring -- | Parse a simple scoring file. simpleScoreFromFile :: FilePath -> IO SimpleScoring instance Show ParsedLine instance Eq ParsedLine instance Ord ParsedLine module NLP.Scoring.SimpleUnigram.Default -- | Default simple unigram scores for a system of consonants, liquid -- consonants, and vowels of arbitrary scale. clvDefaults :: SimpleScoring