-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Alphabet and word representations
--
@package NaturalLanguageAlphabets
@version 0.0.1.0
-- | An alphabet, where each character is a short bytestring.
--
-- Due to the overhead this incurs, we use ShortByteStrings
-- internally. We also provide an Interned instance to further
-- reduce overhead using hash-consing.
module NLP.Alphabet.MultiChar
-- | Interns a MultiChar character.
internMultiChar :: MultiChar -> MultiChar
-- | Wraps a short string; the constructor takes a Text. The Read and
-- Show instances behave like those for normal strings.
newtype MultiChar
MultiChar :: Text -> MultiChar
getMultiChar :: MultiChar -> Text
-- | Interned MultiChar.
--
-- TODO Check Ord instance. We compare on
-- uninternMultiChar.
data InternedMultiChar
InternedMultiChar :: {-# UNPACK #-} !Id -> {-# UNPACK #-} !MultiChar -> InternedMultiChar
internedMultiCharId :: InternedMultiChar -> {-# UNPACK #-} !Id
uninternMultiChar :: InternedMultiChar -> {-# UNPACK #-} !MultiChar
imcCache :: Cache InternedMultiChar
instance Typeable MultiChar
instance Typeable InternedMultiChar
instance Eq MultiChar
instance Ord MultiChar
instance Generic MultiChar
instance Data MultiChar
instance Generic InternedMultiChar
instance Data InternedMultiChar
instance Eq (Description InternedMultiChar)
instance Hashable (Description InternedMultiChar)
instance Datatype D1MultiChar
instance Constructor C1_0MultiChar
instance Selector S1_0_0MultiChar
instance Datatype D1InternedMultiChar
instance Constructor C1_0InternedMultiChar
instance Selector S1_0_0InternedMultiChar
instance Selector S1_0_1InternedMultiChar
instance NFData InternedMultiChar
instance Stringable InternedMultiChar
instance Interned InternedMultiChar
instance Hashable InternedMultiChar
instance Show InternedMultiChar
instance Read InternedMultiChar
instance Ord InternedMultiChar
instance Eq InternedMultiChar
instance IsString InternedMultiChar
instance NFData MultiChar
instance Stringable MultiChar
instance IsString MultiChar
instance Hashable MultiChar
instance Read MultiChar
instance Show MultiChar
-- | This module keeps a persistent bimap between
-- InternedMultiChars and Ints
--
-- TODO make this a bimap Text - Vector. Compare
-- performance when printing backtracking results. (Do this after the
-- Builder-based backtracking is online)
module NLP.Alphabet.IMMC.Internal
immcBimap :: IORef (Bimap InternedMultiChar Int)
-- | Add an InternedMultiChar and return its Int key. If the
-- string already exists, its existing key is returned, so this function
-- also serves as the lookup in the left-to-right direction.
immcBimapAdd :: InternedMultiChar -> Int
-- | Look up the InternedMultiChar for an Int key.
-- Unsafe: the lookup is assumed to be total, i.e. the key is assumed to
-- be present in the bimap.
immcBimapLookupInt :: Int -> InternedMultiChar
-- | An implementation of Int-mapped MultiChars with
-- interning.
module NLP.Alphabet.IMMC
newtype IMMC
IMMC :: Int -> IMMC
getIMMC :: IMMC -> Int
immc :: InternedMultiChar -> IMMC
instance NFData IMMC
instance Stringable IMMC
instance Hashable IMMC
instance Read IMMC
instance Show IMMC
instance IsString IMMC
instance Ord IMMC
instance Vector Vector IMMC
instance MVector MVector IMMC
instance Unbox IMMC
instance Eq IMMC
instance Generic IMMC
instance Datatype D1IMMC
instance Constructor C1_0IMMC
instance Selector S1_0_0IMMC
-- | This module defines a simple scoring scheme based on pairs of
-- unigrams.
module NLP.Scoring.SimpleUnigram
-- | Score the characters x and y (given as IMMCs) based on the
-- simple scoring system: (i) look up (x,y) and use the score if found;
-- (ii) if (x,y) is not in the database, then return the default match
-- score defMatch if x==y, otherwise return the default
-- mismatch score defMismatch.
scoreUnigram :: SimpleScoring -> IMMC -> IMMC -> Double
-- | Collect the hashtable and scalar values for simple scoring.
data SimpleScoring
SimpleScoring :: !(BasicHashTable (IMMC, IMMC) Double) -> !Double -> !Double -> !Double -> !Double -> !Double -> SimpleScoring
simpleScore :: SimpleScoring -> !(BasicHashTable (IMMC, IMMC) Double)
gapScore :: SimpleScoring -> !Double
gapOpen :: SimpleScoring -> !Double
gapExtend :: SimpleScoring -> !Double
defMatch :: SimpleScoring -> !Double
defMismatch :: SimpleScoring -> !Double
instance Show SimpleScoring
module NLP.Scoring.SimpleUnigram.Import
-- | Each parsed line gives a set of characters, or tells us a score.
--
-- TODO add a PLimport constructor which starts a recursive import (note:
-- start by storing a hash, or similar, of the file to be imported so
-- that we can comment on circular imports)
data ParsedLine
PLset :: Text -> [IMMC] -> ParsedLine
PLeq :: Text -> Double -> ParsedLine
PLeqset :: Text -> [IMMC] -> ParsedLine
PLinset :: Text -> Text -> Double -> ParsedLine
PLgap :: Double -> ParsedLine
PLgapopen :: Double -> ParsedLine
PLgapextend :: Double -> ParsedLine
PLdefmatch :: Double -> ParsedLine
PLdefmismatch :: Double -> ParsedLine
PLcomment :: Text -> ParsedLine
-- | Here we simply parse individual lines.
parseLine :: Text -> ParsedLine
-- | Parses a Text to create a simple scoring. We don't do much error
-- checking; many of the bindings below will easily fail.
--
-- TODO obviously: implement error-checking
genSimpleScoring :: Text -> SimpleScoring
-- | Parse a simple scoring file.
simpleScoreFromFile :: FilePath -> IO SimpleScoring
instance Show ParsedLine
instance Eq ParsedLine
instance Ord ParsedLine
module NLP.Scoring.SimpleUnigram.Default
-- | Default simple unigram scores for a system of consonants, liquid
-- consonants, and vowels of arbitrary scale.
clvDefaults :: SimpleScoring