-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Simple scoring schemes for word alignments -- -- Provides a simple scoring scheme for word alignments. @package NaturalLanguageAlphabets @version 0.2.1.0 -- | This module defines a simple scoring scheme based on pairs of -- unigrams. module NLP.Scoring.Unigram -- | Score IBSs x and y based on the simple -- scoring system: (i) look up (x,y) and use the score if found; (ii) if -- (x,y) is not in the database, then return the default match score -- usDefaultMatch if x==y, otherwise return the -- default mismatch score usDefaultMismatch. Note that even though -- IBS k and IBS l have different types, mismatches are -- checked using the underlying Int representation. matchUnigram :: UnigramScoring k l -> IBS k -> IBS l -> Double -- | Provides a score for the unigram characters in an in/del -- environment. If the character x, paired with a gap (x == -), -- is found in the usUnigramInsertFstK database, that -- score is used; otherwise the usGapLinear score is used. insertUnigramFstK :: UnigramScoring k l -> IBS k -> Double -- | Analogous to insertUnigramFstK, but works on the IBS l -- with phantom type l. insertUnigramSndL :: UnigramScoring k l -> IBS l -> Double -- | Collect the hashtable and scalar values for simple scoring. -- -- TODO binary and cereal instances data UnigramScoring k l UnigramScoring :: !HashMap (IBS k, IBS l) Double -> !HashMap (IBS k) Double -> !HashMap (IBS l) Double -> !Double -> !Double -> !Double -> !Double -> !Double -> !Double -> !Double -> !Double -> UnigramScoring k l -- | All known matching characters and associated scores. [usUnigramMatch] :: UnigramScoring k l -> !HashMap (IBS k, IBS l) Double -- | Characters that can be deleted with costs different from -- gapOpen/gapExtension. This is the insertion map, -- associated with the first type k. 
[usUnigramInsertFstK] :: UnigramScoring k l -> !HashMap (IBS k) Double -- | Characters that can be deleted with costs different from -- gapOpen/gapExtension. This is the insertion map, -- associated with the second type l. [usUnigramInsertSndL] :: UnigramScoring k l -> !HashMap (IBS l) Double -- | linear gap scores [usGapLinear] :: UnigramScoring k l -> !Double -- | Gap opening costs for Gotoh-style grammars. [usGapOpen] :: UnigramScoring k l -> !Double -- | Gap extension costs for Gotoh-style grammars. [usGapExtension] :: UnigramScoring k l -> !Double -- | Default score for characters matching, i.e. x==y. [usDefaultMatch] :: UnigramScoring k l -> !Double -- | Default score for characters not matching, i.e. x/=y. [usDefaultMismatch] :: UnigramScoring k l -> !Double -- | Special gap score for a prefix or suffix. [usPrefixSuffixLinear] :: UnigramScoring k l -> !Double -- | Special gap opening score for a prefix or suffix. [usPrefixSuffixOpen] :: UnigramScoring k l -> !Double -- | Special gap extension score for a prefix or suffix. [usPrefixSuffixExtension] :: UnigramScoring k l -> !Double instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Generics.Generic (NLP.Scoring.Unigram.UnigramScoring k2 l) instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Classes.Eq (NLP.Scoring.Unigram.UnigramScoring k2 l) instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Show.Show (NLP.Scoring.Unigram.UnigramScoring k2 l) instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Read.Read (NLP.Scoring.Unigram.UnigramScoring k2 l) instance forall k1 k2 (k3 :: k2) (l :: k1). Data.Hashable.Class.Hashable (NLP.Scoring.Unigram.UnigramScoring k3 l) instance forall k1 k2 (k3 :: k2) (l :: k1). Data.Aeson.Types.FromJSON.FromJSON (NLP.Scoring.Unigram.UnigramScoring k3 l) instance forall k1 k2 (k3 :: k2) (l :: k1). Data.Aeson.Types.ToJSON.ToJSON (NLP.Scoring.Unigram.UnigramScoring k3 l) -- | TODO normalization of characters! 
-- (though it might be better to do -- this not in the importer, but in a normalization function) module NLP.Scoring.Unigram.Import data Env Env :: !Seq Text -> !HashMap Text Double -> !HashMap Text (HashSet Text) -> !HashMap (Text, Text) Double -> !HashMap Text Double -> !HashMap Text Double -> Env [_warnings] :: Env -> !Seq Text [_defaults] :: Env -> !HashMap Text Double [_charGroups] :: Env -> !HashMap Text (HashSet Text) [_matchScores] :: Env -> !HashMap (Text, Text) Double [_ignoredScoresFstK] :: Env -> !HashMap Text Double [_ignoredScoresSndL] :: Env -> !HashMap Text Double warnings :: Lens' Env (Seq Text) matchScores :: Lens' Env (HashMap (Text, Text) Double) ignoredScoresSndL :: Lens' Env (HashMap Text Double) ignoredScoresFstK :: Lens' Env (HashMap Text Double) defaults :: Lens' Env (HashMap Text Double) charGroups :: Lens' Env (HashMap Text (HashSet Text)) defaultEnv :: Env test :: () => IO (Either ErrInfo (UnigramScoring k3 l)) -- | This will prettyprint the error message and ungracefully exit prettyErrorAndExit :: MonadIO m => ErrInfo -> m () -- | Returns the error message, but will not exit. errorToString :: ErrInfo -> String fromByteString :: ByteString -> String -> Except ErrInfo (UnigramScoring k l) fromFile :: Bool -> FilePath -> ExceptT ErrInfo IO (UnigramScoring k l) pUnigram :: UnigramParser (UnigramScoring k l) -- | Defaults are key-value pairs, of which there is only a small set. pDefaults :: UnigramParser () -- | Gives a name to a set of characters we want to work with later on. pCharGroup :: UnigramParser () -- | Parses a similarity line and updates the scores for the pairs of -- characters. pSimilarity :: UnigramParser () -- | Parses an equality line and updates the scores for the pairs of -- characters. pEquality :: UnigramParser () data FstKSndL FstK :: FstKSndL SndL :: FstKSndL pIgnored :: UnigramParser () -- | Defines what a grapheme is. Basically, don't be a whitespace and don't -- start with $. 
-- -- TODO we probably want to allow an escaped $ (presumably \$) to stand for a literal $. pGrapheme :: (CharParsing p, TokenParsing p) => p Text -- | Returns the set of characters from a known character group pKnownCharGroup :: Unlined UnigramParser (HashSet Text) -- | How we can expand a group with special functions. pExpansionOptions :: UnigramParser Text specialFunctions :: [(Text, Text -> Text)] applySpecialFunctions :: Foldable t => t Text -> HashSet Text -> HashSet Text -- | TODO only insert warning, not error, after seeing a character again! setIdent :: HashSet Text -> Unlined UnigramParser Text reserved :: TokenParsing m => IdentifierStyle m -- | This is just the trifecta parser, but with haskell-style comments -- enabled. newtype P a P :: Parser a -> P a [runP] :: P a -> Parser a type UnigramParser = StateT Env P instance Text.Parser.Combinators.Parsing NLP.Scoring.Unigram.Import.P instance Text.Parser.Char.CharParsing NLP.Scoring.Unigram.Import.P instance GHC.Base.Alternative NLP.Scoring.Unigram.Import.P instance GHC.Base.MonadPlus NLP.Scoring.Unigram.Import.P instance Text.Trifecta.Combinators.DeltaParsing NLP.Scoring.Unigram.Import.P instance GHC.Base.Functor NLP.Scoring.Unigram.Import.P instance GHC.Base.Monad NLP.Scoring.Unigram.Import.P instance GHC.Base.Applicative NLP.Scoring.Unigram.Import.P instance GHC.Classes.Ord NLP.Scoring.Unigram.Import.FstKSndL instance GHC.Classes.Eq NLP.Scoring.Unigram.Import.FstKSndL instance Text.Trifecta.Combinators.DeltaParsing (Text.Parser.Token.Unlined NLP.Scoring.Unigram.Import.UnigramParser) instance Text.Parser.Token.TokenParsing NLP.Scoring.Unigram.Import.P instance GHC.Show.Show NLP.Scoring.Unigram.Import.Env module NLP.Scoring.Unigram.Default -- | Default simple unigram scores for a system of consonants, liquid -- consonants, and vowels of arbitrary scale. clvDefaults :: () => UnigramScoring k3 l