-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Simple scoring schemes for word alignments
--
-- Provides a simple scoring scheme for word alignments.
@package NaturalLanguageAlphabets
@version 0.2.1.0
-- | This module defines a simple scoring scheme based on pairs of
-- unigrams.
module NLP.Scoring.Unigram
-- | Score the two IBS values x and y using the simple scoring system:
-- (i) look up (x,y) and use its score if found; (ii) if (x,y) is not
-- in the database, return the default match score (usDefaultMatch) if
-- x==y, otherwise return the default mismatch score
-- (usDefaultMismatch). Note that even though IBS k and IBS l have
-- different types, mismatches are checked using the underlying Int
-- representation.
matchUnigram :: UnigramScoring k l -> IBS k -> IBS l -> Double
-- | Provides a score for a unigram character in an in/del environment.
-- If the character x in the pairing "x == -" is found in the
-- usUnigramInsertFstK database, that score is used; otherwise the
-- usGapLinear score is used.
insertUnigramFstK :: UnigramScoring k l -> IBS k -> Double
-- | Analogous to insertUnigramFstK, but works on the IBS l
-- with phantom type l.
insertUnigramSndL :: UnigramScoring k l -> IBS l -> Double
-- | Collect the hashtable and scalar values for simple scoring.
--
-- TODO binary and cereal instances
data UnigramScoring k l
UnigramScoring :: !HashMap (IBS k, IBS l) Double -> !HashMap (IBS k) Double -> !HashMap (IBS l) Double -> !Double -> !Double -> !Double -> !Double -> !Double -> !Double -> !Double -> !Double -> UnigramScoring k l
-- | All known matching characters and associated scores.
[usUnigramMatch] :: UnigramScoring k l -> !HashMap (IBS k, IBS l) Double
-- | Characters that can be deleted with costs different from
-- gapOpen/gapExtension. This is the insertion map,
-- associated with the first type k.
[usUnigramInsertFstK] :: UnigramScoring k l -> !HashMap (IBS k) Double
-- | Characters that can be deleted with costs different from
-- gapOpen/gapExtension. This is the insertion map,
-- associated with the second type l.
[usUnigramInsertSndL] :: UnigramScoring k l -> !HashMap (IBS l) Double
-- | Linear gap score.
[usGapLinear] :: UnigramScoring k l -> !Double
-- | Gap opening costs for Gotoh-style grammars.
[usGapOpen] :: UnigramScoring k l -> !Double
-- | Gap extension costs for Gotoh-style grammars.
[usGapExtension] :: UnigramScoring k l -> !Double
-- | Default score for characters matching, i.e. x==y.
[usDefaultMatch] :: UnigramScoring k l -> !Double
-- | Default score for characters not matching, i.e. x/=y.
[usDefaultMismatch] :: UnigramScoring k l -> !Double
-- | Special gap score for a prefix or suffix.
[usPrefixSuffixLinear] :: UnigramScoring k l -> !Double
-- | Special gap opening score for a prefix or suffix.
[usPrefixSuffixOpen] :: UnigramScoring k l -> !Double
-- | Special gap extension score for a prefix or suffix.
[usPrefixSuffixExtension] :: UnigramScoring k l -> !Double
instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Generics.Generic (NLP.Scoring.Unigram.UnigramScoring k2 l)
instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Classes.Eq (NLP.Scoring.Unigram.UnigramScoring k2 l)
instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Show.Show (NLP.Scoring.Unigram.UnigramScoring k2 l)
instance forall k1 (k2 :: k1) k3 (l :: k3). GHC.Read.Read (NLP.Scoring.Unigram.UnigramScoring k2 l)
instance forall k1 k2 (k3 :: k2) (l :: k1). Data.Hashable.Class.Hashable (NLP.Scoring.Unigram.UnigramScoring k3 l)
instance forall k1 k2 (k3 :: k2) (l :: k1). Data.Aeson.Types.FromJSON.FromJSON (NLP.Scoring.Unigram.UnigramScoring k3 l)
instance forall k1 k2 (k3 :: k2) (l :: k1). Data.Aeson.Types.ToJSON.ToJSON (NLP.Scoring.Unigram.UnigramScoring k3 l)
-- | TODO normalization of characters! (Though it might be better to do
-- this in a separate normalization function rather than in the importer.)
module NLP.Scoring.Unigram.Import
data Env
Env :: !Seq Text -> !HashMap Text Double -> !HashMap Text (HashSet Text) -> !HashMap (Text, Text) Double -> !HashMap Text Double -> !HashMap Text Double -> Env
[_warnings] :: Env -> !Seq Text
[_defaults] :: Env -> !HashMap Text Double
[_charGroups] :: Env -> !HashMap Text (HashSet Text)
[_matchScores] :: Env -> !HashMap (Text, Text) Double
[_ignoredScoresFstK] :: Env -> !HashMap Text Double
[_ignoredScoresSndL] :: Env -> !HashMap Text Double
warnings :: Lens' Env (Seq Text)
matchScores :: Lens' Env (HashMap (Text, Text) Double)
ignoredScoresSndL :: Lens' Env (HashMap Text Double)
ignoredScoresFstK :: Lens' Env (HashMap Text Double)
defaults :: Lens' Env (HashMap Text Double)
charGroups :: Lens' Env (HashMap Text (HashSet Text))
defaultEnv :: Env
test :: () => IO (Either ErrInfo (UnigramScoring k3 l))
-- | This will pretty-print the error message and then exit ungracefully.
prettyErrorAndExit :: MonadIO m => ErrInfo -> m ()
-- | Returns the error message, but will not exit.
errorToString :: ErrInfo -> String
fromByteString :: ByteString -> String -> Except ErrInfo (UnigramScoring k l)
fromFile :: Bool -> FilePath -> ExceptT ErrInfo IO (UnigramScoring k l)
pUnigram :: UnigramParser (UnigramScoring k l)
-- | Defaults are key-value pairs, of which there is only a small set.
pDefaults :: UnigramParser ()
-- | Gives a name to a set of characters we want to work with later on.
pCharGroup :: UnigramParser ()
-- | Parses a similarity line and updates the scores for the pairs of
-- characters.
pSimilarity :: UnigramParser ()
-- | Parses an equality line and updates the scores for the pairs of
-- characters.
pEquality :: UnigramParser ()
data FstKSndL
FstK :: FstKSndL
SndL :: FstKSndL
pIgnored :: UnigramParser ()
-- | Defines what a grapheme is. Basically, a grapheme must not be
-- whitespace and must not start with $.
--
-- TODO we probably want to allow an escaped form (presumably \$) to
-- stand for a literal $.
pGrapheme :: (CharParsing p, TokenParsing p) => p Text
-- | Returns the set of characters from a known character group
pKnownCharGroup :: Unlined UnigramParser (HashSet Text)
-- | How we can expand a group with special functions.
pExpansionOptions :: UnigramParser Text
specialFunctions :: [(Text, Text -> Text)]
applySpecialFunctions :: Foldable t => t Text -> HashSet Text -> HashSet Text
-- | TODO only insert warning, not error, after seeing a character again!
setIdent :: HashSet Text -> Unlined UnigramParser Text
reserved :: TokenParsing m => IdentifierStyle m
-- | This is just the trifecta parser, but with haskell-style comments
-- enabled.
newtype P a
P :: Parser a -> P a
[runP] :: P a -> Parser a
type UnigramParser = StateT Env P
instance Text.Parser.Combinators.Parsing NLP.Scoring.Unigram.Import.P
instance Text.Parser.Char.CharParsing NLP.Scoring.Unigram.Import.P
instance GHC.Base.Alternative NLP.Scoring.Unigram.Import.P
instance GHC.Base.MonadPlus NLP.Scoring.Unigram.Import.P
instance Text.Trifecta.Combinators.DeltaParsing NLP.Scoring.Unigram.Import.P
instance GHC.Base.Functor NLP.Scoring.Unigram.Import.P
instance GHC.Base.Monad NLP.Scoring.Unigram.Import.P
instance GHC.Base.Applicative NLP.Scoring.Unigram.Import.P
instance GHC.Classes.Ord NLP.Scoring.Unigram.Import.FstKSndL
instance GHC.Classes.Eq NLP.Scoring.Unigram.Import.FstKSndL
instance Text.Trifecta.Combinators.DeltaParsing (Text.Parser.Token.Unlined NLP.Scoring.Unigram.Import.UnigramParser)
instance Text.Parser.Token.TokenParsing NLP.Scoring.Unigram.Import.P
instance GHC.Show.Show NLP.Scoring.Unigram.Import.Env
module NLP.Scoring.Unigram.Default
-- | Default simple unigram scores for a system of consonants, liquid
-- consonants, and vowels of arbitrary scale.
clvDefaults :: () => UnigramScoring k3 l