Safe Haskell	None
Language	Haskell2010

NLP.ML.AvgPerceptron

Description

Averaged Perceptron implementation of part-of-speech tagging, adapted for Haskell from a Python implementation, which is described in this blog post:

http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/

The Perceptron code can be found on github:

https://github.com/sloria/TextBlob/blob/dev/text/_perceptron.py

Synopsis

Documentation

data Perceptron Source #

The perceptron model.

Constructors

Perceptron

Fields

weights :: Map Feature (Map Class Weight)
Each feature gets its own weight vector, so weights is a map of maps (a dict-of-dicts in the original Python).
totals :: Map (Feature, Class) Weight
The accumulated values, for the averaging. These are keyed by (feature, class) tuples.
tstamps :: Map (Feature, Class) Int
The last time the feature was changed, for the averaging. Also keyed by (feature, class) tuples (tstamps is short for timestamps).
instances :: Int
Number of instances seen

Instances

Eq Perceptron Source #
Methods (==) :: Perceptron -> Perceptron -> Bool # (/=) :: Perceptron -> Perceptron -> Bool #
Read Perceptron Source #
Methods readsPrec :: Int -> ReadS Perceptron # readList :: ReadS [Perceptron] # readPrec :: ReadPrec Perceptron # readListPrec :: ReadPrec [Perceptron] #
Show Perceptron Source #
Methods showsPrec :: Int -> Perceptron -> ShowS # show :: Perceptron -> String # showList :: [Perceptron] -> ShowS #
Generic Perceptron Source #
Associated Types type Rep Perceptron :: * -> * # Methods from :: Perceptron -> Rep Perceptron x # to :: Rep Perceptron x -> Perceptron #
Serialize Perceptron Source #
Methods put :: Putter Perceptron # get :: Get Perceptron #
NFData Perceptron Source #
Methods rnf :: Perceptron -> () #
type Rep Perceptron Source #
type Rep Perceptron = D1 (MetaData "Perceptron" "NLP.ML.AvgPerceptron" "chatter-0.9.1.0-CnWxxDeMROyIxVsZb3fGkc" False) (C1 (MetaCons "Perceptron" PrefixI True) ((:*:) ((:*:) (S1 (MetaSel (Just Symbol "weights") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 (Map Feature (Map Class Weight)))) (S1 (MetaSel (Just Symbol "totals") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 (Map (Feature, Class) Weight)))) ((:*:) (S1 (MetaSel (Just Symbol "tstamps") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 (Map (Feature, Class) Int))) (S1 (MetaSel (Just Symbol "instances") NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Int)))))

newtype Class Source #

The classes that the perceptron assigns are represented with a newtype-wrapped String.

Eventually, I think this should become a typeclass, so the classes can be defined by the users of the Perceptron (such as custom POS tag ADTs, or more complex classes).

Constructors

Class String

Instances

Eq Class Source #
Methods (==) :: Class -> Class -> Bool # (/=) :: Class -> Class -> Bool #
Ord Class Source #
Methods compare :: Class -> Class -> Ordering # (<) :: Class -> Class -> Bool # (<=) :: Class -> Class -> Bool # (>) :: Class -> Class -> Bool # (>=) :: Class -> Class -> Bool # max :: Class -> Class -> Class # min :: Class -> Class -> Class #
Read Class Source #
Methods readsPrec :: Int -> ReadS Class # readList :: ReadS [Class] # readPrec :: ReadPrec Class # readListPrec :: ReadPrec [Class] #
Show Class Source #
Methods showsPrec :: Int -> Class -> ShowS # show :: Class -> String # showList :: [Class] -> ShowS #
Generic Class Source #
Associated Types type Rep Class :: * -> * # Methods from :: Class -> Rep Class x # to :: Rep Class x -> Class #
Serialize Class Source #
Methods put :: Putter Class # get :: Get Class #
NFData Class Source #
Methods rnf :: Class -> () #
type Rep Class Source #
type Rep Class = D1 (MetaData "Class" "NLP.ML.AvgPerceptron" "chatter-0.9.1.0-CnWxxDeMROyIxVsZb3fGkc" True) (C1 (MetaCons "Class" PrefixI False) (S1 (MetaSel (Nothing Symbol) NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 String)))

type Weight = Double Source #

Type synonym for Double, to make the code easier to read and simple to change if necessary.

newtype Feature Source #

Constructors

Feat Text

Instances

Eq Feature Source #
Methods (==) :: Feature -> Feature -> Bool # (/=) :: Feature -> Feature -> Bool #
Ord Feature Source #
Methods compare :: Feature -> Feature -> Ordering # (<) :: Feature -> Feature -> Bool # (<=) :: Feature -> Feature -> Bool # (>) :: Feature -> Feature -> Bool # (>=) :: Feature -> Feature -> Bool # max :: Feature -> Feature -> Feature # min :: Feature -> Feature -> Feature #
Read Feature Source #
Methods readsPrec :: Int -> ReadS Feature # readList :: ReadS [Feature] # readPrec :: ReadPrec Feature # readListPrec :: ReadPrec [Feature] #
Show Feature Source #
Methods showsPrec :: Int -> Feature -> ShowS # show :: Feature -> String # showList :: [Feature] -> ShowS #
Generic Feature Source #
Associated Types type Rep Feature :: * -> * # Methods from :: Feature -> Rep Feature x # to :: Rep Feature x -> Feature #
Serialize Feature Source #
Methods put :: Putter Feature # get :: Get Feature #
NFData Feature Source #
Methods rnf :: Feature -> () #
type Rep Feature Source #
type Rep Feature = D1 (MetaData "Feature" "NLP.ML.AvgPerceptron" "chatter-0.9.1.0-CnWxxDeMROyIxVsZb3fGkc" True) (C1 (MetaCons "Feat" PrefixI False) (S1 (MetaSel (Nothing Symbol) NoSourceUnpackedness NoSourceStrictness DecidedLazy) (Rec0 Text)))

emptyPerceptron :: Perceptron Source #

An empty perceptron, used to start training.

predict :: Perceptron -> Map Feature Int -> Maybe Class Source #

Predict a class given a feature vector.

Ported from python:

def predict(self, features):
    '''Dot-product the features and current weights and return the best label.'''
    scores = defaultdict(float)
    for feat, value in features.items():
        if feat not in self.weights or value == 0:
            continue
        weights = self.weights[feat]
        for label, weight in weights.items():
            scores[label] += value * weight
    # Do a secondary alphabetic sort, for stability
    return max(self.classes, key=lambda label: (scores[label], label))

train :: Int -> Perceptron -> [(Map Feature Int, Class)] -> IO Perceptron Source #

update :: Perceptron -> Class -> Class -> [Feature] -> Perceptron Source #

Update the perceptron with a new example.

update(self, truth, guess, features)
   ...
        self.i += 1
        if truth == guess:
            return None
        for f in features:
            weights = self.weights.setdefault(f, {}) -- setdefault is Map.findWithDefault, and destructive.
            upd_feat(truth, f, weights.get(truth, 0.0), 1.0)
            upd_feat(guess, f, weights.get(guess, 0.0), -1.0)
        return None

averageWeights :: Perceptron -> Perceptron Source #

Average the weights

Ported from Python:

def average_weights(self):
    for feat, weights in self.weights.items():
        new_feat_weights = {}
        for clas, weight in weights.items():
            param = (feat, clas)
            total = self._totals[param]
            total += (self.i - self._tstamps[param]) * weight
            averaged = round(total / float(self.i), 3)
            if averaged:
                new_feat_weights[clas] = averaged
        self.weights[feat] = new_feat_weights
    return None