# Source code for abm.learners

# -*- coding: utf-8 -*-
"""
    abm.learners
    ~~~~~~~~~~~~

    Learning mixins to add to entity subclasses
    Should implement _get_next_recipient and _learn
"""
import numpy as np
import operator

from random import choice
from numpy.random import choice as _npchoice


class DunceMixin(object):
    """
    A learner that doesn't learn

    The host class must provide ``self.adjacencies`` (it is not defined here).
    """
    def _get_next_recipient(self, task):
        """
        Pick a recipient uniformly at random from self.adjacencies

        ``task`` is accepted for interface compatibility and is not inspected.
        """
        return choice(self.adjacencies)

    def _learn(self):
        """
        Deliberately a no-op: this learner never updates any state
        """
        pass


class SoftmaxLearnerMixin(object):
    """
    A learner that chooses a neighbor by sampling from a softmax over
    per-neighbor linear scores of the task features

    The host class must provide ``adjacencies``, ``environment``, ``award``
    and ``value``; none of them are defined in this mixin.
    """
    # neighbor picked by the most recent decision that still awaits feedback
    last_recipient = None
    # maps each adjacency to its weight array; created lazily on first decision
    w_container = None
    # pending (adjacency, weight adjustment) pairs, applied by flush_updates
    update_buffer = None
    # not read inside this mixin -- presumably the number of decisions made
    # under one policy before flushing; TODO confirm against the caller
    policy_duration = 1
    # how many times flush_updates has run; class-level default so that
    # flush_updates is safe to call before the first decision
    flush_count = 0

    def _get_next_recipient(self, task):
        """
        Uses task.features and self.w_container to find the best neighbor for this task
        Sets "pending" state once a decision is made

        Returns the chosen adjacency and records it in self.last_recipient.
        """
        # lazily initialize random weights and weight update buffer (for policy rollouts)
        if self.w_container is None:
            self.w_container = {a: np.random.random(task.features.shape) for a in self.adjacencies}
        if self.update_buffer is None:
            self.flush_count = 0
            self.update_buffer = []

        # if you have a decision pending feedback and are asked to make another,
        # mark that last decision as 'wrong' before proceeding
        if self.last_recipient is not None:
            self.award(-1. / self.environment.path_cutoff)

        self.latest_x = x = task.features

        # score every neighbor for this task; kept on self because _learn
        # needs the probabilities even when the target shortcut is taken
        self.softmaxes = _exp_over_sumexp(x, self.w_container)

        if task.target in self.adjacencies:
            # don't actually use your weights to decide if the neighbor is visible
            decision = task.target
        else:
            # Sample an index rather than handing the keys to numpy: dict views
            # are not sequences on Python 3, and converting the keys to an
            # array would coerce them to numpy types (or fail for tuple keys).
            candidates = list(self.softmaxes)
            probabilities = list(self.softmaxes.values())
            decision = candidates[_npchoice(len(candidates), p=probabilities)]

        self.last_recipient = decision
        return decision

    def _learn(self):
        """
        Turn the feedback in self.value into a buffered weight adjustment
        for the pending decision, then clear the pending state

        The adjustment is only queued here; flush_updates applies it.
        """
        assert self.last_recipient is not None

        grad = _gradient_precomputed(self.last_recipient, self.softmaxes,
                                     self.latest_x, self.value > 0)
        # the sign comes from the gradient; the feedback only scales it
        w_adjustment = grad * abs(self.value)

        self.update_buffer.append((self.last_recipient, w_adjustment))

        self.last_recipient, self.softmaxes, self.latest_x = None, None, None

    def flush_updates(self):
        """
        Apply every buffered weight adjustment to self.w_container and
        empty the buffer

        Safe to call before any decision has been made: the buffer is then
        still unset and there is nothing to apply.
        """
        self.flush_count += 1
        for adjacency_index, w_adjustment in self.update_buffer or ():
            self.w_container[adjacency_index] += w_adjustment
        self.update_buffer = []


def _gradient_precomputed(k, softmaxes, train_x, success):
    """
    Gradient for choice ``k`` given already-computed softmax values

    Returns ``train_x`` scaled by ``(indicator - softmaxes[k])`` where the
    indicator is 1 when ``success`` is truthy and 0 otherwise, so the
    magnitude shrinks as the softmax value approaches the indicator.
    """
    if success:
        indicator = 1
    else:
        indicator = 0
    residual = indicator - softmaxes[k]
    return train_x * residual


def _exp_over_sumexp(train_x, w_container):
    """
    Calculate an exponential over a sum of exponentials in a numerically stable way
    Returns a dict keying class values k :
        np(exp(x_t*wk)) / sum for j in K {np.exp(x_t*wj)}

    ``w_container`` maps each class value to its weights; an empty
    container yields an empty dict.
    """
    keys = list(w_container)
    if not keys:
        return {}

    x_t = np.transpose(train_x)
    # Materialize as a list: the values are traversed more than once and
    # on Python 3 a bare map() would be exhausted by the first pass.
    dots = [np.dot(x_t, w_container[k]) for k in keys]

    # the final output is unchanged by removing the maximum `weight * x` value
    # from each dot product before exponentiation
    beta = max(dots)
    exps = [np.exp(d - beta) for d in dots]
    sumexp = sum(exps)

    return {k: e / sumexp for k, e in zip(keys, exps)}