# Source code for abm.learners
# -*- coding: utf-8 -*-
"""
abm.learners
~~~~~~~~~~~~
Learning mixins to add to entity subclasses
Should implement _get_next_recipient and _learn
"""
import numpy as np
import operator
from random import choice
from numpy.random import choice as _npchoice
class DunceMixin(object):
    """
    A learner that doesn't learn.

    Every routing decision is a random pick from ``self.adjacencies`` and
    feedback is ignored.  The host class must provide ``adjacencies``;
    it is not defined here.
    """

    def _get_next_recipient(self, task):
        """
        Return a randomly chosen neighbor.

        :param task: accepted for interface compatibility; never inspected
        :return: one element of ``self.adjacencies``
        :raises IndexError: if ``self.adjacencies`` is an empty sequence
            (raised by ``random.choice``)
        """
        return choice(self.adjacencies)

    def _learn(self):
        """Do nothing: this learner never updates any state."""
        pass
class SoftmaxLearnerMixin(object):
    """
    Learning mixin that picks recipients by sampling a softmax over neighbors.

    Keeps one weight array per adjacency, scores a task's features against
    each of them and samples a neighbor in proportion to the resulting
    softmax probabilities.  Weight adjustments computed by ``_learn`` are
    buffered and only applied to the weights by ``flush_updates``.

    The host class must provide ``adjacencies``, ``environment.path_cutoff``,
    ``award`` and ``value``; none of them are defined here.
    """

    # Neighbor picked by the latest decision that has not been learned from
    # yet; None when nothing is pending.
    last_recipient = None
    # Maps each adjacency to its weight array; created on first use.
    w_container = None
    # Pending (adjacency, weight adjustment) pairs; created on first use.
    update_buffer = None
    # Not read anywhere in this class.
    # NOTE(review): presumably consumed by the environment -- confirm.
    policy_duration = 1
    # Number of flush_updates() calls.  Defined at class level so that
    # flush_updates() also works on a learner that never routed a task.
    flush_count = 0

    def _get_next_recipient(self, task):
        """
        Choose the neighbor that should receive ``task``.

        Uses ``task.features`` and ``self.w_container`` to compute softmax
        probabilities over the neighbors.  If ``task.target`` is itself a
        neighbor it is chosen directly; otherwise a neighbor is sampled
        according to those probabilities.  The decision is kept as pending
        state (``last_recipient``, ``softmaxes``, ``latest_x``) until the
        next ``_learn`` call.

        :param task: object exposing ``features`` and ``target``
        :return: the chosen neighbor
        """
        # lazily initialize random weights and the weight update buffer
        # (for policy rollouts)
        if self.w_container is None:
            self.w_container = {
                neighbor: np.random.random(task.features.shape)
                for neighbor in self.adjacencies
            }
        if self.update_buffer is None:
            self.flush_count = 0
            self.update_buffer = []

        # if a decision is still pending feedback when another one is
        # requested, mark that last decision as 'wrong' before proceeding
        # NOTE(review): relies on award() consuming the pending state
        # (presumably by calling _learn) -- confirm in the host class.
        if self.last_recipient is not None:
            self.award(-1. / self.environment.path_cutoff)

        self.latest_x = features = task.features
        # computed even when the target is visible, because _learn needs
        # the probabilities to build its gradient
        self.softmaxes = _exp_over_sumexp(features, self.w_container)

        if task.target in self.adjacencies:
            # don't actually use the weights to decide if the target is
            # a direct neighbor
            decision = task.target
        else:
            # Sample an index rather than handing the keys to numpy:
            # dict views are rejected by numpy.random.choice on Python 3,
            # and converting arbitrary key objects to an array can mangle
            # them.  The original key object is returned unchanged.
            candidates = list(self.softmaxes)
            probabilities = [self.softmaxes[c] for c in candidates]
            decision = candidates[_npchoice(len(candidates), p=probabilities)]
        self.last_recipient = decision
        return decision

    def _learn(self):
        """
        Turn the pending decision and ``self.value`` into a buffered update.

        The sign of ``self.value`` decides whether the pending decision
        counts as a success; its magnitude scales the gradient.  The
        resulting adjustment is appended to ``update_buffer`` (the weights
        themselves are untouched until ``flush_updates``) and the pending
        state is cleared.

        :raises AssertionError: if no decision is pending
        """
        assert self.last_recipient is not None
        grad = _gradient_precomputed(
            self.last_recipient,
            self.softmaxes,
            self.latest_x,
            self.value > 0,
        )
        w_adjustment = grad * abs(self.value)
        self.update_buffer.append((self.last_recipient, w_adjustment))
        self.last_recipient, self.softmaxes, self.latest_x = None, None, None

    def flush_updates(self):
        """
        Apply every buffered weight adjustment and empty the buffer.

        Increments ``flush_count``.  Safe to call on a learner that has not
        made any decision yet: there is nothing to apply in that case.
        """
        self.flush_count += 1
        # update_buffer is still None if no routing decision was ever made
        for adjacency_index, w_adjustment in self.update_buffer or ():
            self.w_container[adjacency_index] += w_adjustment
        self.update_buffer = []
def _gradient_precomputed(k, softmaxes, train_x, success):
    """
    Return the gradient for class ``k`` from already-computed softmaxes.

    The features are scaled by the gap between the outcome indicator
    (1 on success, 0 otherwise) and the probability assigned to ``k``,
    so the magnitude of the gradient is proportional to confidence.

    :param k: key into ``softmaxes`` of the class that was chosen
    :param softmaxes: mapping from class key to its softmax probability
    :param train_x: features the decision was based on
    :param success: truthy if the decision counts as a success
    :return: ``train_x * (indicator - softmaxes[k])``
    """
    if success:
        indicator = 1
    else:
        indicator = 0
    return train_x * (indicator - softmaxes[k])
def _exp_over_sumexp(train_x, w_container):
    """
    Calculate an exponential over a sum of exponentials in a numerically
    stable way.

    Returns a dict keying class values k :
        np.exp(x_t*wk) / sum for j in K {np.exp(x_t*wj)}

    :param train_x: features to score against every class
    :param w_container: mapping from class key to its weights
    :return: dict with the same keys as ``w_container`` holding the
        softmax value of each class
    :raises ValueError: if ``w_container`` is empty (raised by ``max``)
    """
    x_t = np.transpose(train_x)
    # Freeze the key order once so every score stays paired with its class.
    keys = list(w_container)
    # A real list rather than a lazy ``map``: the scores are traversed for
    # the maximum, for the sum and for the per-class ratio, and a Python 3
    # ``map`` object is exhausted after one pass and cannot be indexed.
    dots = [np.dot(x_t, w_container[k]) for k in keys]
    # the final output is unchanged by removing the maximum `weight * x`
    # value from each dot product before exponentiation
    beta = max(dots)
    exps = [np.exp(d - beta) for d in dots]
    sumexp = sum(exps)
    return {k: e / sumexp for k, e in zip(keys, exps)}