Source code for gmmmc.monte_carlo

import abc
import logging
from gmmmc.posterior import GMMPosteriorTarget
import numpy as np

[docs]class MonteCarloBase: __metaclass__ = abc.ABCMeta @abc.abstractmethod
[docs] def sample(self, X, n_samples): return
[docs]class MarkovChain(MonteCarloBase): def __init__(self, proposal, prior, initial_gmm): """ Base algorithm for Markov Chain Monte Carlo Parameters ---------- proposal : Proposal Object The proposal function to be used. prior : GMMPrior Object The prior distribution on the parameters of the GMM. initial_gmm : GMM Object The starting point for the MCMC simulation. """ self.proposal = proposal self.target = GMMPosteriorTarget(prior, beta=1.0) self.initial_gmm = initial_gmm
[docs] def sample(self, X, n_samples, n_jobs=1): """ Sample from the posterior distribution of the GMM parameters Parameters ---------- X : 2-D array_like of shape (n_feature_vectors, n_features) Feature vectors used as the data for the underlying model. n_samples : int Number of Monte Carlo samples to be drawn from the posterior distribution. n_jobs : int Number of cpu cores to utilise during the Monte Carlo simulation. Returns ------- samples : List of GMM Objects Returns a list of GMMs (GMM parameters) which are the samples drawn from the distribution. Notes ----- It is beneficial to discard a number of samples from the beginning of the chain (burn-in) as well as to use only every nth sample (lag) to reduce the correlation between succesive samples of the posterior. """ samples = [] current_gmm = self.initial_gmm for run in xrange(n_samples): logging.info('Run: {0}'.format(run)) current_gmm = self.proposal.propose(X, current_gmm, self.target, n_jobs) samples.append(current_gmm) return samples
[docs]class AnnealedImportanceSampling(MonteCarloBase): def __init__(self, proposal, priors, betas): """ Base algorithm for Annealed Importance Sampling. Parameters ---------- proposal : Proposal Object The proposal function to be used. prior : GMMPrior Object The prior distribution on the parameters of the GMM. betas : 1-D array_like of shape (n_betas,) Betas used for the annealing process. A series of doubles from 0 to 1. References ---------- Neal, Radford M. "Annealed importance sampling." Statistics and Computing 11.2 (2001): 125-139. """ betas = np.array(betas) self.targets = [GMMPosteriorTarget(priors, beta,) for beta in betas] self.proposal = proposal self.priors = priors
[docs] def sample(self, X, n_samples, n_jobs=1, diagnostics=None): """ Generate samples from the posterior distribution of the parameters. Parameters ---------- X : 2-D array_like of shape (n_feature_vectors, n_features) Feature vectors used as the data for the underlying model. n_samples : int Number of Monte Carlo samples to be drawn from the posterior distribution. n_jobs : int Number of cpu cores to utilise during the Monte Carlo simulation. diagnostics : empty dictionary, optional If included, the dictionary passed to the function will contain diagnostic information from each annealing run of AIS. TODO: expand on this Returns ------- : list of tuples (GMM, double) A list of GMM samples with their corresponding weight. """ samples = [] for run in xrange(n_samples): logging.info('Run: {0}'.format(run)) if diagnostics is not None: diagnostics[run] = {} sample, weight = self.anneal(X, n_jobs, diagnostics[run]) else: sample, weight = self.anneal(X, n_jobs) samples.append((sample, weight)) return samples
[docs] def anneal(self, X, n_jobs, diagnostics=None): """ A single annealing run from AIS. Parameters ---------- X : 2-D array_like of shape (n_feature_vectors, n_features) Feature vectors used as the data for the underlying model. n_jobs : int Number of cpu cores to utilise during the Monte Carlo simulation. diagnostics : empty dictionary, optional If included, the dictionary passed to the function will contain diagnostic information from each annealing run of AIS. TODO: expand on this Returns ------- : tuple (GMM Object, double) A single GMM sample from AIS and its corresponding weight. """ # draw from prior cur_gmm = self.priors.sample() samples = [] for anneal_run, target in enumerate(self.targets[1:-1]): # skip first T_n (prior only) and last transition T_0 (posterior) (not necessary) samples.append(cur_gmm) cur_gmm = self.proposal.propose(X, cur_gmm, target, n_jobs) samples.append(cur_gmm) if diagnostics is not None: diagnostics['intermediate_log_weights'] = [] diagnostics['intermediate_betas'] = [] numerator = 0 denominator = 0 for run, sample in enumerate(samples): numerator += self.targets[run + 1].log_prob(X, sample, n_jobs) denominator += self.targets[run].log_prob(X, sample, n_jobs) if diagnostics is not None: diagnostics['intermediate_log_weights'].append(numerator - denominator) diagnostics['intermediate_betas'].append(self.targets[run].beta) weight = numerator - denominator if diagnostics is not None: diagnostics['intermediate_samples'] = samples diagnostics['final_sample'] = cur_gmm diagnostics['final_weight'] = weight return (cur_gmm, weight)