Source code for gmmmc.proposals.gaussian_proposals

import numpy as np
from gmmmc.gmm import GMM
from gmmmc.proposals.proposals import Proposal
import pdb

[docs]class GaussianStepMeansProposal(Proposal):
    """Gaussian Proposal distribution for means of a GMM"""

    def __init__(self, step_sizes=(0.001,)):
        """
        Gaussian proposal distribution for the means. The multivariate Gaussian is centered at the means of the current
        state in the Markov Chain and has covariance given by step_sizes. Multiple step sizes can be specified.
        The proposal algorithm will take these steps in the sequence specified in step_sizes.

        Parameters
        ----------
        step_sizes : 1-D array_like
            Iterable containing the sequence of step sizes (covariances of the Gaussian proposal distribution"
        """
        super(GaussianStepMeansProposal, self).__init__()
        self.step_sizes = step_sizes
        self.count_accepted = np.zeros((len(step_sizes),))
        self.count_illegal =  np.zeros((len(step_sizes),))
        self.count_proposed = np.zeros((len(step_sizes),))

[docs]    def propose(self, X, gmm, target, n_jobs=1):
        """
        Propose a new set of GMM means.

        Parameters
        ----------
        X : 2-D array like of shape (n_samples, n_features)
            The observed data or evidence.
        gmm : GMM object
            The current state (set of gmm parameters) in the Markov Chain
        target : GMMPosteriorTarget object
            The target distribution to be found.
        n_jobs : int
            Number of cpu cores to use in the calculation of log probabilities.

        Returns
        -------
            : GMM
            A new GMM object initialised with new mean parameters.
        """
        new_means = np.array(gmm.means)
        beta = target.beta
        prior = target.prior
        steps = [np.random.multivariate_normal(np.zeros(gmm.n_features),
                                              step_size * np.eye(gmm.n_features),
                                              size=gmm.n_mixtures)
                 for step_size in self.step_sizes]

        # calculation of prior probabilities of only the means, since only means will change
        log_priors = np.array([prior.means_prior.log_prob_single(gmm.means[mixture], mixture) for mixture in xrange(gmm.n_mixtures)])
        log_prob_priors = np.sum(log_priors)
        previous_prob = beta * gmm.log_likelihood(X, n_jobs) + np.sum(log_priors)
        for i, step in enumerate(steps):
            for mixture in xrange(gmm.n_mixtures):
                self.count_proposed[i] += 1
                # propose new means
                new_mixture_means = gmm.means[mixture] + step[mixture]

                # try out the new means
                proposed_means = np.array(new_means)
                proposed_means[mixture] = new_mixture_means
                proposed_gmm = GMM(proposed_means, np.array(gmm.covars), np.array(gmm.weights))

                # calculate new prior
                new_log_prob_mixture = prior.means_prior.log_prob_single(new_mixture_means, mixture)
                new_log_prob_priors = log_prob_priors - log_priors[mixture] + new_log_prob_mixture

                # priors
                proposed_prob = beta * proposed_gmm.log_likelihood(X, n_jobs) + new_log_prob_priors
                # ratio

                ratio = proposed_prob - previous_prob
                if ratio > 0 or ratio > np.log(np.random.uniform()):
                    # accept proposal
                    new_means = proposed_means
                    previous_prob = proposed_prob
                    # update prior probability calculation
                    log_prob_priors = new_log_prob_priors
                    log_priors[mixture] = new_log_prob_mixture
                    self.count_accepted[i] += 1

        return GMM(new_means, np.array(gmm.covars), np.array(gmm.weights))

[docs]class GaussianStepCovarProposal(Proposal):

    def __init__(self, step_sizes=(0.001,)):
        """
        Gaussian proposal function for the covariances of the GMM.

        Parameters
        ----------
        step_sizes : array_like
            Array of covariance values for the Gaussian proposal.
        """
        super(GaussianStepCovarProposal, self).__init__()
        self.step_sizes = step_sizes
        self.count_accepted = np.zeros((len(step_sizes),))
        self.count_illegal =  np.zeros((len(step_sizes),))
        self.count_proposed = np.zeros((len(step_sizes),))

[docs]    def propose(self, X, gmm, target, n_jobs=1):
        """
        Propose a new set of GMM covariances (diagonal only).

        Parameters
        ----------
        X : 2-D array like of shape (n_samples, n_features)
            The observed data or evidence.
        gmm : GMM object
            The current state (set of gmm parameters) in the Markov Chain
        target : GMMPosteriorTarget object
            The target distribution to be found.
        n_jobs : int
            Number of cpu cores to use in the calculation of log probabilities.

        Returns
        -------
            : GMM
            A new GMM object initialised with new covariance parameters.
        """
        new_covars = np.array(gmm.covars)
        beta = target.beta
        prior = target.prior
        previous_prob = beta * gmm.log_likelihood(X, n_jobs) + prior.log_prob(gmm)
        steps = [np.random.multivariate_normal(np.zeros(gmm.n_features),
                                               step_size * np.eye(gmm.n_features),
                                               size=gmm.n_mixtures) for step_size in self.step_sizes]

        log_priors = np.array([prior.means_prior.log_prob_single(gmm.means[mixture], mixture) for mixture in xrange(gmm.n_mixtures)])
        log_prob_priors = np.sum(log_priors)
        for i, step in enumerate(steps):
            for mixture in xrange(gmm.n_mixtures):
                self.count_proposed[i] += 1
                # propose new covars
                new_mixture_covars = gmm.covars[mixture] + step[mixture]

                if (new_mixture_covars > 0).all(): # check covariances are valid
                    # try out the new covars
                    proposed_covars = np.array(new_covars)
                    proposed_covars[mixture] = new_mixture_covars
                    proposed_gmm = GMM(np.array(gmm.means), proposed_covars, np.array(gmm.weights))

                    # calculate desired distribution
                    new_log_prob_mixture = prior.covars_prior.log_prob_single(new_mixture_covars, mixture)
                    new_log_prob_priors = log_prob_priors - log_priors[mixture] + new_log_prob_mixture
                    proposed_prob = beta * proposed_gmm.log_likelihood(X, n_jobs) + new_log_prob_priors

                    # ratio
                    ratio = proposed_prob - previous_prob
                    if ratio > 0 or ratio > np.log(np.random.uniform()):
                        # accept proposal
                        new_covars = proposed_covars
                        previous_prob = proposed_prob
                        log_prob_priors = new_log_prob_priors
                        log_priors[mixture] = new_log_prob_mixture
                        self.count_accepted[i] += 1
                else:
                    self.count_illegal[i] += 1

        return GMM(np.array(gmm.means), np.array(new_covars), np.array(gmm.weights))

[docs]class GaussianStepWeightsProposal(Proposal):

    def __init__(self,  n_mixtures, step_sizes=(0.001,)):
        """
        Gaussian proposal function for the weights of a GMM.

        Parameters
        ----------
        n_mixtures
        step_sizes

        Notes
        ----------
        The proposal function works by projecting the weight vector w onto the simplex defined by
        w_1 + w_2 + ..... w_n = 1 , 0<=w_i<=1. The change of basis matrix is found by finding n-1 vectors lying on the plane
        and using gramm schmidt to get an orthonormal basis. A Gaussian proposal function in (n-1)-d space is
        used to find the next point on the simplex.
        """
        super(GaussianStepWeightsProposal, self).__init__()
        self.step_sizes = step_sizes
        self.n_mixtures = n_mixtures
        self.count_accepted = np.zeros((len(step_sizes),))
        self.count_illegal =  np.zeros((len(step_sizes),))
        self.count_proposed = np.zeros((len(step_sizes),))


        if n_mixtures > 1:
            # get change of basis matrix mapping n dim coodinates to n-1 dim coordinates on simplex
            # x1 + x2 + x3 ..... =1
            points = np.random.dirichlet([1 for i in xrange(n_mixtures)], size=n_mixtures - 1)
            points = points.T
            self.plane_origin = np.ones((n_mixtures)) / float(n_mixtures)
            # get vectors parallel to plane from its center (1/n,1/n,....)
            parallel = points - np.ones(points.shape) / float(n_mixtures)
            # do gramm schmidt to get mutually orthonormal vectors (basis)
            self.e, _ = np.linalg.qr(parallel)

[docs]    def transformSimplex(self, weights):
        """
        Project weight vector onto the normal simplex.

        Parameters
        ----------
        weights : array_like of shape (n_mixtures,)
            vector of weights for each gaussian component

        Returns
        -------
            : array_like of shape (n_mixtures-1,)
            vector of weights projected onto the simplex plane
        """
        # project onto the simplex
        return np.dot(self.e.T, weights - self.plane_origin)

[docs]    def invTransformSimplex(self, simplex_coords):
        """
        Transforms a point on the simplex to the original vector space.

        Parameters
        ----------
        simplex_coords : array_like of shape (n_mixtures - 1,)
            Coordinates of a weight vector on the simplex.

        Returns
        -------
            : array_like of shape(n_mixtures,)
            vector of weights.
        """
        return self.plane_origin + np.dot(self.e, simplex_coords)

[docs]    def propose(self, X, gmm, target, n_jobs=1):
        """
        Propose a new set of weight vectors.
        Parameters
        ----------
        X : 2-D array like of shape (n_samples, n_features)
            The observed data or evidence.
        gmm : GMM object
            The current state (set of gmm parameters) in the Markov Chain
        target : GMMPosteriorTarget object
            The target distribution to be found.
        n_jobs : int
            Number of cpu cores to use in the calculation of log probabilities.
        Returns
        -------
            : GMM
            A new GMM object initialised with new covariance parameters.
        """
        accepted = False
        cur_gmm = gmm
        if gmm.n_mixtures > 1:
            for i, step_size in enumerate(self.step_sizes):
                self.count_proposed[i] += 1
                current_weights_transformed = self.transformSimplex(cur_gmm.weights)
                proposed_weights_transformed = np.random.multivariate_normal(current_weights_transformed,
                                                                             np.eye(self.n_mixtures - 1) * step_size)
                proposed_weights = self.invTransformSimplex(proposed_weights_transformed   )
                if np.logical_and(0 <= proposed_weights, proposed_weights <= 1).all()\
                    and np.isclose(np.sum(proposed_weights), 1.0):
                    previous_prob = target.log_prob(X, cur_gmm, n_jobs)
                    proposed_gmm = GMM(np.array(cur_gmm.means), np.array(cur_gmm.covars), proposed_weights)
                    proposed_prob = target.log_prob(X, proposed_gmm, n_jobs)
                    ratio = proposed_prob - previous_prob
                    if ratio > 0 or ratio > np.log(np.random.uniform()):
                        # accept proposal
                        self.count_accepted[i] += 1
                        accepted = True
                        cur_gmm = proposed_gmm
                else:
                    self.count_illegal[i] += 1

        if accepted is True:
            return cur_gmm
        else:
            return GMM(np.array(gmm.means), np.array(gmm.covars), np.array(gmm.weights))