# Source code for gmmmc.gmm

import numpy as np
import sklearn.mixture
from gmmmc.fastgmm import gmm_likelihood
import multiprocessing

class GMM(object):
    """
    Diagonal-covariance Gaussian mixture model wrapping ``sklearn.mixture.GMM``.

    The parameters are stored on the wrapped sklearn model (``self.gmm``).
    The ``means``, ``covars`` and ``weights`` properties read from that model;
    assigning to any of them replaces ``self.gmm`` with a freshly created model.
    """

    # NOTE: the class derives from ``object`` explicitly so that it is a
    # new-style class on Python 2 as well; property setters are only honoured
    # on new-style classes.

    def __init__(self, means, covariances, weights):
        """
        Gaussian Mixture Model Distribution class for calculation of log likelihood and sampling.

        Parameters
        ----------
        means : 2-D array of shape (n_mixtures, n_features)
            Means for each component of the GMM. Must expose ``.shape``.
        covariances : 2-D array of shape (n_mixtures, n_features)
            Covariance matrices of the GMM. Only diagonal matrices are supported at this time.
            Must expose ``.shape``.
        weights : 1-D array_like of shape (n_mixtures,)
            Weights for each of the GMM components

        Raises
        ------
        NotImplementedError
            If ``covariances`` is not 2 dimensional (i.e. not diagonal form).
        ValueError
            If ``means`` has no ``shape`` or fewer than 2 dimensions.
        """
        if len(covariances.shape) != 2:
            raise NotImplementedError('Only diagonal covariance matrices supported')

        # Validate the means before building the sklearn model so that bad
        # input fails fast. Only the errors a missing/short ``shape`` can
        # produce are translated; anything else propagates unchanged.
        try:
            n_features = means.shape[1]
        except (AttributeError, IndexError):
            raise ValueError("Means array must be 2 dimensional")

        self.covariance_type = 'diag'
        self.n_mixtures = len(weights)
        self.n_features = n_features

        # The fitted-parameter attributes are assigned directly instead of
        # being estimated from data.
        self.gmm = sklearn.mixture.GMM(n_components=self.n_mixtures)
        self.gmm.weights_ = weights
        self.gmm.covars_ = covariances
        self.gmm.means_ = means

    @property
    def means(self):
        """Component means, as stored on the wrapped sklearn model."""
        return self.gmm.means_

    @property
    def covars(self):
        """Component covariances, as stored on the wrapped sklearn model."""
        return self.gmm.covars_

    @property
    def weights(self):
        """Component weights, as stored on the wrapped sklearn model."""
        return self.gmm.weights_

    # The setters below rebuild the wrapped model rather than mutating it in
    # place, so that the sklearn sample method will work correctly.
    # NOTE(review): ``create_gmm`` is not defined in this class; it is
    # presumably a module-level helper defined elsewhere in this file - confirm.

    @means.setter
    def means(self, means):
        self.gmm = create_gmm(self.n_mixtures, means, self.gmm.covars_, self.gmm.weights_)

    @covars.setter
    def covars(self, covars):
        self.gmm = create_gmm(self.n_mixtures, self.gmm.means_, covars, self.gmm.weights_)

    @weights.setter
    def weights(self, weights):
        self.gmm = create_gmm(self.n_mixtures, self.gmm.means_, self.gmm.covars_, weights)

    def sample(self, n_samples):
        """
        Sample from the GMM.

        Parameters
        ----------
        n_samples : int
            Number of samples to draw.

        Returns
        -------
            : 2-D array_like of shape (n_samples, n_features)
            Samples drawn from the GMM distribution
        """
        return self.gmm.sample(n_samples)

    def log_likelihood(self, X, n_jobs=1):
        """
        Calculate the log likelihood of the data given the GMM parameters

        Parameters
        ----------
        X : 2-D array_like of shape (n_samples, n_features)
            Data to be used.
        n_jobs : int
            Number of CPU cores to use in the calculation. A negative value
            means "use ``multiprocessing.cpu_count()`` cores".

        Returns
        -------
            : float
            With a single job: the sum over ``self.gmm.score(X)`` (the legacy
            sklearn ``GMM.score`` returns one log probability per sample, so
            this is the total, not the mean). With several jobs: the value
            returned by ``gmm_likelihood`` (presumably the same quantity -
            TODO confirm against gmmmc.fastgmm).

        Raises
        ------
        ValueError
            If ``n_jobs`` is 0.

        Notes
        -------
        For GMMs with small numbers of mixtures (<10) the use of more than 1 core can slow down the function.
        """
        if n_jobs == 0:
            raise ValueError("n_jobs==0 has no meaning")
        if n_jobs < 0:
            n_jobs = multiprocessing.cpu_count()

        if n_jobs == 1:
            # Use the sklearn/numpy implementation
            return np.sum(self.gmm.score(X))

        # Use compiled multithreaded C code
        return gmm_likelihood(X, self.means, self.covars, self.weights, n_jobs=n_jobs)