I am Charmie

メモとログ

mlpack: K GMMs fitting by EM algorithm using K-1 GMMs fitting result

[code lang="cpp"]

include <time.h>

include <mlpack/core.hpp>

include <mlpack/methods/kmeans/kmeans.hpp>

include <mlpack/methods/kmeans/allow_empty_clusters.hpp>

include <mlpack/methods/kmeans/refined_start.hpp>

include <mlpack/methods/kmeans/max_variance_new_cluster.hpp>

include <mlpack/methods/gmm/gmm.hpp>

include <mlpack/methods/gmm/phi.hpp>

include <mlpack/methods/gmm/no_constraint.hpp>

include <mlpack/methods/gmm/positive_definite_constraint.hpp>

include <mlpack/methods/gmm/diagonal_constraint.hpp>

include <mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp>

int main(int argc, char* argv[]) { srand(time(NULL));

int numDim = 3;
int numGaussian = 3;
int numObservation = 1000;
if(argc &gt; 1)
{
    str2scalar(argv[1], numObservation);
}

//////////////////////////////////////////////////////////// // Synthesize ground truth data. mlpack::gmm::GMM<> gmm(numGaussian, numDim); for(int n = 0; n < numGaussian; ++n) { gmm.Means()[n] = 10.0arma::randu<arma::vec>(numDim); gmm.Covariances()[n] = arma::eye<arma::mat>(numDim,numDim) + 0.1arma::randu<arma::mat>(numDim,numDim); mlpack::gmm::PositiveDefiniteConstraint::ApplyConstraint(gmm.Covariances()[n]); } double minWeight = 0.0; while(minWeight < (1.0/(double)(numGaussian*5)) ) { gmm.Weights() = arma::randu<arma::vec>(numDim); gmm.Weights() /= arma::accu(gmm.Weights()); minWeight = gmm.Weights().min(); } arma::mat data(numDim, numObservation); for(int n = 0; n < numObservation; ++n) { data.col(n) = gmm.Random(); }

//////////////////////////////////////////////////////////// // EM algorithm for GMM fitting without initial guess. mlpack::gmm::GMM<> gmmCur(numGaussian, numDim); clock_t t = clock(); gmmCur.Estimate( data, 1 ); std::cout << "EM algorithm without initial guess: " << (double)(clock()-t)/CLOCKS_PER_SEC << std::endl; std::cout << " EM fitting result without initial guess:" << std::endl; for(int n = 0; n < numGaussian; ++n) { std::cout << "mean[" << n << "] = " << gmmCur.Means()[n].t(); std::cout << "cov.[" << n << "] = " << gmmCur.Covariances()[n].t(); std::cout << "wei.[" << n << "] = " << gmmCur.Weights()[n] << std::endl; }

//////////////////////////////////////////////////////////// // EM algorithm for GMM fitting with initial guess. // step.0: compute K-1 GMM by EM algorithm mlpack::gmm::GMM<> gmmPrev(numGaussian-1, numDim); gmmPrev.Estimate( data, 1 );

// step.1: compute assignment with known K-1 centroids
t = clock();
mlpack::kmeans::KMeans&lt;&gt; kmeans; // No overclustering.
arma::uvec tmp;
arma::mat centroidPrev = arma::zeros&lt;arma::mat&gt;(numDim,numGaussian-1);
arma::Col&lt;size_t&gt; assignmentsPrev;
arma::Col&lt;size_t&gt; clusterCountsPrev = arma::zeros&lt; arma::Col&lt;size_t&gt; &gt;(numGaussian-1);
for(int n = 0; n &lt; (numGaussian-1); ++n)
{
    centroidPrev.col(n) = gmmPrev.Means()[n];
}
kmeans.Cluster(
    data,
    numGaussian-1,
    assignmentsPrev,
    centroidPrev,
    false,
    true
);
tmp = arma::hist(assignmentsPrev, arma::linspace&lt; arma::Col&lt;size_t&gt; &gt;(0,numGaussian-2,numGaussian-1) );
for(int n = 0; n &lt; (numGaussian-1); ++n)
{
    clusterCountsPrev(n) = tmp(n);
}

// step.2: find K-th centroid
arma::mat centroidCur = arma::zeros&lt;arma::mat&gt;(numDim,numGaussian);
arma::Col&lt;size_t&gt; assignmentsCur;
arma::Col&lt;size_t&gt; clusterCountsCur = arma::zeros&lt; arma::Col&lt;size_t&gt; &gt;(numGaussian);
for(int n = 0; n &lt; (numGaussian-1); ++n)
{
    centroidCur.col(n) = centroidPrev.col(n);
    clusterCountsCur(n) = tmp(n);
}
size_t numOfChangedPoints = mlpack::kmeans::MaxVarianceNewCluster::EmptyCluster(
    data,
    numGaussian-1,
    centroidCur,
    clusterCountsCur,
    assignmentsPrev
);
for(int m = 0; m &lt; numObservation; ++m)
{
    if( assignmentsPrev(m) == (numGaussian-1) )
    {
        centroidCur.col(numGaussian-1) = data.col(m);
    }
}

// step.3: re-compute assignments with K centroids
kmeans.Cluster(
    data,
    numGaussian,
    assignmentsCur,
    centroidCur,
    false,
    true
);
tmp = arma::hist(assignmentsCur, arma::linspace&lt; arma::Col&lt;size_t&gt; &gt;(0,numGaussian-1,numGaussian) );
for(int n = 0; n &lt; numGaussian; ++n)
{
    clusterCountsCur(n) = tmp(n);
}

// step.4 computes covariance and weight
mlpack::gmm::GMM&lt;&gt; gmmCur_(numGaussian, numDim); // 3 dimensions, 4 components.
gmmCur_.Means() = std::vector&lt;arma::vec&gt;(numGaussian, arma::zeros&lt;arma::vec&gt;(numDim));
gmmCur_.Covariances() = std::vector&lt;arma::mat&gt;(numGaussian, arma::zeros&lt;arma::mat&gt;(numDim,numDim));
gmmCur_.Weights() = arma::zeros&lt;arma::vec&gt;(numGaussian);
for(int n = 0; n &lt; numObservation; ++n)
{
    gmmCur_.Means()[ assignmentsCur[n] ] += data.col(n);
}
for(int n = 0; n &lt; numGaussian; ++n)
{
    gmmCur_.Means()[n] /= (clusterCountsCur[n] &gt; 1) ? (double)clusterCountsCur[n] : 1.0;
    gmmCur_.Weights()(n) = (double)clusterCountsCur(n) / (double)numObservation;
}
size_t cluster;
arma::vec vecDiff;
for(int n = 0; n &lt; numObservation; ++n)
{
    cluster = assignmentsCur[n];
    vecDiff = data.col(n) - gmmCur_.Means()[cluster];
    gmmCur_.Covariances()[cluster] += vecDiff * vecDiff.t();
}
for(int n = 0; n &lt; numGaussian; ++n)
{
    gmmCur_.Covariances()[n] /= (clusterCountsCur[n] &gt;1) ? (double)clusterCountsCur[n] : 1.0;
    mlpack::gmm::PositiveDefiniteConstraint::ApplyConstraint(gmmCur_.Covariances()[n]);
}

// step.5 EM algorithm with known means, covariances, and weights
gmmCur_.Estimate(
    data,
    1,
    true
);

std::cout &lt;&lt; &quot;EM algorithm with initial guess: &quot; &lt;&lt; (double)(clock()-t)/CLOCKS_PER_SEC &lt;&lt; std::endl;
std::cout &lt;&lt; &quot;  EM fitting result with initial guess:&quot; &lt;&lt; std::endl;
for(int n = 0; n &lt; numGaussian; ++n)
{
    std::cout &lt;&lt; &quot;mean[&quot; &lt;&lt; n &lt;&lt; &quot;] = &quot; &lt;&lt; gmmCur_.Means()[n].t();
    std::cout &lt;&lt; &quot;cov.[&quot; &lt;&lt; n &lt;&lt; &quot;] = &quot; &lt;&lt; gmmCur_.Covariances()[n].t();
    std::cout &lt;&lt; &quot;wei.[&quot; &lt;&lt; n &lt;&lt; &quot;] = &quot; &lt;&lt; gmmCur_.Weights()[n] &lt;&lt; std::endl;
}

std::cout &lt;&lt; &quot;  Ground truth:&quot; &lt;&lt; std::endl;
for(int n = 0; n &lt; numGaussian; ++n)
{
    std::cout &lt;&lt; &quot;mean[&quot; &lt;&lt; n &lt;&lt; &quot;] = &quot; &lt;&lt; gmm.Means()[n].t();
    std::cout &lt;&lt; &quot;cov.[&quot; &lt;&lt; n &lt;&lt; &quot;] = &quot; &lt;&lt; gmm.Covariances()[n].t();
    std::cout &lt;&lt; &quot;wei.[&quot; &lt;&lt; n &lt;&lt; &quot;] = &quot; &lt;&lt; gmm.Weights()[n] &lt;&lt; std::endl;
}

return 0;

} [/code]