[code lang="cpp"]
include <time.h>
include <mlpack/core.hpp>
include <mlpack/methods/kmeans/kmeans.hpp>
include <mlpack/methods/kmeans/allow_empty_clusters.hpp>
include <mlpack/methods/kmeans/refined_start.hpp>
include <mlpack/methods/kmeans/max_variance_new_cluster.hpp>
include <mlpack/methods/gmm/gmm.hpp>
include <mlpack/methods/gmm/phi.hpp>
include <mlpack/methods/gmm/no_constraint.hpp>
include <mlpack/methods/gmm/positive_definite_constraint.hpp>
include <mlpack/methods/gmm/diagonal_constraint.hpp>
include <mlpack/methods/gmm/eigenvalue_ratio_constraint.hpp>
int main(int argc, char* argv[]) { srand(time(NULL));
int numDim = 3;
int numGaussian = 3;
int numObservation = 1000;
if(argc > 1)
{
str2scalar(argv[1], numObservation);
}
//////////////////////////////////////////////////////////// // Synthesize ground truth data. mlpack::gmm::GMM<> gmm(numGaussian, numDim); for(int n = 0; n < numGaussian; ++n) { gmm.Means()[n] = 10.0arma::randu<arma::vec>(numDim); gmm.Covariances()[n] = arma::eye<arma::mat>(numDim,numDim) + 0.1arma::randu<arma::mat>(numDim,numDim); mlpack::gmm::PositiveDefiniteConstraint::ApplyConstraint(gmm.Covariances()[n]); } double minWeight = 0.0; while(minWeight < (1.0/(double)(numGaussian*5)) ) { gmm.Weights() = arma::randu<arma::vec>(numDim); gmm.Weights() /= arma::accu(gmm.Weights()); minWeight = gmm.Weights().min(); } arma::mat data(numDim, numObservation); for(int n = 0; n < numObservation; ++n) { data.col(n) = gmm.Random(); }
//////////////////////////////////////////////////////////// // EM algorithm for GMM fitting without initial guess. mlpack::gmm::GMM<> gmmCur(numGaussian, numDim); clock_t t = clock(); gmmCur.Estimate( data, 1 ); std::cout << "EM algorithm without initial guess: " << (double)(clock()-t)/CLOCKS_PER_SEC << std::endl; std::cout << " EM fitting result without initial guess:" << std::endl; for(int n = 0; n < numGaussian; ++n) { std::cout << "mean[" << n << "] = " << gmmCur.Means()[n].t(); std::cout << "cov.[" << n << "] = " << gmmCur.Covariances()[n].t(); std::cout << "wei.[" << n << "] = " << gmmCur.Weights()[n] << std::endl; }
//////////////////////////////////////////////////////////// // EM algorithm for GMM fitting with initial guess. // step.0: compute K-1 GMM by EM algorithm mlpack::gmm::GMM<> gmmPrev(numGaussian-1, numDim); gmmPrev.Estimate( data, 1 );
// step.1: compute assignment with known K-1 centroids
t = clock();
mlpack::kmeans::KMeans<> kmeans; // No overclustering.
arma::uvec tmp;
arma::mat centroidPrev = arma::zeros<arma::mat>(numDim,numGaussian-1);
arma::Col<size_t> assignmentsPrev;
arma::Col<size_t> clusterCountsPrev = arma::zeros< arma::Col<size_t> >(numGaussian-1);
for(int n = 0; n < (numGaussian-1); ++n)
{
centroidPrev.col(n) = gmmPrev.Means()[n];
}
kmeans.Cluster(
data,
numGaussian-1,
assignmentsPrev,
centroidPrev,
false,
true
);
tmp = arma::hist(assignmentsPrev, arma::linspace< arma::Col<size_t> >(0,numGaussian-2,numGaussian-1) );
for(int n = 0; n < (numGaussian-1); ++n)
{
clusterCountsPrev(n) = tmp(n);
}
// step.2: find K-th centroid
arma::mat centroidCur = arma::zeros<arma::mat>(numDim,numGaussian);
arma::Col<size_t> assignmentsCur;
arma::Col<size_t> clusterCountsCur = arma::zeros< arma::Col<size_t> >(numGaussian);
for(int n = 0; n < (numGaussian-1); ++n)
{
centroidCur.col(n) = centroidPrev.col(n);
clusterCountsCur(n) = tmp(n);
}
size_t numOfChangedPoints = mlpack::kmeans::MaxVarianceNewCluster::EmptyCluster(
data,
numGaussian-1,
centroidCur,
clusterCountsCur,
assignmentsPrev
);
for(int m = 0; m < numObservation; ++m)
{
if( assignmentsPrev(m) == (numGaussian-1) )
{
centroidCur.col(numGaussian-1) = data.col(m);
}
}
// step.3: re-compute assignments with K centroids
kmeans.Cluster(
data,
numGaussian,
assignmentsCur,
centroidCur,
false,
true
);
tmp = arma::hist(assignmentsCur, arma::linspace< arma::Col<size_t> >(0,numGaussian-1,numGaussian) );
for(int n = 0; n < numGaussian; ++n)
{
clusterCountsCur(n) = tmp(n);
}
// step.4 computes covariance and weight
mlpack::gmm::GMM<> gmmCur_(numGaussian, numDim); // 3 dimensions, 4 components.
gmmCur_.Means() = std::vector<arma::vec>(numGaussian, arma::zeros<arma::vec>(numDim));
gmmCur_.Covariances() = std::vector<arma::mat>(numGaussian, arma::zeros<arma::mat>(numDim,numDim));
gmmCur_.Weights() = arma::zeros<arma::vec>(numGaussian);
for(int n = 0; n < numObservation; ++n)
{
gmmCur_.Means()[ assignmentsCur[n] ] += data.col(n);
}
for(int n = 0; n < numGaussian; ++n)
{
gmmCur_.Means()[n] /= (clusterCountsCur[n] > 1) ? (double)clusterCountsCur[n] : 1.0;
gmmCur_.Weights()(n) = (double)clusterCountsCur(n) / (double)numObservation;
}
size_t cluster;
arma::vec vecDiff;
for(int n = 0; n < numObservation; ++n)
{
cluster = assignmentsCur[n];
vecDiff = data.col(n) - gmmCur_.Means()[cluster];
gmmCur_.Covariances()[cluster] += vecDiff * vecDiff.t();
}
for(int n = 0; n < numGaussian; ++n)
{
gmmCur_.Covariances()[n] /= (clusterCountsCur[n] >1) ? (double)clusterCountsCur[n] : 1.0;
mlpack::gmm::PositiveDefiniteConstraint::ApplyConstraint(gmmCur_.Covariances()[n]);
}
// step.5 EM algorithm with known means, covariances, and weights
gmmCur_.Estimate(
data,
1,
true
);
std::cout << "EM algorithm with initial guess: " << (double)(clock()-t)/CLOCKS_PER_SEC << std::endl;
std::cout << " EM fitting result with initial guess:" << std::endl;
for(int n = 0; n < numGaussian; ++n)
{
std::cout << "mean[" << n << "] = " << gmmCur_.Means()[n].t();
std::cout << "cov.[" << n << "] = " << gmmCur_.Covariances()[n].t();
std::cout << "wei.[" << n << "] = " << gmmCur_.Weights()[n] << std::endl;
}
std::cout << " Ground truth:" << std::endl;
for(int n = 0; n < numGaussian; ++n)
{
std::cout << "mean[" << n << "] = " << gmm.Means()[n].t();
std::cout << "cov.[" << n << "] = " << gmm.Covariances()[n].t();
std::cout << "wei.[" << n << "] = " << gmm.Weights()[n] << std::endl;
}
return 0;
} [/code]