Skip to content

Commit

Permalink
Fix kShape for double array (#91)
Browse files Browse the repository at this point in the history
  • Loading branch information
Oscar Torreno committed Mar 12, 2019
1 parent deca820 commit dfd12e4
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 31 deletions.
77 changes: 48 additions & 29 deletions src/khiva/clustering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* @param k The number of centroids.
* @return The new centroids.
*/
af::array calculateInitialMeans(af::array tss, int k) { return af::constant(0.0f, tss.dims(0), k); }
af::array calculateInitialMeans(af::array tss, int k) { return af::constant(0, tss.dims(0), k, tss.type()); }

/**
* Computes The euclidean distance for a tiled time series agains k-means.
Expand All @@ -40,13 +40,13 @@ af::array kEuclideanDistance(af::array tts, af::array means) {
* @param labels The ids of the closes mean for all time series.
*/
void euclideanDistance(af::array tss, af::array means, af::array &minDistance, af::array &idxs) {
int nseries = tss.dims(1);
af::array kDistances = af::constant(0.0, means.dims(1), nseries);
int nSeries = tss.dims(1);
af::array kDistances = af::constant(0.0, means.dims(1), nSeries, tss.type());

// This for loop could be parallel, not parallelized to keep memory footprint low
for (int i = 0; i < nseries; i++) {
af::array tiledSerie = af::tile(tss.col(i), 1, means.dims(1));
kDistances(af::span, i) = kEuclideanDistance(tiledSerie, means);
for (int i = 0; i < nSeries; i++) {
af::array tiledSeries = af::tile(tss.col(i), 1, means.dims(1));
kDistances(af::span, i) = kEuclideanDistance(tiledSeries, means);
}
af::min(minDistance, idxs, kDistances, 0);
}
Expand All @@ -61,7 +61,7 @@ void euclideanDistance(af::array tss, af::array means, af::array &minDistance, a
*/
af::array computeNewMeans(af::array tss, af::array labels, int k) {
af::array labelsTiled = af::tile(labels, tss.dims(0));
af::array newMeans = af::constant(0.0, tss.dims(0), k);
af::array newMeans = af::constant(0.0, tss.dims(0), k, tss.type());

gfor(af::seq ii, k) {
newMeans(af::span, ii) = af::sum(tss * (labelsTiled == ii), 1) / (af::sum(af::sum((labels == ii), 3), 1));
Expand Down Expand Up @@ -133,7 +133,7 @@ void khiva::clustering::kMeans(af::array tss, int k, af::array &centroids, af::a
labels = generateRandomLabels(tss.dims(1), k);
}

af::array distances = af::constant(0.0f, tss.dims(1));
af::array distances = af::constant(0, tss.dims(1), tss.type());
af::array newMeans;
int iter = 0;

Expand Down Expand Up @@ -204,18 +204,36 @@ af::array eigenValues(af::array matrix) {
* @return The first Eigen vector.
*/
af::array getFirstEigenVector(af::array m) {
float *matHost = m.host<float>();
Eigen::MatrixXf mat = Eigen::Map<Eigen::MatrixXf>(matHost, m.dims(0), m.dims(1));

// Compute Eigen Values.
Eigen::VectorXcf eivals = mat.eigenvalues();
Eigen::VectorXf reEIVals = eivals.real();
af::array eigenValues = af::array(m.dims(0), reEIVals.data());

// Compute Eigen Vectors.
Eigen::EigenSolver<Eigen::MatrixXf> solution(mat);
Eigen::MatrixXf reEIVectors = solution.eigenvectors().real();
af::array eigenVectors = af::array(m.dims(0), m.dims(1), reEIVectors.data());
af::array eigenValues;
af::array eigenVectors;

if (m.type() == af::dtype::f64) {
double *matHost = m.host<double>();
Eigen::MatrixXd mat = Eigen::Map<Eigen::MatrixXd>(matHost, m.dims(0), m.dims(1));

// Compute Eigen Values.
Eigen::VectorXcd eivals = mat.eigenvalues();
Eigen::VectorXd reEIVals = eivals.real();
eigenValues = af::array(m.dims(0), reEIVals.data());

// Compute Eigen Vectors.
Eigen::EigenSolver<Eigen::MatrixXd> solution(mat);
Eigen::MatrixXd reEIVectors = solution.eigenvectors().real();
eigenVectors = af::array(m.dims(0), m.dims(1), reEIVectors.data());
} else if (m.type() == af::dtype::f32) {
float *matHost = m.host<float>();
Eigen::MatrixXf mat = Eigen::Map<Eigen::MatrixXf>(matHost, m.dims(0), m.dims(1));

// Compute Eigen Values.
Eigen::VectorXcf eivals = mat.eigenvalues();
Eigen::VectorXf reEIVals = eivals.real();
eigenValues = af::array(m.dims(0), reEIVals.data());

// Compute Eigen Vectors.
Eigen::EigenSolver<Eigen::MatrixXf> solution(mat);
Eigen::MatrixXf reEIVectors = solution.eigenvectors().real();
eigenVectors = af::array(m.dims(0), m.dims(1), reEIVectors.data());
}

// Get maximum Eigen Value
af::array maxEigenValue;
Expand Down Expand Up @@ -256,7 +274,7 @@ af::array ncc3Dim(af::array tss, af::array centroids) {
int distanceSize = static_cast<unsigned int>(centroids.dims(0)) * 2 - 1;

af::array cc = af::constant(0, static_cast<unsigned int>(centroids.dims(1)), static_cast<unsigned int>(tss.dims(1)),
distanceSize);
distanceSize, tss.type());
for (unsigned int i = 0; i < static_cast<unsigned int>(centroids.dims(1)); i++) {
for (unsigned int j = 0; j < static_cast<unsigned int>(tss.dims(1)); j++) {
cc(i, j, af::span) = af::convolve(tss.col(j), af::flip(centroids.col(i), 0), AF_CONV_EXPAND);
Expand Down Expand Up @@ -286,9 +304,9 @@ af::array SBDShifted(af::array ts, af::array centroid) {
int shift = index - tsLength + 1;

if (shift >= 0) {
shiftedTS = af::join(0, af::constant(0, shift), ts(af::range(tsLength - shift), 0));
shiftedTS = af::join(0, af::constant(0, shift, ts.type()), ts(af::range(tsLength - shift), 0));
} else {
shiftedTS = af::join(0, ts(af::range(tsLength + shift) - shift, 0), af::constant(0, -shift));
shiftedTS = af::join(0, ts(af::range(tsLength + shift) - shift, 0), af::constant(0, -shift, ts.type()));
}

return shiftedTS;
Expand All @@ -304,8 +322,8 @@ af::array SBDShifted(af::array ts, af::array centroid) {
af::array shapeExtraction(af::array tss, af::array centroid) {
int ntss = tss.dims(1);
int nelements = tss.dims(0);
af::array shiftedTSS = af::constant(0.0f, tss.dims(0), tss.dims(1));
af::array shiftedTSi = af::constant(0.0f, tss.dims(0), 1);
af::array shiftedTSS = af::constant(0, tss.dims(0), tss.dims(1), tss.type());
af::array shiftedTSi = af::constant(0, tss.dims(0), 1, tss.type());
af::array dist;

for (int i = 0; i < ntss; i++) {
Expand All @@ -316,7 +334,8 @@ af::array shapeExtraction(af::array tss, af::array centroid) {

shiftedTSS = khiva::normalization::znorm(shiftedTSS);
af::array s = af::matmul(shiftedTSS, shiftedTSS.T());
af::array q = af::identity(nelements, nelements) - (af::constant(1.0, nelements, nelements) / nelements);
af::array q =
af::identity(nelements, nelements) - (af::constant(1.0, nelements, nelements, tss.type()) / nelements);
af::array m = af::matmul(q.T(), s, q);

af::array first = getFirstEigenVector(m);
Expand Down Expand Up @@ -365,7 +384,7 @@ af::array refinementStep(af::array tss, af::array centroids, af::array labels) {
* @return The new set of labels.
*/
af::array assignmentStep(af::array tss, af::array centroids, af::array labels) {
af::array min = af::constant(std::numeric_limits<float>::max(), tss.dims(1));
af::array min = af::constant(std::numeric_limits<float>::max(), tss.dims(1), tss.type());
af::array distances = 1 - af::max(ncc3Dim(tss, centroids), 2);
af::min(min, labels, distances, 0);
labels = labels.T();
Expand All @@ -379,15 +398,15 @@ void khiva::clustering::kShape(af::array tss, int k, af::array &centroids, af::a
unsigned int nElements = static_cast<unsigned int>(tss.dims(0));

if (centroids.isempty()) {
centroids = af::constant(0.0f, nElements, k);
centroids = af::constant(0, nElements, k, tss.type());
}

if (labels.isempty()) {
labels = generateUniformLabels(nTimeseries, k);
}

af::array normTSS = khiva::normalization::znorm(tss);
af::array distances = af::constant(0.0f, nTimeseries, k);
af::array distances = af::constant(0, nTimeseries, k, tss.type());
af::array newCentroids;

float error = std::numeric_limits<float>::max();
Expand Down
34 changes: 32 additions & 2 deletions test/clusteringTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void kmeans2() {
}
}

void kShape() {
void kShapeFloat() {
float tolerance = 1e-10;
int maxIter = 100;
float a[35] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 10.0f, 4.0f, 5.0f, 7.0f,
Expand Down Expand Up @@ -84,6 +84,36 @@ void kShape() {
}
}

void kShapeDouble() {
double tolerance = 1e-10;
int maxIter = 100;
double a[35] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0, 10.0, 4.0, 5.0, 7.0, -3.0, 0.0, -1.0, 15.0, -12.0, 8.0,
9.0, 4.0, 5.0, 2.0, 8.0, 7.0, -6.0, -1.0, 2.0, 9.0, -5.0, -5.0, -6.0, 7.0, 9.0, 9.0, 0.0};

unsigned int idxh[5] = {0, 1, 2, 0, 1};

double expected_c[21] = {-0.5234, 0.1560, -0.3627, -1.2764, -0.7781, 0.9135, 1.8711,
-0.7825, 1.5990, 0.1701, 0.4082, 0.8845, -1.4969, -0.7825,
-0.6278, 1.3812, -2.0090, 0.5022, 0.6278, -0.0000, 0.1256};
int k = 3;
int nElements = 7;
int ntss = 5;

af::array data = af::array(nElements, ntss, a);
af::array idx = af::array(ntss, 1, idxh);
af::array centroids = af::constant(0, nElements, k, data.type());

khiva::clustering::kShape(data, k, centroids, idx, tolerance, maxIter);

double *calculated_c = centroids.host<double>();
unsigned int *calculated_l = idx.host<unsigned int>();

for (size_t i = 0; i < 21; i++) {
ASSERT_NEAR(calculated_c[i], expected_c[i], 1e-3);
}
}

KHIVA_TEST(ClusteringTests, KMeans, kmeans)
KHIVA_TEST(ClusteringTests, KMeans2, kmeans2)
KHIVA_TEST(ClusteringTests, KShape, kShape)
KHIVA_TEST(ClusteringTests, KShapeFloat, kShapeFloat)
KHIVA_TEST(ClusteringTests, KShapeDouble, kShapeDouble)

0 comments on commit dfd12e4

Please sign in to comment.