Java* API Reference for Intel® Data Analytics Acceleration Library 2016 Update 4

KMeansCSRDistributed.java

/* file: KMeansCSRDistributed.java */
/*******************************************************************************
* Copyright 2014-2016 Intel Corporation All Rights Reserved.
*
* The source code, information and material ("Material") contained herein is
* owned by Intel Corporation or its suppliers or licensors, and title to such
* Material remains with Intel Corporation or its suppliers or licensors. The
* Material contains proprietary information of Intel or its suppliers and
* licensors. The Material is protected by worldwide copyright laws and treaty
* provisions. No part of the Material may be used, copied, reproduced,
* modified, published, uploaded, posted, transmitted, distributed or disclosed
* in any way without Intel's prior express written permission. No license under
* any patent, copyright or other intellectual property rights in the Material
* is granted to or conferred upon you, either expressly, by implication,
* inducement, estoppel or otherwise. Any license under such intellectual
* property rights must be express and approved by Intel in writing.
*
* Unless otherwise agreed by Intel in writing, you may not remove or alter this
* notice or any other notice embedded in Materials by Intel or Intel's
* suppliers or licensors in any way.
*******************************************************************************/
/*
// Content:
// Java example of sparse K-Means clustering in the distributed processing mode
*/
package com.intel.daal.examples.kmeans;
import com.intel.daal.algorithms.kmeans.*;
import com.intel.daal.algorithms.kmeans.init.*;
import com.intel.daal.data_management.data.NumericTable;
import com.intel.daal.data_management.data.CSRNumericTable;
import com.intel.daal.data_management.data_source.DataSource;
import com.intel.daal.data_management.data_source.FileDataSource;
import com.intel.daal.examples.utils.Service;
import com.intel.daal.services.DaalContext;
class KMeansCSRDistributed {
/* Input data set parameters */
private static final String[] datasetFileNames = {
"../data/distributed/kmeans_csr_1.csv", "../data/distributed/kmeans_csr_2.csv",
"../data/distributed/kmeans_csr_3.csv", "../data/distributed/kmeans_csr_4.csv"};
private static final int nClusters = 20;
private static final int nBlocks = 4;
private static final int nIterations = 5;
private static final int nVectorsInBlock = 8000;
private static DaalContext context = new DaalContext();
public static void main(String[] args) throws java.io.FileNotFoundException, java.io.IOException {
/* Get initial centroids */
InitDistributedStep2Master initMaster = new InitDistributedStep2Master(context, Double.class,
InitMethod.randomCSR, nClusters);
for (int node = 0; node < nBlocks; node++) {
/* Create Numeric Table for input data */
CSRNumericTable data = Service.createSparseTable(context, datasetFileNames[node]);
/* Create an algorithm object to initialize the K-Means algorithm */
InitDistributedStep1Local initLocal = new InitDistributedStep1Local(context, Double.class,
InitMethod.randomCSR, nClusters, nBlocks * nVectorsInBlock, node * nVectorsInBlock);
/* Set the input data to the algorithm */
initLocal.input.set(InitInputId.data, data);
InitPartialResult initPres = initLocal.compute();
initMaster.input.add(InitDistributedStep2MasterInputId.partialResults, initPres);
}
initMaster.compute();
InitResult initResult = initMaster.finalizeCompute();
NumericTable centroids = initResult.get(InitResultId.centroids);
NumericTable[] assignments = new NumericTable[nBlocks];
NumericTable goalFunction = null;
/* Create an algorithm for K-Means clustering */
DistributedStep2Master masterAlgorithm = new DistributedStep2Master(context, Double.class, Method.lloydCSR,
nClusters);
for (int it = 0; it < nIterations + 1; it++) {
for (int node = 0; node < nBlocks; node++) {
/* Create Numeric Table for input data */
CSRNumericTable data = Service.createSparseTable(context, datasetFileNames[node]);
/* Create an algorithm object for the K-Means algorithm */
DistributedStep1Local algorithm = new DistributedStep1Local(context, Double.class, Method.lloydCSR,
nClusters);
if (it == nIterations) {
algorithm.parameter.setAssignFlag(true);
}
/* Set the input data to the algorithm */
algorithm.input.set(InputId.data, data);
algorithm.input.set(InputId.inputCentroids, centroids);
PartialResult pres = algorithm.compute();
if (it == nIterations) {
Result result = algorithm.finalizeCompute();
assignments[node] = result.get(ResultId.assignments);
} else {
masterAlgorithm.input.add(DistributedStep2MasterInputId.partialResults, pres);
}
}
if (it == nIterations)
break;
masterAlgorithm.compute();
Result result = masterAlgorithm.finalizeCompute();
centroids = result.get(ResultId.centroids);
goalFunction = result.get(ResultId.goalFunction);
}
/* Print the results */
Service.printNumericTable("First 10 cluster assignments from 1st node:", assignments[0], 10);
Service.printNumericTable("First 10 dimensions of centroids:", centroids, 20, 10);
Service.printNumericTable("Goal function value:", goalFunction);
context.dispose();
}
}