public static IndexPartition<double> Discover(
DoubleMatrix data,
int maximumNumberOfParts
)
Public Shared Function Discover (
data As DoubleMatrix,
maximumNumberOfParts As Integer
) As IndexPartition(Of Double)
public:
static IndexPartition<double>^ Discover(
DoubleMatrix^ data,
int maximumNumberOfParts
)
static member Discover :
data : DoubleMatrix *
maximumNumberOfParts : int -> IndexPartition<float>
Method Discover(DoubleMatrix, Int32) partitions a collection of items into no more than maximumNumberOfParts parts, given the specified data, by minimizing the sum of intra-cluster squared deviations.
This method uses a default Cross-Entropy context of type PartitionOptimizationContext to identify the optimal partition. If different partitioning criteria need to be applied, or extra control over the parameters of the underlying algorithm is required, a specialized PartitionOptimizationContext can be instantiated and then exploited by executing method Optimize on a SystemPerformanceOptimizer object. See the documentation about PartitionOptimizationContext for additional examples.
In the following example, a partition that optimally splits 12 items is discovered, given an artificial data set regarding the items under study.
using System;
namespace Novacta.Analytics.CodeExamples
{
    /// <summary>
    /// Example showing how to call <c>Clusters.Discover</c> on an
    /// artificial data set made of three well separated groups of items.
    /// </summary>
    public class ClustersDiscoverExample0
    {
        /// <summary>
        /// Builds the artificial data set, asks for a partition having at
        /// most three parts, and writes the data, the partition, and its
        /// Davies-Bouldin Index to the console.
        /// </summary>
        public void Main()
        {
            // Dimensions of the artificial data set:
            // one row per item, one column per feature.
            const int numberOfItems = 12;
            int numberOfFeatures = 7;

            // Allocate the matrix holding the observations
            // on which the partition discovery is based.
            var data = DoubleMatrix.Dense(
                numberOfRows: numberOfItems,
                numberOfColumns: numberOfFeatures);

            // Items are filled in three consecutive groups of rows
            // (0 to 3, 4 to 7, 8 to 11). Each group is sampled from
            // a Gaussian distribution whose mean is moved by 5.0
            // with respect to the previous group, so that the groups
            // are the three parts expected in the optimal partition.
            double mu = 1.0;
            var g = new GaussianDistribution(mu: mu, sigma: .01);

            int[] firstItems = { 0, 4, 8 };
            for (int b = 0; b < firstItems.Length; b++)
            {
                // The first group uses the initial mean;
                // later groups shift it before sampling.
                if (b > 0)
                {
                    mu += 5.0;
                    g.Mu = mu;
                }

                int first = firstItems[b];
                var rows = IndexCollection.Range(first, first + 3);

                int column = 0;
                while (column < numberOfFeatures)
                {
                    data[rows, column] = g.Sample(sampleSize: rows.Count);
                    column++;
                }
            }

            Console.WriteLine("The data set:");
            Console.WriteLine(data);

            // Upper bound for the number of parts
            // in the partition to be discovered.
            int maximumNumberOfParts = 3;

            // Discover the best partition of the items.
            var optimalPartition = Clusters.Discover(data, maximumNumberOfParts);

            // Report the partition.
            Console.WriteLine();
            Console.WriteLine("The optimal partition:");
            Console.WriteLine(optimalPartition);

            // Report the Davies-Bouldin Index of the partition.
            Console.WriteLine();
            Console.WriteLine("The Davies-Bouldin Index for the optimal partition:");
            var daviesBouldinIndex =
                IndexPartition.DaviesBouldinIndex(data, optimalPartition);
            Console.WriteLine(daviesBouldinIndex);
        }
    }
}
// Executing method Main() produces the following output:
//
// The data set:
// 1.00443413 1.00220674 0.998272394 1.00269053 0.996789222 1.00089097 1.00413588
// 0.997933228 0.993625618 1.01576579 1.02088407 1.00505243 1.00840849 0.996769171
// 1.01157148 0.993373518 1.01292534 1.00980881 0.985070715 0.999051426 1.00490173
// 0.984314579 0.978064595 0.991518637 0.992424337 1.01228209 0.995401166 0.990271674
// 5.99388101 5.98782501 5.99234977 6.00720306 5.99391035 5.99483769 6.01287673
// 5.9959073 6.00295384 5.99060985 6.00210944 5.9964148 6.00144991 5.9847536
// 6.00236976 6.00235032 6.00327886 6.01032821 5.98648754 6.0157819 5.98911535
// 6.01095691 5.98513671 5.99782074 5.989109 6.0223965 6.01074213 6.00275269
// 10.9874226 11.0066379 11.0105315 10.9944047 10.9989296 11.009567 10.9938497
// 11.0044736 10.9993181 11.0126423 10.9974367 10.9946015 10.9885624 11.0013196
// 10.9766909 10.9908217 11.012074 10.9924937 10.9886034 11.007384 10.9891406
// 10.9974049 10.9925417 10.9969562 11.0226839 10.9973201 11.007219 11.005672
//
//
//
// The optimal partition:
// [(0), 8, 9, 10, 11]
// [(1), 0, 1, 2, 3]
// [(2), 4, 5, 6, 7]
//
//
// The Davies-Bouldin Index for the optimal partition:
// 0.0034476806923814927
ArgumentNullException | data is null. |
ArgumentOutOfRangeException | maximumNumberOfParts is not greater than one. |
ArgumentException | maximumNumberOfParts is not less than the number of rows in data. |