public static IndexCollection Explain(
DoubleMatrix data,
IndexPartition<double> partition,
int numberOfExplanatoryFeatures
)
Public Shared Function Explain (
data As DoubleMatrix,
partition As IndexPartition(Of Double),
numberOfExplanatoryFeatures As Integer
) As IndexCollection
public:
static IndexCollection^ Explain(
DoubleMatrix^ data,
IndexPartition<double>^ partition,
int numberOfExplanatoryFeatures
)
static member Explain :
data : DoubleMatrix *
partition : IndexPartition<float> *
numberOfExplanatoryFeatures : int -> IndexCollection
Method Explain(DoubleMatrix, IndexPartition<Double>, Int32) selects the specified numberOfExplanatoryFeatures from the given data, by minimizing the Davies-Bouldin Index corresponding to the partition of the items under study.
This method uses a default Cross-Entropy context of type CombinationOptimizationContext to identify the optimal features. If different selection criteria need to be applied, or extra control on the parameters of the underlying algorithm is required, a specialized CombinationOptimizationContext can be instantiated and then exploited by executing method Optimize on a SystemPerformanceOptimizer object. See the documentation about CombinationOptimizationContext for additional examples.
In the following example, an existing partition of 12 items is explained by selecting 2 features out of the 7 available in an artificial data set regarding the items under study.
using System;
namespace Novacta.Analytics.CodeExamples
{
/// <summary>
/// Example showing how an existing partition of items can be explained
/// by selecting, via <c>Clusters.Explain</c>, a subset of the features
/// observed on those items.
/// </summary>
public class ClustersExplainExample0
{
    /// <summary>
    /// Builds an artificial data set, asks <c>Clusters.Explain</c> for the
    /// features that best explain a given partition of its rows, and
    /// writes the data, the selected column indexes, and the corresponding
    /// Davies-Bouldin Index to the console.
    /// </summary>
    public void Main()
    {
        // Set the number of items and features under study.
        const int numberOfItems = 12;
        const int numberOfFeatures = 7;

        // Layout of the artificial data set: the leading columns are
        // noise, the trailing ones are informative. Deriving the split
        // from numberOfFeatures keeps the loops below consistent with
        // the matrix dimensions.
        const int numberOfInformativeFeatures = 2;
        const int numberOfNoisyFeatures =
            numberOfFeatures - numberOfInformativeFeatures;

        // Define a partition that must be explained.
        // Three parts (clusters) are included,
        // containing, respectively, items 0 to 3,
        // 4 to 7, and 8 to 11.
        var partition = IndexPartition.Create(
            new double[numberOfItems]
                { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 });

        // Create a matrix that will represent
        // an artificial data set,
        // having 12 items (rows) and 7 features (columns).
        // This will store the observations on which
        // the explanation will be based.
        var data = DoubleMatrix.Dense(
            numberOfRows: numberOfItems,
            numberOfColumns: numberOfFeatures);

        // The first 5 features (columns 0 to 4) are built to be almost
        // surely non informative, since they result
        // as samples drawn from a same distribution.
        var g = new GaussianDistribution(mu: 0, sigma: .01);
        for (int j = 0; j < numberOfNoisyFeatures; j++)
        {
            data[":", j] = g.Sample(sampleSize: numberOfItems);
        }

        // Features 5 to 6 are instead built to be informative,
        // since they are sampled from different distributions
        // while filling rows whose indexes are in different parts
        // of the partition to be explained.
        var partIdentifiers = partition.Identifiers;
        double mu = 1.0;
        for (int i = 0; i < partIdentifiers.Count; i++)
        {
            var part = partition[partIdentifiers[i]];
            int partSize = part.Count;

            // Each (part, informative feature) pair gets its own mean:
            // the mean is advanced by 2.0 after every sample, so that
            // the means are 1, 3 for the first part, 5, 7 for the
            // second one, and 9, 11 for the third one.
            for (int j = numberOfNoisyFeatures; j < numberOfFeatures; j++)
            {
                g.Mu = mu;
                data[part, j] = g.Sample(sampleSize: partSize);
                mu += 2.0;
            }
        }

        Console.WriteLine("The data set:");
        Console.WriteLine(data);

        // Define how many features must be selected
        // for explanation.
        int numberOfExplanatoryFeatures = 2;

        // Select the best features.
        IndexCollection optimalExplanatoryFeatureIndexes =
            Clusters.Explain(
                data,
                partition,
                numberOfExplanatoryFeatures);

        // Show the results.
        Console.WriteLine();
        Console.WriteLine(
            "The {0} features best explaining the given partition have column indexes:",
            numberOfExplanatoryFeatures);
        Console.WriteLine(optimalExplanatoryFeatureIndexes);

        Console.WriteLine();
        Console.WriteLine("The Davies-Bouldin Index for the selected features:");
        var dbi = IndexPartition.DaviesBouldinIndex(
            data[":", optimalExplanatoryFeatureIndexes],
            partition);
        Console.WriteLine(dbi);
    }
}
}
// Executing method Main() produces the following output:
//
// The data set:
// 0.00443412894 0.00269053161 0.00413587909 -0.00765022956 -0.0051623096 1.00663787 3.01053155
// -0.0020667716 0.0208840726 -0.00323082939 -0.00939014629 0.00144991289 0.999318094 3.01264231
// 0.0115714824 0.00980880506 0.0049017337 0.0032788575 0.0157818958 0.990821676 3.01207396
// -0.0156854206 -0.00757566325 -0.00972832587 -0.00217925896 0.0107421303 0.992541729 2.99695621
// 0.00220674309 -0.00321077807 -0.00611898588 0.00720305795 0.0128767272 4.99440474 6.99892958
// -0.00637438186 0.0050524291 -0.00409270388 0.00210944391 -0.0152463979 4.9974367 6.99460151
// -0.00662648189 -0.0149292848 0.00236975764 0.0103282088 -0.0108846478 4.99249371 6.98860335
// -0.0219354055 0.0122820889 0.0109569101 -0.0108910035 0.00275269082 5.02268395 6.99732006
// -0.00172760644 0.000890969086 -0.0121749938 -0.0060896535 -0.0125774475 9.00956698 10.9938497
// 0.0157657881 0.0084084921 0.00295384059 -0.00358519595 0.00447359708 8.98856241 11.0013196
// 0.0129253424 -0.000948574239 0.00235032211 -0.0135124599 -0.0233090879 9.00738398 10.9891406
// -0.0084813634 -0.00459883432 -0.0148632861 0.0223964957 -0.00259506386 9.00721897 11.005672
//
//
//
// The 2 features best explaining the given partition have column indexes:
// 5, 6
//
// The Davies-Bouldin Index for the selected features:
// 0.003700083159450854
ArgumentNullException | data is null. -or- partition is null. |
ArgumentOutOfRangeException | numberOfExplanatoryFeatures is not positive. |
ArgumentException | numberOfExplanatoryFeatures is not less than
the number of columns in data. -or- A part in partition contains a position which is not valid as a row index of data. |