public static CategoricalDataSet Encode(
string path,
char columnDelimiter,
IndexCollection extractedColumns,
bool firstLineContainsVariableNames
)
Public Shared Function Encode (
path As String,
columnDelimiter As Char,
extractedColumns As IndexCollection,
firstLineContainsVariableNames As Boolean
) As CategoricalDataSet
public:
static CategoricalDataSet^ Encode(
String^ path,
wchar_t columnDelimiter,
IndexCollection^ extractedColumns,
bool firstLineContainsVariableNames
)
static member Encode :
path : string *
columnDelimiter : char *
extractedColumns : IndexCollection *
firstLineContainsVariableNames : bool -> CategoricalDataSet
Each line from the stream is interpreted as the information about the variables observed at a given instance. A line is split into tokens, each corresponding to a (zero-based) column, which in turn stores the data of a given variable. Columns are assumed to be separated from each other by the character passed as columnDelimiter. Data from a variable are extracted only if the corresponding column index is in the collection extractedColumns.
Data are encoded applying the InvariantCulture.
In the following example, a data stream is read to encode a categorical dataset. The stream contains data corresponding to two categorical variables.
using System;
using System.IO;
namespace Novacta.Analytics.CodeExamples
{
    /// <summary>
    /// Example showing how a categorical data set is encoded from a stream
    /// of delimited text and then decoded back to its original labels.
    /// </summary>
    public class CategoricalEncodeExample2
    {
        /// <summary>
        /// Writes a small comma-delimited sample to an in-memory stream,
        /// encodes it via <c>CategoricalDataSet.Encode</c>, and prints the
        /// decoded variable names and instances to the console.
        /// </summary>
        public void Main()
        {
            // Create a data stream.
            string[] data = [
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "Red,Negative",
                "Black,Negative",
                "Black,Positive" ];

            using MemoryStream stream = new();

            // leaveOpen keeps the memory stream usable once the writer is
            // disposed; disposing the writer flushes all buffered text, so
            // a single flush happens after the last line is written.
            using (StreamWriter writer = new(stream, leaveOpen: true)) {
                foreach (string line in data) {
                    writer.WriteLine(line);
                }
            }

            // Rewind so that the reader starts from the first line.
            stream.Position = 0;

            // Encode the categorical data set.
            using StreamReader streamReader = new(stream);
            char columnDelimiter = ',';
            IndexCollection extractedColumns = IndexCollection.Range(0, 1);
            bool firstLineContainsColumnHeaders = true;
            CategoricalDataSet dataset = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsColumnHeaders);

            // Decode and show the data set.
            Console.WriteLine("Decoded data set:");
            Console.WriteLine();
            var decodedDataSet = dataset.Decode();
            int numberOfInstances = dataset.Data.NumberOfRows;
            int numberOfVariables = dataset.Data.NumberOfColumns;

            // Header row: one name per encoded variable.
            foreach (var variable in dataset.Variables) {
                Console.Write(variable.Name + ",");
            }
            Console.WriteLine();

            // One output row per instance, one decoded label per variable.
            for (int i = 0; i < numberOfInstances; i++) {
                for (int j = 0; j < numberOfVariables; j++) {
                    Console.Write(decodedDataSet[i][j] + ",");
                }
                Console.WriteLine();
            }
        }
    }
}
// Executing method Main() produces the following output:
//
// Decoded data set:
//
// COLOR,NUMBER,
// Red,Negative,
// Green,Zero,
// Red,Negative,
// Black,Negative,
// Black,Positive,
ArgumentNullException | path is null. -or- extractedColumns is null. |
ArgumentException | path is an empty string (""). |
FileNotFoundException | The file cannot be found. |
DirectoryNotFoundException | The specified path is invalid, such as being on an unmapped drive. |
IOException | path includes an incorrect or invalid syntax for file name, directory name, or volume label. |
InvalidDataException |
The file at the specified
path contains no data rows. -or- At least one row does not contain enough data for the columns specified by extractedColumns. This can happen if there are missing columns, or if strings representing variable names or category labels, i.e. tokens extracted from the stream, are null or consist only of white-space characters. In some cases, the InnerException property is set to add further details about the error that occurred. |