public static CategoricalDataSet Encode(
TextReader reader,
char columnDelimiter,
IndexCollection extractedColumns,
bool firstLineContainsVariableNames
)
Public Shared Function Encode (
reader As TextReader,
columnDelimiter As Char,
extractedColumns As IndexCollection,
firstLineContainsVariableNames As Boolean
) As CategoricalDataSet
public:
static CategoricalDataSet^ Encode(
TextReader^ reader,
wchar_t columnDelimiter,
IndexCollection^ extractedColumns,
bool firstLineContainsVariableNames
)
static member Encode :
reader : TextReader *
columnDelimiter : char *
extractedColumns : IndexCollection *
firstLineContainsVariableNames : bool -> CategoricalDataSet
Each line from the stream is interpreted as the information about the variables observed at a given instance. A line is split into tokens, each corresponding to a (zero-based) column, which in turn stores the data of a given variable. Columns are assumed to be separated from each other by the character passed as columnDelimiter. Data from a variable are extracted only if the corresponding column index is in the collection extractedColumns.
Data are encoded applying the InvariantCulture.
In the following example, a data stream is read to encode a categorical dataset. The stream contains data corresponding to two categorical variables.
using System;
using System.IO;
namespace Novacta.Analytics.CodeExamples
{
    /// <summary>
    /// Shows how to encode a categorical data set from a delimited text
    /// stream and how to decode it back to its original labels.
    /// </summary>
    public class CategoricalEncodeExample2
    {
        /// <summary>
        /// Writes a small comma-delimited table to an in-memory stream,
        /// encodes it through <c>CategoricalDataSet.Encode</c>, then decodes
        /// the result and prints it to the console.
        /// </summary>
        public void Main()
        {
            // Create a data stream.
            string[] data = [
                "COLOR,NUMBER",
                "Red,Negative",
                "Green,Zero",
                "Red,Negative",
                "Black,Negative",
                "Black,Positive" ];

            using MemoryStream stream = new();

            // The writer is disposed (and hence flushed) as soon as all
            // lines are written; leaveOpen keeps the underlying stream
            // usable for the reader created below.
            using (StreamWriter writer = new(stream, leaveOpen: true)) {
                foreach (string line in data) {
                    writer.WriteLine(line);
                }
            }

            // Rewind so that the reader starts from the first line.
            stream.Position = 0;

            // Encode the categorical data set.
            using StreamReader streamReader = new(stream);
            char columnDelimiter = ',';
            // Both columns of the stream (indexes 0 and 1) are extracted.
            IndexCollection extractedColumns = IndexCollection.Range(0, 1);
            bool firstLineContainsVariableNames = true;
            CategoricalDataSet dataset = CategoricalDataSet.Encode(
                streamReader,
                columnDelimiter,
                extractedColumns,
                firstLineContainsVariableNames);

            // Decode and show the data set.
            Console.WriteLine("Decoded data set:");
            Console.WriteLine();

            var decodedDataSet = dataset.Decode();
            int numberOfInstances = dataset.Data.NumberOfRows;
            int numberOfVariables = dataset.Data.NumberOfColumns;

            // Header line: one variable name per extracted column.
            foreach (var variable in dataset.Variables) {
                Console.Write(variable.Name + ",");
            }
            Console.WriteLine();

            // One output line per instance, one decoded label per variable.
            for (int i = 0; i < numberOfInstances; i++) {
                for (int j = 0; j < numberOfVariables; j++) {
                    Console.Write(decodedDataSet[i][j] + ",");
                }
                Console.WriteLine();
            }
        }
    }
}
// Executing method Main() produces the following output:
//
// Decoded data set:
//
// COLOR,NUMBER,
// Red,Negative,
// Green,Zero,
// Red,Negative,
// Black,Negative,
// Black,Positive,
ArgumentNullException | reader is null. -or- extractedColumns is null. |
InvalidDataException |
The stream accessed by reader contains no
data rows. -or- At least one row does not contain enough data for the columns specified by extractedColumns. This can happen if there are missing columns, or if strings representing variable names or category labels, i.e. tokens extracted from the stream, are null or consist only of white-space characters. In some cases, the InnerException property is set to provide further details about the error that occurred. |