-
Notifications
You must be signed in to change notification settings - Fork 78
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5301a7a
commit 4fa2fb8
Showing
5 changed files
with
276 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text; | ||
using NumSharp; | ||
|
||
namespace PandasNet.Impl | ||
{ | ||
/// <summary>
/// Serialises the values of an <see cref="IDataFrame"/> to a delimited
/// text file, honouring separator, quoting style, line terminator, float
/// format and text encoding.
/// </summary>
internal class CsvWriter
{
    // Separator and quote character are kept pre-encoded so that they are
    // emitted correctly for any encoding (not only single-byte ones).
    private readonly byte[] delimiterBytes;
    private readonly byte[] quoteBytes;
    private readonly string quoteText;
    private readonly string naRep;
    private readonly string floatFormat;
    private readonly bool header;
    private readonly CsvQuoteStyle quotingStyle;
    private readonly char[] specialChars;
    private readonly Encoding encoding;
    private readonly byte[] lfBytes;

    /// <summary>
    /// Creates a writer with the given formatting options.
    /// </summary>
    /// <param name="sep">Field separator.</param>
    /// <param name="na_rep">Text written for empty fields; <c>null</c> is treated as an empty string.</param>
    /// <param name="floatFormat">Optional .NET numeric format string applied to float32/float64 fields; <c>null</c> disables it.</param>
    /// <param name="header">Whether a header line with the column labels is written.</param>
    /// <param name="quotingStyle">Controls which fields are quoted.</param>
    /// <param name="quotechar">Character used to quote fields.</param>
    /// <param name="lineTerminator">Row terminator; <c>null</c> or empty falls back to <see cref="Environment.NewLine"/>.</param>
    /// <param name="encoding">Encoding used for all emitted text.</param>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is <c>null</c>.</exception>
    internal CsvWriter(char sep, string na_rep, string floatFormat,
        bool header, CsvQuoteStyle quotingStyle, char quotechar,
        string lineTerminator, Encoding encoding)
    {
        if (encoding == null)
        {
            throw new ArgumentNullException(nameof(encoding));
        }
        if (string.IsNullOrEmpty(lineTerminator))
        {
            lineTerminator = Environment.NewLine;
        }

        this.encoding = encoding;
        this.floatFormat = floatFormat;
        this.header = header;
        this.quotingStyle = quotingStyle;
        naRep = na_rep ?? string.Empty;
        quoteText = quotechar.ToString();
        delimiterBytes = encoding.GetBytes(new[] { sep });
        quoteBytes = encoding.GetBytes(quoteText);
        lfBytes = encoding.GetBytes(lineTerminator);
        // Every character of the terminator counts as "special", however
        // long the terminator is.
        specialChars = new[] { sep, quotechar }.Concat(lineTerminator).ToArray();
    }

    /// <summary>
    /// Writes the selected columns of <paramref name="df"/> to
    /// <paramref name="filepath"/>, creating or overwriting the file.
    /// </summary>
    /// <param name="filepath">Destination file path.</param>
    /// <param name="df">Data frame whose values are written.</param>
    /// <param name="columns">Column labels to write; <c>null</c> selects all columns of <paramref name="df"/>.</param>
    internal void Write(string filepath, IDataFrame df,
        IEnumerable<string> columns)
    {
        var columnLabels = columns == null ?
            df.Columns.Values.Data<string>() : columns.ToArray();
        var columnCount = columnLabels.Length;
        int rowCount = df.Index.Size;
        var data = df[columnLabels].Values;
        using (var fs = File.Create(filepath))
        {
            // With no columns there is nothing to write; the (empty) file
            // is still created.
            if (columnCount == 0)
            {
                return;
            }
            if (header)
            {
                WriteHeader(fs, columnLabels);
            }
            for (var i = 0; i < rowCount; i++)
            {
                // Slice the row once instead of once per cell.
                var row = data[i];
                WriteField(row[0], fs);
                for (var j = 1; j < columnCount; j++)
                {
                    fs.Write(delimiterBytes, 0, delimiterBytes.Length);
                    WriteField(row[j], fs);
                }
                fs.Write(lfBytes, 0, lfBytes.Length);
            }
        }
    }

    /// <summary>
    /// Formats a single cell and writes it, quoted if the quoting style
    /// requires it.
    /// </summary>
    private void WriteField(NDArray fieldValue, Stream fs)
    {
        var text = FormatField(fieldValue);
        WriteText(fs, text, NeedsQuoting(fieldValue, text));
    }

    /// <summary>
    /// Produces the text for one cell: the NA representation for an empty
    /// cell, the float format for float32/float64 cells when one is
    /// configured, and <c>ToString()</c> otherwise.
    /// </summary>
    private string FormatField(NDArray fieldValue)
    {
        if (fieldValue.size > 0)
        {
            var useFloatFormat = floatFormat != null &&
                (fieldValue.dtype == np.float32 || fieldValue.dtype == np.float64);
            // Invariant culture keeps the decimal separator a '.', so a
            // machine with a decimal comma cannot corrupt the columns.
            var formatted = useFloatFormat
                ? ((double) fieldValue).ToString(floatFormat,
                    System.Globalization.CultureInfo.InvariantCulture)
                : fieldValue.ToString();
            return formatted ?? string.Empty;
        }
        return naRep;
    }

    /// <summary>
    /// Writes <paramref name="text"/> to the stream, optionally wrapped in
    /// the quote character. Quote characters inside a quoted value are
    /// doubled so that the value can be parsed back unambiguously.
    /// </summary>
    private void WriteText(Stream fs, string text, bool quote)
    {
        if (quote)
        {
            text = text.Replace(quoteText, quoteText + quoteText);
            fs.Write(quoteBytes, 0, quoteBytes.Length);
        }
        var bytes = encoding.GetBytes(text);
        fs.Write(bytes, 0, bytes.Length);
        if (quote)
        {
            fs.Write(quoteBytes, 0, quoteBytes.Length);
        }
    }

    /// <summary>
    /// Decides whether a cell must be quoted under the configured style.
    /// </summary>
    /// <param name="field">The cell value (used for the numeric check).</param>
    /// <param name="text">The text that will actually be written for the cell.</param>
    /// <exception cref="ArgumentException">The configured quoting style is not a known value.</exception>
    private bool NeedsQuoting(NDArray field, string text)
    {
        switch (quotingStyle)
        {
            case CsvQuoteStyle.QUOTE_MINIMAL:
                // Decide on the emitted text, numeric or not: a formatted
                // number may itself contain the separator.
                return text.IndexOfAny(specialChars) != -1;
            case CsvQuoteStyle.QUOTE_ALL:
                return true;
            case CsvQuoteStyle.QUOTE_NONNUMERIC:
                return !IsNumber(field);
            case CsvQuoteStyle.QUOTE_NONE:
                return false;
            default:
                throw new ArgumentException("Invalid value", nameof(quotingStyle));
        }
    }

    /// <summary>
    /// Writes the column labels on one line to the stream.
    /// </summary>
    /// <remarks>
    /// Header labels are not subject to the full quoting style (they stay
    /// unquoted even under QUOTE_ALL, as before). A label is quoted only
    /// when it contains a special character and quoting is not disabled,
    /// because it would otherwise break the column layout.
    /// </remarks>
    /// <param name="fs">Output stream</param>
    /// <param name="columnLabels">Column names</param>
    private void WriteHeader(Stream fs, string[] columnLabels)
    {
        for (var i = 0; i < columnLabels.Length; i++)
        {
            if (i > 0)
            {
                fs.Write(delimiterBytes, 0, delimiterBytes.Length);
            }
            var label = columnLabels[i];
            var quote = quotingStyle != CsvQuoteStyle.QUOTE_NONE &&
                label != null && label.IndexOfAny(specialChars) != -1;
            WriteText(fs, label, quote);
        }
        fs.Write(lfBytes, 0, lfBytes.Length);
    }

    /// <summary>
    /// Returns true for boxed numeric primitives and for arrays whose
    /// element type is a numeric primitive.
    /// </summary>
    /// <remarks>
    /// NOTE(review): arrays with a non-numeric dtype (e.g. object) are
    /// reported as non-numeric even if they happen to hold numbers;
    /// confirm whether such frames need per-element inspection.
    /// </remarks>
    private static bool IsNumber(object value)
    {
        var array = value as NDArray;
        if (!ReferenceEquals(array, null))
        {
            // Cells arrive wrapped in an NDArray, so a plain type test on
            // the value would never match; look at the element type.
            return IsNumericType(array.dtype);
        }
        return value != null && IsNumericType(value.GetType());
    }

    /// <summary>
    /// Returns true when <paramref name="type"/> is one of the built-in
    /// integer, floating-point or decimal types.
    /// </summary>
    private static bool IsNumericType(Type type)
    {
        switch (Type.GetTypeCode(type))
        {
            case TypeCode.SByte:
            case TypeCode.Byte:
            case TypeCode.Int16:
            case TypeCode.UInt16:
            case TypeCode.Int32:
            case TypeCode.UInt32:
            case TypeCode.Int64:
            case TypeCode.UInt64:
            case TypeCode.Single:
            case TypeCode.Double:
            case TypeCode.Decimal:
                return true;
            default:
                return false;
        }
    }
}
|
||
/// <summary>
/// Selects which fields the CSV writer wraps in the quote character.
/// The member names and numeric values follow the quoting constants of
/// Python's <c>csv</c> module.
/// </summary>
internal enum CsvQuoteStyle
{
    /// <summary>
    /// Quote a field only when it contains a special character: the
    /// delimiter, the quote character, or a character of the line
    /// terminator.
    /// </summary>
    QUOTE_MINIMAL = 0,

    /// <summary>
    /// Quote every field.
    /// </summary>
    QUOTE_ALL = 1,

    /// <summary>
    /// Quote every field that is not numeric; numeric fields are
    /// written bare.
    /// </summary>
    QUOTE_NONNUMERIC = 2,

    /// <summary>
    /// Never quote a field. The writer has no escape character, so a
    /// field containing the delimiter is written as-is.
    /// </summary>
    QUOTE_NONE = 3
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
using Xunit; | ||
using NumSharp; | ||
using System.Linq; | ||
using System.IO; | ||
|
||
namespace PandasNet.Test | ||
{ | ||
/// <summary>
/// Round-trip tests for <c>IDataFrame.to_csv</c>: each test writes a frame
/// to a file and compares the file line by line with the expected text.
/// </summary>
public class DataFrameCsvTest
{
    private static readonly string[] ColumnNames =
        { "first", "second", "third", "fourth", "fifth" };

    /// <summary>
    /// Returns a unique path in the temp directory so that tests neither
    /// share an output file nor litter the working directory.
    /// </summary>
    private static string CreateTempCsvPath()
    {
        return Path.Combine(Path.GetTempPath(), Path.GetRandomFileName() + ".csv");
    }

    /// <summary>Default options: unquoted, comma-separated, with header.</summary>
    [Fact]
    public void WriteCsv_ToFile_Test()
    {
        var filepath = CreateTempCsvPath();
        var array = np.arange(100).reshape(20, 5);
        var pd = new Pandas();
        IDataFrame df1 = pd.DataFrame(array, null, ColumnNames, typeof(object));
        try
        {
            df1.to_csv(filepath);
            using (var fr = File.OpenText(filepath))
            {
                Assert.Equal(string.Join(',', ColumnNames), fr.ReadLine());
                for (var i = 0; i < array.shape[0]; i++)
                {
                    Assert.Equal(string.Join(',', array[i].Data<int>()), fr.ReadLine());
                }
            }
        }
        finally
        {
            File.Delete(filepath);
        }
    }

    /// <summary>quoting: 1 (quote all): every data field is quoted; the header stays bare.</summary>
    [Fact]
    public void WriteCsvQuoted_ToFile_Test()
    {
        var filepath = CreateTempCsvPath();
        var array = np.arange(100).reshape(20, 5);
        var pd = new Pandas();
        IDataFrame df1 = pd.DataFrame(array, null, ColumnNames, typeof(object));
        try
        {
            df1.to_csv(filepath, quoting: 1);
            using (var fr = File.OpenText(filepath))
            {
                Assert.Equal(string.Join(',', ColumnNames), fr.ReadLine());
                for (var i = 0; i < array.shape[0]; i++)
                {
                    Assert.Equal('"' + string.Join("\",\"", array[i].Data<int>()) + '"', fr.ReadLine());
                }
            }
        }
        finally
        {
            File.Delete(filepath);
        }
    }

    /// <summary>float_format is applied to floating-point values.</summary>
    [Fact]
    public void WriteCsvFormated_ToFile_Test()
    {
        var filepath = CreateTempCsvPath();
        var array = np.arange(0, 50, 0.5).reshape(20, 5);
        var floatFormat = "E03";
        var pd = new Pandas();
        IDataFrame df1 = pd.DataFrame(array, null, ColumnNames, typeof(object));
        // Pin the culture: number formatting is culture-sensitive and a
        // decimal comma would make the expectation ambiguous.
        var previousCulture = System.Globalization.CultureInfo.CurrentCulture;
        System.Globalization.CultureInfo.CurrentCulture =
            System.Globalization.CultureInfo.InvariantCulture;
        try
        {
            df1.to_csv(filepath, float_format: floatFormat);
            using (var fr = File.OpenText(filepath))
            {
                Assert.Equal(string.Join(',', ColumnNames), fr.ReadLine());
                for (var i = 0; i < array.shape[0]; i++)
                {
                    var formattedData = array[i].Data<double>().Select(
                        x => x.ToString(floatFormat));
                    Assert.Equal(string.Join(",", formattedData), fr.ReadLine());
                }
            }
        }
        finally
        {
            System.Globalization.CultureInfo.CurrentCulture = previousCulture;
            File.Delete(filepath);
        }
    }
}
} |