diff --git a/src/Pandas.NET/Extensions/PandasMethods.Excel.cs b/src/Pandas.NET/Extensions/PandasMethods.Excel.cs
index 1efb4b6..9f28849 100644
--- a/src/Pandas.NET/Extensions/PandasMethods.Excel.cs
+++ b/src/Pandas.NET/Extensions/PandasMethods.Excel.cs
@@ -1,13 +1,14 @@
using System;
using System.Collections.Generic;
using System.Text;
+using PandasNet.Impl;
namespace PandasNet
{
public static class PandasMethods
{
///
- ///
+ /// Read a comma-separated values (csv) file into DataFrame.
///
///
///
@@ -18,5 +19,36 @@ public static IDataFrame read_csv(this Pandas pd, string filepath, string sep =
{
throw new NotImplementedException();
}
+
+        /// <summary>Write object to a comma-separated values (csv) file.</summary>
+        /// <param name="df">DataFrame to write.</param>
+        /// <param name="filepath">File path.</param>
+        /// <param name="sep">Field delimiter for the output file.</param>
+        /// <param name="na_rep">Missing data representation.</param>
+        /// <param name="float_format">Format string for floating point numbers.</param>
+        /// <param name="columns">Columns to write.</param>
+        /// <param name="header">Write out the column names.</param>
+        /// <param name="quoting">
+        /// Integer value of the quoting style (0 = QUOTE_MINIMAL, 1 = QUOTE_ALL, 2 = QUOTE_NONNUMERIC,
+        /// 3 = QUOTE_NONE). Defaults to QUOTE_MINIMAL. If you have set a float_format then floats are
+        /// converted to strings and thus QUOTE_NONNUMERIC will treat them as non-numeric.
+        /// </param>
+        /// <param name="quotechar">Character used to quote fields.</param>
+        /// <param name="line_terminator">
+        /// The newline character or character sequence to use in the output file. When null or
+        /// empty, Environment.NewLine is used ('\n' on Linux, '\r\n' on Windows).
+        /// </param>
+        /// <exception cref="ArgumentOutOfRangeException">quoting is not a defined quoting style.</exception>
+        public static void to_csv(this IDataFrame df, string filepath, char sep = ',',
+            string na_rep = "", string float_format = null, IEnumerable columns = null,
+            bool header = true, int quoting = (int) CsvQuoteStyle.QUOTE_MINIMAL,
+            char quotechar = '"', string line_terminator = null)
+        {
+            // Reject an unknown style here, before CsvWriter creates (and truncates) the target file.
+            if (!Enum.IsDefined(typeof(CsvQuoteStyle), quoting)) { throw new ArgumentOutOfRangeException(nameof(quoting)); }
+            var newline = string.IsNullOrEmpty(line_terminator) ? Environment.NewLine : line_terminator;
+            new CsvWriter(sep, na_rep, float_format, header, (CsvQuoteStyle) quoting, quotechar,
+                newline, new UTF8Encoding(false)).Write(filepath, df, columns);
+        }
}
}
diff --git a/src/Pandas.NET/Impl/CsvWriter.cs b/src/Pandas.NET/Impl/CsvWriter.cs
new file mode 100644
index 0000000..5f59e6d
--- /dev/null
+++ b/src/Pandas.NET/Impl/CsvWriter.cs
@@ -0,0 +1,158 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using NumSharp;
+
+namespace PandasNet.Impl
+{
+ internal class CsvWriter
+ {
+        private readonly byte delimiter;
+        private readonly byte[] noValue;
+        private readonly string floatFormat;
+        private readonly bool header;
+        private readonly CsvQuoteStyle quotingStyle;
+        private readonly byte quotebyte;
+        private readonly char[] specialChars;
+        private readonly Encoding encoding;
+        private readonly byte[] lfBytes;
+
+        /// <summary>Captures the CSV dialect; every character of lineTerminator counts as special.</summary>
+        internal CsvWriter(char sep, string na_rep, string floatFormat, bool header,
+            CsvQuoteStyle quotingStyle, char quotechar, string lineTerminator, Encoding encoding)
+        {
+            // Both are emitted with Stream.WriteByte; a plain (byte) cast would silently truncate them.
+            var dialect = encoding.GetBytes(new[] { sep, quotechar });
+            if (dialect.Length != 2) { throw new ArgumentException("sep and quotechar must each encode to a single byte.", nameof(sep)); }
+            delimiter = dialect[0];
+            quotebyte = dialect[1];
+            noValue = encoding.GetBytes(na_rep);
+            this.floatFormat = floatFormat;
+            this.header = header;
+            this.quotingStyle = quotingStyle;
+            this.encoding = encoding;
+            lfBytes = encoding.GetBytes(lineTerminator);
+            specialChars = new[] { sep, quotechar }.Concat(lineTerminator).ToArray();
+        }
+
+        /// <summary>Creates filepath and writes the optional header plus one line per row of df.</summary>
+        internal void Write(string filepath, IDataFrame df, IEnumerable columns)
+        {
+            // A null selection means every column of the frame.
+            var labels = columns != null ? columns.ToArray() : df.Columns.Values.Data();
+            var width = labels.Length;
+            int height = df.Index.Size;
+            var cells = df[labels].Values;
+            using (var output = File.Create(filepath))
+            {
+                // With no columns the (empty) file is still created.
+                if (width == 0) { return; }
+                if (header) { WriteHeader(output, labels); }
+                for (var row = 0; row < height; row++)
+                {
+                    for (var col = 0; col < width; col++)
+                    {
+                        if (col > 0) { output.WriteByte(delimiter); }
+                        WriteField(cells[row][col], output);
+                    }
+                    output.Write(lfBytes, 0, lfBytes.Length);
+                }
+            }
+        }
+
+        /// <summary>Writes one cell; quoting is decided on the text that is actually emitted.</summary>
+        private void WriteField(NDArray fieldValue, Stream fs)
+        {
+            var text = fieldValue.size == 0 ? encoding.GetString(noValue)
+                : floatFormat != null && (fieldValue.dtype == np.float32 || fieldValue.dtype == np.float64)
+                    ? ((double) fieldValue).ToString(floatFormat)
+                    : fieldValue.ToString();
+            if (NeedsQuoting(text))
+            {
+                // Double embedded quote characters so the quoted field stays parseable (RFC 4180).
+                var quote = ((char) quotebyte).ToString();
+                text = quote + text.Replace(quote, quote + quote) + quote;
+            }
+            var bytes = encoding.GetBytes(text);
+            fs.Write(bytes, 0, bytes.Length);
+        }
+
+        /// <summary>Returns whether field must be wrapped in quotes under the configured quoting style.</summary>
+        private bool NeedsQuoting(object field)
+        {
+            if (quotingStyle == CsvQuoteStyle.QUOTE_ALL) { return true; }
+            if (quotingStyle == CsvQuoteStyle.QUOTE_NONE) { return false; }
+            if (quotingStyle != CsvQuoteStyle.QUOTE_MINIMAL && quotingStyle != CsvQuoteStyle.QUOTE_NONNUMERIC)
+            {
+                throw new ArgumentException("Invalid value", nameof(quotingStyle));
+            }
+            // NOTE(review): WriteField does not pass a boxed numeric here, so IsNumber
+            // looks always false for cells - confirm that is the intent.
+            if (IsNumber(field)) { return false; }
+            // QUOTE_NONNUMERIC quotes every non-number; QUOTE_MINIMAL only text with special characters.
+            return quotingStyle == CsvQuoteStyle.QUOTE_NONNUMERIC
+                || field.ToString().IndexOfAny(specialChars) != -1;
+        }
+
+        /// <summary>
+        /// Writes the column labels on one line, quoting (and escaping) any label that
+        /// contains one of the writer's special characters; plain labels are written as-is.
+        /// </summary>
+        /// <param name="fs">Output stream.</param>
+        /// <param name="columnLabels">Column names.</param>
+        private void WriteHeader(Stream fs, string[] columnLabels)
+        {
+            var quote = ((char) quotebyte).ToString();
+            for (var i = 0; i < columnLabels.Length; i++)
+            {
+                if (i > 0) { fs.WriteByte(delimiter); }
+                // Left unquoted, such a label would shift every column after it.
+                var risky = quotingStyle != CsvQuoteStyle.QUOTE_NONE && columnLabels[i].IndexOfAny(specialChars) >= 0;
+                var label = risky ? quote + columnLabels[i].Replace(quote, quote + quote) + quote : columnLabels[i];
+                var bytes = encoding.GetBytes(label);
+                fs.Write(bytes, 0, bytes.Length);
+            }
+            fs.Write(lfBytes, 0, lfBytes.Length);
+        }
+
+        /// <summary>True when value is a boxed built-in integer, floating-point or decimal number.</summary>
+        private static bool IsNumber(object value)
+        {
+            var isIntegral = value is sbyte || value is byte || value is short || value is ushort
+                || value is int || value is uint || value is long || value is ulong;
+            return isIntegral || value is float || value is double || value is decimal;
+        }
+ }
+
+ internal enum CsvQuoteStyle
+ {
+ /// <summary>
+ /// Quote only those fields which contain special characters:
+ /// the delimiter, the quote character or line-terminator
+ /// characters.
+ /// </summary>
+ QUOTE_MINIMAL = 0,
+ /// <summary>
+ /// Instructs writer objects to quote all fields.
+ /// </summary>
+ QUOTE_ALL = 1,
+ /// <summary>
+ /// Instructs writer objects to quote all non-numeric
+ /// fields.
+ /// (The reader-side conversion described for this constant in
+ /// Python's csv module has no counterpart in this file.)
+ /// </summary>
+ QUOTE_NONNUMERIC = 2,
+ /// <summary>
+ /// Instructs writer objects to never quote fields.
+ /// NOTE(review): the original wording appears adapted from Python's
+ /// csv module, where the delimiter is escaped with escapechar or an
+ /// error is raised. No escapechar handling is visible in
+ /// CsvWriter; NeedsQuoting just returns false for this style,
+ /// so special characters appear to be written unescaped.
+ /// Confirm whether that is intended.
+ /// </summary>
+ QUOTE_NONE = 3
+ }
+}
diff --git a/src/Pandas.NET/Impl/DataFrame.cs b/src/Pandas.NET/Impl/DataFrame.cs
index ba741b8..61231ae 100644
--- a/src/Pandas.NET/Impl/DataFrame.cs
+++ b/src/Pandas.NET/Impl/DataFrame.cs
@@ -10,8 +10,14 @@ namespace PandasNet.Impl
{
public partial class DataFrame : PandasObject, IDataFrame
{
+ ///
+ /// The index (row labels) of the DataFrame.
+ ///
public IDataIndex Index { get; internal set; }
+ ///
+ /// The column labels of the DataFrame.
+ ///
public IDataIndex Columns { get; internal set; }
///
@@ -140,7 +146,7 @@ public IDataFrame this[params int[] columnIndexs]
get
{
var colLength = columnIndexs.Length;
- NDArray array = new object[_rowSize, colLength];
+ NDArray array = new NDArray(Values.dtype, new Shape(_rowSize, colLength));
for (var rowIndex = 0; rowIndex < _rowSize; rowIndex++)
{
for (var col = 0; col < colLength; col++)
diff --git a/src/Pandas.NET/Pandas.Net.csproj b/src/Pandas.NET/Pandas.Net.csproj
index 48df23f..a762fa0 100644
--- a/src/Pandas.NET/Pandas.Net.csproj
+++ b/src/Pandas.NET/Pandas.Net.csproj
@@ -1,7 +1,7 @@
-
+
- netstandard2.0
+ netstandard2.0
PandasNet
0.1.0
true
diff --git a/test/Pandas.NET.Test/DataFrameCsvTest.cs b/test/Pandas.NET.Test/DataFrameCsvTest.cs
new file mode 100644
index 0000000..293752f
--- /dev/null
+++ b/test/Pandas.NET.Test/DataFrameCsvTest.cs
@@ -0,0 +1,76 @@
+using Xunit;
+using NumSharp;
+using System.Linq;
+using System.IO;
+
+namespace PandasNet.Test
+{
+ public class DataFrameCsvTest
+ {
+        // Nothing to set up: every test builds its own frame and output file.
+        public DataFrameCsvTest() { }
+
+        [Fact]
+        public void WriteCsv_ToFile_Test()
+        {
+            var filepath = "write_test.csv";
+            var array = np.arange(100).reshape(20, 5);
+            var columnNames = new string[] { "first", "second", "third", "fourth", "fifth" };
+            IDataFrame df1 = new Pandas().DataFrame(array, null, columnNames, typeof(object));
+
+            df1.to_csv(filepath);
+            var lines = File.ReadAllLines(filepath);
+            File.Delete(filepath); // keep the working directory clean between runs
+            // One header line plus one line per row - also catches truncated or extra output.
+            Assert.Equal(array.shape[0] + 1, lines.Length);
+            Assert.Equal(string.Join(',', columnNames), lines[0]);
+            for (var i = 0; i < array.shape[0]; i++)
+            {
+                Assert.Equal(string.Join(',', array[i].Data()), lines[i + 1]);
+            }
+        }
+
+        [Fact]
+        public void WriteCsvQuoted_ToFile_Test()
+        {
+            var filepath = "write_quoted_test.csv";
+            var array = np.arange(100).reshape(20, 5);
+            var columnNames = new string[] { "first", "second", "third", "fourth", "fifth" };
+            IDataFrame df1 = new Pandas().DataFrame(array, null, columnNames, typeof(object));
+
+            df1.to_csv(filepath, quoting: 1); // 1 is the integer value of QUOTE_ALL
+            var lines = File.ReadAllLines(filepath);
+            File.Delete(filepath); // keep the working directory clean between runs
+            // One header line plus one line per row - also catches truncated or extra output.
+            Assert.Equal(array.shape[0] + 1, lines.Length);
+            Assert.Equal(string.Join(',', columnNames), lines[0]);
+            for (var i = 0; i < array.shape[0]; i++)
+            {
+                Assert.Equal('"' + string.Join("\",\"", array[i].Data()) + '"', lines[i + 1]);
+            }
+        }
+
+        [Fact]
+        public void WriteCsvFormated_ToFile_Test()
+        {
+            // Distinct file name so this test never reads or clobbers the quoted test's output.
+            var filepath = "write_formatted_test.csv";
+            var array = np.arange(0, 50, 0.5).reshape(20, 5);
+            var columnNames = new string[] { "first", "second", "third", "fourth", "fifth" };
+            var floatFormat = "E03";
+            IDataFrame df1 = new Pandas().DataFrame(array, null, columnNames, typeof(object));
+
+            df1.to_csv(filepath, float_format: floatFormat);
+            var lines = File.ReadAllLines(filepath);
+            File.Delete(filepath); // keep the working directory clean between runs
+
+            Assert.Equal(array.shape[0] + 1, lines.Length);
+            Assert.Equal(string.Join(',', columnNames), lines[0]);
+            for (var i = 0; i < array.shape[0]; i++)
+            {
+                var formattedData = array[i].Data().Select(x => x.ToString(floatFormat));
+                Assert.Equal(string.Join(",", formattedData), lines[i + 1]);
+            }
+        }
+ }
+}