Skip to content

Commit

Permalink
Feat/replace csv parser by papa parse (#14)
Browse files Browse the repository at this point in the history
Feat/replace csv parser by papa parse
  • Loading branch information
lvancraen committed May 15, 2019
2 parents a8ccb7d + 0b69d5b commit f576813
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 324 deletions.
1 change: 1 addition & 0 deletions .node-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8.11.3
18 changes: 6 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,22 @@ gtfs.forEachStopTime((stopTime) => {
gtfs.forEachFrequency((frequency) => {
const fromStop = gtfs.getStopWithId(frequency.from_stop_id);
const toStop = gtfs.getStopWithId(frequency.to_stop_id);

if (!fromStop || !toStop) {
gtfs.removeFrequency(frequency);
}
});

gtfs.exportAtPath('somePathWhereYouWantToExportTheGtfs', (error) => {
  if (error) { throw error; }

// Done
});
```

## Keep in mind: synchronous loading and indexes

This project is Transit's old GTFS implementation, using callbacks and plain objects. The main advantage is that
it is light and simple to use, but it has some drawbacks. The two biggest ones are the indexes and the synchronous loading of tables.

### Indexes

The tables are loaded and saved as Maps, to allow o(1) access using the ids. The routes are therefore indexed by the
`route_id` value, which is therefore saved in `route.route_id` but also as an index.

**This indexing is not automatically kept up to date.**
Expand All @@ -75,7 +70,7 @@ gtfs.addRoute(route);
The goal of this implementation was to avoid loading upfront all the tables. Therefore, they are loaded only when
required. This makes the code faster to run (if some tables are not required at all).

The drawback is that any function could trigger the loading of a table. Since we do not want to turn every function into an async one, the loading of the tables is done synchronously.

## Naming

Expand All @@ -89,8 +84,7 @@ but for the `shapes.txt`, since one item of the table is not a "shape" per-se, b

## Support and contact

Please post any issues you find on [the repo of the project](https://github.com/TransitApp/gtfsNodeLib/issues). And
do not hesitate to contact [Transit App](https://github.com/TransitApp) directly if you have any questions.


119 changes: 0 additions & 119 deletions helpers/csv.js

This file was deleted.

136 changes: 72 additions & 64 deletions helpers/export.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@

/* eslint-disable no-underscore-dangle */

const acomb = require('acomb');
const async = require('async');
const infoLog = require('debug')('gtfsNodeLib:i');
const warningLog = require('debug')('gtfsNodeLib:w');
const errorLog = require('debug')('gtfsNodeLib:e');
const fs = require('fs-extra');

const { fromObjectToCsvString } = require('./csv');
const Papa = require('papaparse');

/**
* Private functions
Expand Down Expand Up @@ -66,78 +64,88 @@ function copyUntouchedTable(inputPath, outputPath, tableName, callback) {
}

/**
 * Serializes one GTFS table to CSV and writes it to `${outputPath}${tableName}.txt`.
 *
 * @param {string} tableName - Name of the GTFS table (e.g. 'routes').
 * @param {Gtfs} gtfs - The GTFS instance holding the indexed tables.
 * @param {string} outputPath - Output directory path (expected to end with a separator,
 *   since it is concatenated directly with the table name).
 * @param {function} callback - Called once the file has been written.
 */
function exportTable(tableName, gtfs, outputPath, callback) {
  // NOTE(review): processGtfsTable returns undefined when the table is absent;
  // fs.writeFile would then write nothing useful — confirm callers only export
  // tables that exist.
  const csv = processGtfsTable(tableName, gtfs);

  const outputFullPath = `${outputPath + tableName}.txt`;

  fs.writeFile(outputFullPath, csv, (error) => {
    if (error) {
      throw error;
    }

    infoLog(`[${getHHmmss()}] Table has been exported: ${tableName}`);
    callback();
  });
}

async.eachSeries(gtfs.getIndexedTable(tableName), acomb.ensureAsync(([key, object], subDone) => {
if (deepness === 0 || deepness === 1) {
if (gtfs._preExportItemFunction) {
object = gtfs._preExportItemFunction(object, tableName, key);
}
rowsBuffer.push(fromObjectToCsvString(object, keys));
} else if (deepness === 2) {
object.forEach((subObject, subKey) => {
if (gtfs._preExportItemFunction) {
subObject = gtfs._preExportItemFunction(subObject, tableName, key, subKey);
}
rowsBuffer.push(fromObjectToCsvString(subObject, keys));
});
}
/**
 * Maps an object to an array of CSV-ready string values, ordered by `keys`.
 *
 * Missing (`undefined`/`null`) values become the empty string, nested objects
 * are JSON-stringified, and every other non-string value is coerced with
 * `String()`, so the result is always an array of strings.
 *
 * @param {Object} object - The GTFS row object to read values from.
 * @param {string[]} keys - The column keys, in output order.
 * @returns {string[]} The values of `object` for `keys`, in the same order.
 */
function getObjectValuesUsingKeyOrdering(object, keys) {
  return keys.map((key) => {
    let value = object[key];

    if (value === undefined || value === null) {
      return '';
    }

    const type = typeof value;
    if (type === 'object') {
      // Nested structures (e.g. arrays or plain objects) are serialized as JSON.
      value = JSON.stringify(value);
    } else if (type !== 'string') {
      value = String(value);
    }

    return value;
  });
}

/**
 * Builds the full CSV string (header row + data rows) for one GTFS table.
 *
 * Handles the three table shapes described by the schema:
 *  - singleton tables: a single row object;
 *  - deepness 0/1 tables: a Map of id -> row object;
 *  - deepness 2 tables: a Map of id -> Map of subId -> row object.
 *
 * Each row is passed through `gtfs._preExportItemFunction` (when defined)
 * before being serialized.
 *
 * @param {string} tableName - Name of the GTFS table.
 * @param {Gtfs} gtfs - The GTFS instance holding the indexed tables.
 * @returns {string|undefined} The CSV content, or undefined when the table is absent.
 */
function processGtfsTable(tableName, gtfs) {
  let itemMap = gtfs.getIndexedTable(tableName);
  if (!itemMap) {
    return undefined;
  }

  const actualKeys = gtfs.getActualKeysForTable(tableName);
  const indexKeys = gtfs._schema.indexKeysByTableName[tableName];
  const deepness = gtfs._schema.deepnessByTableName[tableName];
  const itemValues = [];

  if (indexKeys.singleton) {
    // Singleton tables hold one row object rather than a Map of rows.
    if (gtfs._preExportItemFunction) {
      itemMap = gtfs._preExportItemFunction(itemMap, tableName);
    }

    itemValues.push(getObjectValuesUsingKeyOrdering(itemMap, actualKeys));

    return Papa.unparse({
      fields: actualKeys,
      data: itemValues,
    });
  }

  itemMap.forEach((gtfsRowObjectOrMap, key) => {
    if (deepness === 0 || deepness === 1) {
      let gtfsRowObject = gtfsRowObjectOrMap;
      if (gtfs._preExportItemFunction) {
        gtfsRowObject = gtfs._preExportItemFunction(gtfsRowObject, tableName, key);
      }

      itemValues.push(getObjectValuesUsingKeyOrdering(gtfsRowObject, actualKeys));
    }

    if (deepness === 2) {
      // Nested Map: one sub-Map of rows per top-level id (e.g. stop times by trip).
      gtfsRowObjectOrMap.forEach((gtfsRowObject, subKey) => {
        if (gtfs._preExportItemFunction) {
          gtfsRowObject = gtfs._preExportItemFunction(gtfsRowObject, tableName, key, subKey);
        }

        itemValues.push(getObjectValuesUsingKeyOrdering(gtfsRowObject, actualKeys));
      });
    }
  });

  return Papa.unparse({
    fields: actualKeys,
    data: itemValues,
  });
}

Expand Down
Loading

0 comments on commit f576813

Please sign in to comment.