Skip to content

Commit

Permalink
Feat/replace csv parser by papa parse (#14)
Browse files Browse the repository at this point in the history
Feat/replace csv parser by papa parse
  • Loading branch information
lvancraen committed May 15, 2019
2 parents a8ccb7d + 0b69d5b commit f576813
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 324 deletions.
1 change: 1 addition & 0 deletions .node-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8.11.3
18 changes: 6 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,22 @@ gtfs.forEachStopTime((stopTime) => {
gtfs.forEachFrequency((frequency) => {
const fromStop = gtfs.getStopWithId(frequency.from_stop_id);
const toStop = gtfs.getStopWithId(frequency.to_stop_id);

if (!fromStop || !toStop) {
gtfs.removeFrequency(frequency);
}
});

gtfs.exportAtPath('somePathWhereYouWantToExportTheGtfs', (error) => {
  if (error) { throw error; }

// Done
});
```

## Keep in mind: synchronous loading and indexes

This project is Transit's old GTFS implementation, using callbacks and plain objects. The main advantage is that
it is light and simple to use, but it has some drawbacks. The two biggest ones are the indexes and the synchronous loading of tables.

### Indexes

The tables are loaded and saved as Maps, to allow o(1) access using the ids. The routes are therefore indexed by the
`route_id` value, which is therefore saved in `route.route_id` but also as an index.

**This indexing is not automatically kept up to date.**
Expand All @@ -75,7 +70,7 @@ gtfs.addRoute(route);
The goal of this implementation was to avoid loading upfront all the tables. Therefore, they are loaded only when
required. This makes the code faster to run (if some tables are not required at all).

The drawback is that any function could trigger the loading of a table. Since we do not want to turn every function into an async one, the loading of the tables is done synchronously.

## Naming

Expand All @@ -89,8 +84,7 @@ but for the `shapes.txt`, since one item of the table is not a "shape" per-se, b

## Support and contact

Please post any issues you find on [the repo of the project](https://github.com/TransitApp/gtfsNodeLib/issues). And
do not hesitate to contact [Transit App](https://github.com/TransitApp) directly if you have any questions.


119 changes: 0 additions & 119 deletions helpers/csv.js

This file was deleted.

136 changes: 72 additions & 64 deletions helpers/export.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@

/* eslint-disable no-underscore-dangle */

const acomb = require('acomb');
const async = require('async');
const infoLog = require('debug')('gtfsNodeLib:i');
const warningLog = require('debug')('gtfsNodeLib:w');
const errorLog = require('debug')('gtfsNodeLib:e');
const fs = require('fs-extra');

const { fromObjectToCsvString } = require('./csv');
const Papa = require('papaparse');

/**
* Private functions
Expand Down Expand Up @@ -66,78 +64,88 @@ function copyUntouchedTable(inputPath, outputPath, tableName, callback) {
}

/**
 * Serializes one GTFS table to CSV and writes it to `${outputPath}${tableName}.txt`.
 *
 * @param {string} tableName - Name of the GTFS table (e.g. 'routes').
 * @param {Gtfs} gtfs - The GTFS instance holding the indexed tables.
 * @param {string} outputPath - Output directory path (expected to end with a separator,
 *   since it is concatenated directly with the table name).
 * @param {function} callback - Called once the file has been written.
 */
function exportTable(tableName, gtfs, outputPath, callback) {
  // NOTE(review): processGtfsTable returns undefined when the table is absent;
  // fs.writeFile would then write nothing useful — confirm callers only export
  // tables that exist.
  const csv = processGtfsTable(tableName, gtfs);

  const outputFullPath = `${outputPath + tableName}.txt`;

  fs.writeFile(outputFullPath, csv, (error) => {
    if (error) {
      throw error;
    }

    infoLog(`[${getHHmmss()}] Table has been exported: ${tableName}`);
    callback();
  });
}

async.eachSeries(gtfs.getIndexedTable(tableName), acomb.ensureAsync(([key, object], subDone) => {
if (deepness === 0 || deepness === 1) {
if (gtfs._preExportItemFunction) {
object = gtfs._preExportItemFunction(object, tableName, key);
}
rowsBuffer.push(fromObjectToCsvString(object, keys));
} else if (deepness === 2) {
object.forEach((subObject, subKey) => {
if (gtfs._preExportItemFunction) {
subObject = gtfs._preExportItemFunction(subObject, tableName, key, subKey);
}
rowsBuffer.push(fromObjectToCsvString(subObject, keys));
});
}
/**
 * Maps an object to an array of CSV-ready string values, ordered by `keys`.
 *
 * Missing (`undefined`/`null`) values become the empty string, nested objects
 * are JSON-stringified, and every other non-string value is coerced with
 * `String()`, so the result is always an array of strings.
 *
 * @param {Object} object - The GTFS row object to read values from.
 * @param {string[]} keys - The column keys, in output order.
 * @returns {string[]} The values of `object` for `keys`, in the same order.
 */
function getObjectValuesUsingKeyOrdering(object, keys) {
  return keys.map((key) => {
    let value = object[key];

    if (value === undefined || value === null) {
      return '';
    }

    const type = typeof value;
    if (type === 'object') {
      // Nested structures (e.g. arrays or plain objects) are serialized as JSON.
      value = JSON.stringify(value);
    } else if (type !== 'string') {
      value = String(value);
    }

    return value;
  });
}

/**
 * Builds the full CSV string (header row + data rows) for one GTFS table.
 *
 * Handles the three table shapes described by the schema:
 *  - singleton tables: a single row object;
 *  - deepness 0/1 tables: a Map of id -> row object;
 *  - deepness 2 tables: a Map of id -> Map of subId -> row object.
 *
 * Each row is passed through `gtfs._preExportItemFunction` (when defined)
 * before being serialized.
 *
 * @param {string} tableName - Name of the GTFS table.
 * @param {Gtfs} gtfs - The GTFS instance holding the indexed tables.
 * @returns {string|undefined} The CSV content, or undefined when the table is absent.
 */
function processGtfsTable(tableName, gtfs) {
  let itemMap = gtfs.getIndexedTable(tableName);
  if (!itemMap) {
    return undefined;
  }

  const actualKeys = gtfs.getActualKeysForTable(tableName);
  const indexKeys = gtfs._schema.indexKeysByTableName[tableName];
  const deepness = gtfs._schema.deepnessByTableName[tableName];
  const itemValues = [];

  if (indexKeys.singleton) {
    // Singleton tables hold one row object rather than a Map of rows.
    if (gtfs._preExportItemFunction) {
      itemMap = gtfs._preExportItemFunction(itemMap, tableName);
    }

    itemValues.push(getObjectValuesUsingKeyOrdering(itemMap, actualKeys));

    return Papa.unparse({
      fields: actualKeys,
      data: itemValues,
    });
  }

  itemMap.forEach((gtfsRowObjectOrMap, key) => {
    if (deepness === 0 || deepness === 1) {
      let gtfsRowObject = gtfsRowObjectOrMap;
      if (gtfs._preExportItemFunction) {
        gtfsRowObject = gtfs._preExportItemFunction(gtfsRowObject, tableName, key);
      }

      itemValues.push(getObjectValuesUsingKeyOrdering(gtfsRowObject, actualKeys));
    }

    if (deepness === 2) {
      // Nested Map: one sub-Map of rows per top-level id (e.g. stop times by trip).
      gtfsRowObjectOrMap.forEach((gtfsRowObject, subKey) => {
        if (gtfs._preExportItemFunction) {
          gtfsRowObject = gtfs._preExportItemFunction(gtfsRowObject, tableName, key, subKey);
        }

        itemValues.push(getObjectValuesUsingKeyOrdering(gtfsRowObject, actualKeys));
      });
    }
  });

  return Papa.unparse({
    fields: actualKeys,
    data: itemValues,
  });
}

Expand Down
Loading

0 comments on commit f576813

Please sign in to comment.