Skip to content

Commit

Permalink
ENH: Make Qurro use sparse count JSON: close #58
Browse files Browse the repository at this point in the history
There will be other ways to optimize Qurro, but I think this is
sufficient to close #58 for the time being.

Now I want to try benchmarking this, to see how much more of the
EMP we can fit into a browser :)
  • Loading branch information
fedarko committed Jul 7, 2019
1 parent cea1037 commit bc0f97d
Show file tree
Hide file tree
Showing 20 changed files with 323 additions and 125 deletions.
74 changes: 52 additions & 22 deletions docs/demos/byrd/js/display.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,32 +51,32 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
this.botFeatures = undefined;

// Used when looking up a feature's count.
this.feature_cts = countJSON;
this.featureCts = countJSON;
// Used when searching through features.
this.feature_ids = Object.keys(this.feature_cts);
// Since we filtered out empty features in the python side of
// things, we know that every feature should be represented in the
// count JSON's keys.
this.featureIDs = Object.keys(this.featureCts);

// Just a list of all sample IDs.
var sampleIDs = Object.keys(this.feature_cts[this.feature_ids[0]]);
this.sampleIDs = RRVDisplay.identifySampleIDs(samplePlotJSON);
// Used when letting the user know how many samples are present in
// the sample plot.
// Note that we need to use Object.keys() in order to be able to
// figure out how many entries are in the sampleIDs list; see
// https://stackoverflow.com/a/6700/10730311
this.sampleCount = sampleIDs.length;
this.sampleCount = this.sampleIDs.length;

// a mapping from "reason" (i.e. "balance", "xAxis", "color") to
// list of dropped sample IDs.
//
// "balance" maps to sampleIDs right now because all samples have a
// null balance starting off.
// "balance" maps to this.sampleIDs right now because all samples
// have a null balance starting off.
//
// NOTE: xAxis and color might already exclude some samples from
// being shown in the default categorical encoding. Their
// corresponding lists will be updated in makeSamplePlot(), before
// dom_utils.updateMainSampleShownDiv() will be called (so the
// nulls will be replaced with actual lists).
this.droppedSamples = {
balance: sampleIDs,
balance: this.sampleIDs,
xAxis: null,
color: null
};
Expand Down Expand Up @@ -208,7 +208,7 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
// view/select.
// TODO: make this a separate func so we can unit-test it
if (
this.feature_ids.length <=
this.featureIDs.length <=
this.rankPlotJSON.config.view.width
) {
document.getElementById("barSize").value = "fit";
Expand Down Expand Up @@ -411,7 +411,7 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
// Not 100% sure this is optimal.
newBarSize =
this.rankPlotJSON.config.view.width /
this.feature_ids.length;
this.featureIDs.length;
} else {
newBarSize = parseInt(newSizeType);
}
Expand Down Expand Up @@ -964,6 +964,20 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
}
}

static identifySampleIDs(samplePlotSpec) {
// Like identifyMetadataColumns(), but just finds sample IDs.
var sampleIDs = [];
var dataName = samplePlotSpec.data.name;
var sid;
for (var s = 0; s < samplePlotSpec.datasets[dataName].length; s++) {
sid = samplePlotSpec.datasets[dataName][s]["Sample ID"]
if (sid !== undefined) {
sampleIDs.push(sid);
}
}
return sampleIDs;
}

static identifyMetadataColumns(samplePlotSpec) {
// Given a Vega-Lite sample plot specification, find all the metadata cols.
// Just uses whatever the first available sample's keys are as a
Expand Down Expand Up @@ -995,11 +1009,33 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
* python side of things.)
*/
validateSampleID(sampleID) {
if (this.feature_cts[this.feature_ids[0]][sampleID] === undefined) {
if (!this.sampleIDs.includes(sampleID)) {
throw new Error("Invalid sample ID: " + sampleID);
}
}

/* Gets count data from the featureCts object. This uses a sparse
* storage method, so only samples with a nonzero count for a given
* feature are contained in that feature's entry.
*
* So, if the "entry" for a sample for a feature in featureCts is falsy
* (i.e. undefined, but could also be false, 0, "", null, or NaN --
* none of these should ever occur in the count JSON but if they do
* that should also be indicative of a zero count [1]), then we
* consider that sample's count to be 0. Otherwise, we just return the
* entry.
*
* [1] See https://developer.mozilla.org/en-US/docs/Glossary/Truthy
*/
getCount(featureID, sampleID) {
var putativeCount = this.featureCts[featureID][sampleID];
if (putativeCount) {
return putativeCount;
} else {
return 0;
}
}

/* Given a "row" of the sample plot's JSON for a sample, and given an array of
* features, return the sum of the sample's abundances for those particular features.
* TODO: add option to do log geometric means
Expand All @@ -1009,9 +1045,7 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
this.validateSampleID(sampleID);
var abundance = 0;
for (var t = 0; t < features.length; t++) {
abundance += this.feature_cts[features[t]["Feature ID"]][
sampleID
];
abundance += this.getCount(features[t]["Feature ID"], sampleID);
}
return abundance;
}
Expand All @@ -1025,12 +1059,8 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
updateBalanceSingle(sampleRow) {
var sampleID = sampleRow["Sample ID"];
this.validateSampleID(sampleID);
var topCt = this.feature_cts[this.newFeatureHigh["Feature ID"]][
sampleID
];
var botCt = this.feature_cts[this.newFeatureLow["Feature ID"]][
sampleID
];
var topCt = this.getCount(this.newFeatureHigh["Feature ID"], sampleID);
var botCt = this.getCount(this.newFeatureLow["Feature ID"], sampleID);
return feature_computation.computeBalance(topCt, botCt);
}

Expand Down
2 changes: 1 addition & 1 deletion docs/demos/byrd/main.js

Large diffs are not rendered by default.

74 changes: 52 additions & 22 deletions docs/demos/q2_moving_pictures/js/display.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,32 +51,32 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
this.botFeatures = undefined;

// Used when looking up a feature's count.
this.feature_cts = countJSON;
this.featureCts = countJSON;
// Used when searching through features.
this.feature_ids = Object.keys(this.feature_cts);
// Since we filtered out empty features in the python side of
// things, we know that every feature should be represented in the
// count JSON's keys.
this.featureIDs = Object.keys(this.featureCts);

// Just a list of all sample IDs.
var sampleIDs = Object.keys(this.feature_cts[this.feature_ids[0]]);
this.sampleIDs = RRVDisplay.identifySampleIDs(samplePlotJSON);
// Used when letting the user know how many samples are present in
// the sample plot.
// Note that we need to use Object.keys() in order to be able to
// figure out how many entries are in the sampleIDs list; see
// https://stackoverflow.com/a/6700/10730311
this.sampleCount = sampleIDs.length;
this.sampleCount = this.sampleIDs.length;

// a mapping from "reason" (i.e. "balance", "xAxis", "color") to
// list of dropped sample IDs.
//
// "balance" maps to sampleIDs right now because all samples have a
// null balance starting off.
// "balance" maps to this.sampleIDs right now because all samples
// have a null balance starting off.
//
// NOTE: xAxis and color might already exclude some samples from
// being shown in the default categorical encoding. Their
// corresponding lists will be updated in makeSamplePlot(), before
// dom_utils.updateMainSampleShownDiv() will be called (so the
// nulls will be replaced with actual lists).
this.droppedSamples = {
balance: sampleIDs,
balance: this.sampleIDs,
xAxis: null,
color: null
};
Expand Down Expand Up @@ -208,7 +208,7 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
// view/select.
// TODO: make this a separate func so we can unit-test it
if (
this.feature_ids.length <=
this.featureIDs.length <=
this.rankPlotJSON.config.view.width
) {
document.getElementById("barSize").value = "fit";
Expand Down Expand Up @@ -411,7 +411,7 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
// Not 100% sure this is optimal.
newBarSize =
this.rankPlotJSON.config.view.width /
this.feature_ids.length;
this.featureIDs.length;
} else {
newBarSize = parseInt(newSizeType);
}
Expand Down Expand Up @@ -964,6 +964,20 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
}
}

static identifySampleIDs(samplePlotSpec) {
// Like identifyMetadataColumns(), but just finds sample IDs.
var sampleIDs = [];
var dataName = samplePlotSpec.data.name;
var sid;
for (var s = 0; s < samplePlotSpec.datasets[dataName].length; s++) {
sid = samplePlotSpec.datasets[dataName][s]["Sample ID"]
if (sid !== undefined) {
sampleIDs.push(sid);
}
}
return sampleIDs;
}

static identifyMetadataColumns(samplePlotSpec) {
// Given a Vega-Lite sample plot specification, find all the metadata cols.
// Just uses whatever the first available sample's keys are as a
Expand Down Expand Up @@ -995,11 +1009,33 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
* python side of things.)
*/
validateSampleID(sampleID) {
if (this.feature_cts[this.feature_ids[0]][sampleID] === undefined) {
if (!this.sampleIDs.includes(sampleID)) {
throw new Error("Invalid sample ID: " + sampleID);
}
}

/* Gets count data from the featureCts object. This uses a sparse
* storage method, so only samples with a nonzero count for a given
* feature are contained in that feature's entry.
*
* So, if the "entry" for a sample for a feature in featureCts is falsy
* (i.e. undefined, but could also be false, 0, "", null, or NaN --
* none of these should ever occur in the count JSON but if they do
* that should also be indicative of a zero count [1]), then we
* consider that sample's count to be 0. Otherwise, we just return the
* entry.
*
* [1] See https://developer.mozilla.org/en-US/docs/Glossary/Truthy
*/
getCount(featureID, sampleID) {
var putativeCount = this.featureCts[featureID][sampleID];
if (putativeCount) {
return putativeCount;
} else {
return 0;
}
}

/* Given a "row" of the sample plot's JSON for a sample, and given an array of
* features, return the sum of the sample's abundances for those particular features.
* TODO: add option to do log geometric means
Expand All @@ -1009,9 +1045,7 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
this.validateSampleID(sampleID);
var abundance = 0;
for (var t = 0; t < features.length; t++) {
abundance += this.feature_cts[features[t]["Feature ID"]][
sampleID
];
abundance += this.getCount(features[t]["Feature ID"], sampleID);
}
return abundance;
}
Expand All @@ -1025,12 +1059,8 @@ define(["./feature_computation", "./dom_utils", "vega", "vega-embed"], function(
updateBalanceSingle(sampleRow) {
var sampleID = sampleRow["Sample ID"];
this.validateSampleID(sampleID);
var topCt = this.feature_cts[this.newFeatureHigh["Feature ID"]][
sampleID
];
var botCt = this.feature_cts[this.newFeatureLow["Feature ID"]][
sampleID
];
var topCt = this.getCount(this.newFeatureHigh["Feature ID"], sampleID);
var botCt = this.getCount(this.newFeatureLow["Feature ID"], sampleID);
return feature_computation.computeBalance(topCt, botCt);
}

Expand Down
2 changes: 1 addition & 1 deletion docs/demos/q2_moving_pictures/main.js

Large diffs are not rendered by default.

Loading

0 comments on commit bc0f97d

Please sign in to comment.