Skip to content

Commit

Permalink
The portcullis filter now accepts custom user-defined folders for the initial training rules (see #41).
Browse files Browse the repository at this point in the history
Corrected a deprecation warning in rule_filter.py.

Also reset the doc prefix.

Corrected a bug that caused sporadic segmentation faults when the positive or the negative set was too small (smote.cc).
  • Loading branch information
maplesondctx committed May 21, 2019
1 parent 9afdf22 commit 027679a
Show file tree
Hide file tree
Showing 6 changed files with 388 additions and 361 deletions.
2 changes: 1 addition & 1 deletion doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ MKDIR_P = mkdir -p
INSTALL = /usr/bin/install -c -m 644

# Set autoconf variables
prefix = /home/luca/workspace/portcullis/compiled
prefix = /usr/local
PACKAGE_TARNAME = portcullis
top_srcdir = ..
srcdir = .
Expand Down
5 changes: 3 additions & 2 deletions lib/include/portcullis/ml/smote.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Smote {
bool verbose;

double* data;
size_t nelements;
size_t rows;
size_t cols;

Expand All @@ -52,7 +53,7 @@ class Smote {

public:

Smote(uint16_t defaultK, uint16_t _smoteness, uint16_t _threads, double* _data, size_t _rows, size_t _cols);
Smote(uint16_t defaultK, uint16_t _smoteness, uint16_t _threads, double* _data, size_t _nelements, size_t _rows, size_t _cols);

~Smote() {
delete[] synthetic;
Expand Down Expand Up @@ -99,4 +100,4 @@ class Smote {
void print(ostream& out) const;
};
}
}
}
2 changes: 1 addition & 1 deletion lib/src/model_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ portcullis::ml::ForestPtr portcullis::ml::ModelFeatures::trainInstance(const Jun
}
//cout << endl;
}
Smote smote(5, N, threads, nm, negData->getNumRows(), negData->getNumCols() - 1);
Smote smote(5, N, threads, nm, nelements, negData->getNumRows(), negData->getNumCols() - 1);
smote.setVerbose(verbose);
smote.execute();
smote_rows = smote.getNbSynthRows();
Expand Down
18 changes: 13 additions & 5 deletions lib/src/smote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ using portcullis::ml::KNN;

#include <portcullis/ml/smote.hpp>

portcullis::ml::Smote::Smote(uint16_t defaultK, uint16_t _smoteness, uint16_t _threads, double* _data, size_t _rows, size_t _cols) {
portcullis::ml::Smote::Smote(uint16_t defaultK, uint16_t _smoteness, uint16_t _threads, double* _data, size_t _nelements, size_t _rows, size_t _cols) {
data = _data;
rows = _rows;
cols = _cols;
nelements = _nelements;
if (_rows < defaultK && _rows < 100)
k = _rows;
else
Expand Down Expand Up @@ -58,9 +59,16 @@ void portcullis::ml::Smote::execute() {
const vector<uint32_t> nns = knn.getNNs(i);
uint32_t nn = nns[igen(rng)]; // Nearest neighbour row index
for (size_t j = 0; j < cols; j++) {
double dif = data[(nn * cols) + j] - data[(i * cols) + j];
double gap = dgen(rng);
synthetic[(new_index * cols) + j] = data[(i * cols) + j] + gap * dif;
double dif;
double gap;
if ((nn * cols) + j <= nelements) {
dif = data[(nn * cols) + j] - data[(i * cols) + j]; // This is causing errors in some weird cases.
} else{
dif = 0;
}
gap = dgen(rng);
synthetic[(new_index * cols) + j] = data[(i * cols) + j] + gap * dif;

}
new_index++;
N--;
Expand All @@ -84,4 +92,4 @@ void portcullis::ml::Smote::print(ostream& out) const {
out << endl;
}
out << endl;
}
}
Loading

0 comments on commit 027679a

Please sign in to comment.