Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Logistic Regression algorithm #383

Merged
merged 4 commits into from
Aug 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ public enum FunctionName {
FIT_RCF,
BATCH_RCF,
ANOMALY_LOCALIZATION,
RCF_SUMMARIZE;
RCF_SUMMARIZE,
LOGISTIC_REGRESSION;

public static FunctionName from(String value) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,10 @@ public interface DataFrame extends Iterable<Row>, Writeable, ToXContentObject {
*/
DataFrame select(int[] columns);

/**
* Find the index of the target in columnMetas
* @param target the string value of the target
* @return column index of the target in the list of columnMetas
*/
int getColumnIndex(String target);
}
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,24 @@ public DataFrame select(int[] columns) {
return new DefaultDataFrame(newColumnMetas, rows.stream().map(row-> row.select(columns)).collect(Collectors.toList()));
}

@Override
public int getColumnIndex(String target) {
List<String> columnNames = Arrays.stream(this.columnMetas()).map(ColumnMeta::getName).collect(Collectors.toList());

int targetIndex = -1;
for (int i = 0; i < columnNames.size(); ++i) {
if (columnNames.get(i).equals(target)) {
targetIndex = i;
break;
}
}
if (targetIndex == -1) {
throw new IllegalArgumentException("No matched target when generating dataset from data frame.");
}

return targetIndex;
}

@Override
public Iterator<Row> iterator() {
return rows.iterator();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.ml.common.input.parameter.regression;

import lombok.Builder;
import lombok.Data;
import org.opensearch.common.ParseField;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
import org.opensearch.common.xcontent.NamedXContentRegistry;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.ml.common.FunctionName;
import org.opensearch.ml.common.annotation.MLAlgoParameter;
import org.opensearch.ml.common.input.parameter.MLAlgoParams;

import java.io.IOException;
import java.util.Locale;

import static org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken;

@Data
@MLAlgoParameter(algorithms={FunctionName.LOGISTIC_REGRESSION})
public class LogisticRegressionParams implements MLAlgoParams {

public static final String PARSE_FIELD_NAME = FunctionName.LOGISTIC_REGRESSION.name();
public static final NamedXContentRegistry.Entry XCONTENT_REGISTRY = new NamedXContentRegistry.Entry(
MLAlgoParams.class,
new ParseField(PARSE_FIELD_NAME),
it -> parse(it)
);

public static final String OBJECTIVE_FIELD = "objective";
public static final String OPTIMISER_FIELD = "optimiser";
public static final String MOMENTUM_TYPE_FIELD = "momentum_type";
public static final String LEARNING_RATE_FIELD = "learning_rate";
public static final String EPSILON_FIELD = "epsilon";
public static final String MOMENTUM_FACTOR_FIELD = "momentum_factor";
public static final String BETA1_FIELD = "beta1";
public static final String BETA2_FIELD = "beta2";
public static final String DECAY_RATE_FIELD = "decay_rate";
public static final String EPOCHS_FIELD = "epochs";
public static final String BATCH_SIZE_FIELD = "batch_size";
public static final String LOGGING_INTERVAL_FIELD = "logging_interval";
public static final String SEED_FIELD = "seed";
public static final String TARGET_FIELD = "target";

private ObjectiveType objectiveType;
private OptimizerType optimizerType;
private MomentumType momentumType;
private Double learningRate;
private Double epsilon;
private Double momentumFactor;
private Double beta1;
private Double beta2;
private Double decayRate;
private Integer epochs;
private Integer batchSize;
private Integer loggingInterval;
private Long seed;
private String target;

@Builder(toBuilder = true)
public LogisticRegressionParams(
ObjectiveType objectiveType,
OptimizerType optimizerType,
MomentumType momentumType,
Double learningRate,
Double epsilon,
Double momentumFactor,
Double beta1,
Double beta2,
Double decayRate,
Integer epochs,
Integer batchSize,
Integer loggingInterval,
Long seed,
String target
) {
this.objectiveType = objectiveType;
this.optimizerType = optimizerType;
this.momentumType = momentumType;
this.learningRate = learningRate;
this.epsilon = epsilon;
this.momentumFactor = momentumFactor;
this.beta1 = beta1;
this.beta2 = beta2;
this.decayRate = decayRate;
this.epochs = epochs;
this.batchSize = batchSize;
this.loggingInterval = loggingInterval;
this.seed = seed;
this.target = target;
}

public LogisticRegressionParams(StreamInput in) throws IOException {
if (in.readBoolean()) {
this.objectiveType = in.readEnum(ObjectiveType.class);
}
if (in.readBoolean()) {
this.optimizerType = in.readEnum(OptimizerType.class);
}
if (in.readBoolean()) {
this.momentumType = in.readEnum(MomentumType.class);
}
this.learningRate = in.readOptionalDouble();
this.epsilon = in.readOptionalDouble();
this.momentumFactor = in.readOptionalDouble();
this.beta1 = in.readOptionalDouble();
this.beta2 = in.readOptionalDouble();
this.decayRate = in.readOptionalDouble();
this.epochs = in.readOptionalInt();
this.batchSize = in.readOptionalInt();
this.loggingInterval = in.readOptionalInt();
this.seed = in.readOptionalLong();
this.target = in.readOptionalString();
}

public static MLAlgoParams parse(XContentParser parser) throws IOException {
ObjectiveType objective = null;
OptimizerType optimizerType = null;
MomentumType momentumType = null;
Double learningRate = null;
Double epsilon = null;
Double momentumFactor = null;
Double beta1 = null;
Double beta2 = null;
Double decayRate = null;
Integer epochs = null;
Integer batchSize = null;
Integer loggingInterval = null;
Long seed = null;
String target = null;

ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
String fieldName = parser.currentName();
parser.nextToken();

switch (fieldName) {
case OBJECTIVE_FIELD:
objective = ObjectiveType.valueOf(parser.text().toUpperCase(Locale.ROOT));
break;
case OPTIMISER_FIELD:
optimizerType = OptimizerType.valueOf(parser.text().toUpperCase(Locale.ROOT));
break;
case MOMENTUM_TYPE_FIELD:
momentumType = MomentumType.valueOf(parser.text().toUpperCase(Locale.ROOT));
break;
case LEARNING_RATE_FIELD:
learningRate = parser.doubleValue(false);
break;
case EPSILON_FIELD:
epsilon = parser.doubleValue(false);
break;
case MOMENTUM_FACTOR_FIELD:
momentumFactor = parser.doubleValue(false);
break;
case BETA1_FIELD:
beta1 = parser.doubleValue(false);
break;
case BETA2_FIELD:
beta2 = parser.doubleValue(false);
break;
case DECAY_RATE_FIELD:
decayRate = parser.doubleValue(false);
break;
case EPOCHS_FIELD:
epochs = parser.intValue(false);
break;
case BATCH_SIZE_FIELD:
batchSize = parser.intValue(false);
break;
case LOGGING_INTERVAL_FIELD:
loggingInterval = parser.intValue(false);
break;
case SEED_FIELD:
seed = parser.longValue(false);
break;
case TARGET_FIELD:
target = parser.text();
break;
default:
parser.skipChildren();
break;
}
}
return new LogisticRegressionParams(objective, optimizerType, momentumType, learningRate, epsilon, momentumFactor, beta1, beta2, decayRate, epochs, batchSize, loggingInterval, seed, target);
}

@Override
public void writeTo(StreamOutput out) throws IOException {
if (objectiveType != null) {
out.writeBoolean(true);
out.writeEnum(objectiveType);
} else {
out.writeBoolean(false);
}
if (optimizerType != null) {
out.writeBoolean(true);
out.writeEnum(optimizerType);
} else {
out.writeBoolean(false);
}
if (momentumType != null) {
out.writeBoolean(true);
out.writeEnum(momentumType);
} else {
out.writeBoolean(false);
}
out.writeOptionalDouble(learningRate);
out.writeOptionalDouble(epsilon);
out.writeOptionalDouble(momentumFactor);
out.writeOptionalDouble(beta1);
out.writeOptionalDouble(beta2);
out.writeOptionalDouble(decayRate);
out.writeOptionalInt(epochs);
out.writeOptionalInt(batchSize);
out.writeOptionalInt(loggingInterval);
out.writeOptionalLong(seed);
out.writeOptionalString(target);
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
if (objectiveType != null) {
builder.field(OBJECTIVE_FIELD, objectiveType);
}
if (optimizerType != null) {
builder.field(OPTIMISER_FIELD, optimizerType);
}
if (momentumType != null) {
builder.field(MOMENTUM_TYPE_FIELD, momentumType);
}
if (learningRate != null) {
builder.field(LEARNING_RATE_FIELD, learningRate);
}
if (epsilon != null) {
builder.field(EPSILON_FIELD, epsilon);
}
if (momentumFactor != null) {
builder.field(MOMENTUM_FACTOR_FIELD, momentumFactor);
}
if (beta1 != null) {
builder.field(BETA1_FIELD, beta1);
}
if (beta2 != null) {
builder.field(BETA1_FIELD, beta2);
}
if (decayRate != null) {
builder.field(DECAY_RATE_FIELD, decayRate);
}
if (epochs != null) {
builder.field(EPOCHS_FIELD, epochs);
}
if (batchSize != null) {
builder.field(BATCH_SIZE_FIELD, batchSize);
}
if (loggingInterval != null) {
builder.field(LOGGING_INTERVAL_FIELD, loggingInterval);
}
if (seed != null) {
builder.field(SEED_FIELD, seed);
}
if (target != null) {
builder.field(TARGET_FIELD, target);
}
builder.endObject();
return builder;
}

@Override
public String getWriteableName() {
return PARSE_FIELD_NAME;
}

@Override
public int getVersion() {
return 1;
}

public enum ObjectiveType {
HINGE,
LOGMULTICLASS;
public static ObjectiveType from(String value) {
try{
return ObjectiveType.valueOf(value);
} catch (Exception e) {
throw new IllegalArgumentException("Wrong objective type");
}
}
}

public enum MomentumType {
STANDARD,
NESTEROV;

public static MomentumType from(String value) {
try{
return MomentumType.valueOf(value);
} catch (Exception e) {
throw new IllegalArgumentException("Wrong momentum type");
}
}
}

public enum OptimizerType {
SIMPLE_SGD,
LINEAR_DECAY_SGD,
SQRT_DECAY_SGD,
ADA_GRAD,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Craigacp help review if all these supported for logistic regression?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are all supported.

ADA_DELTA,
ADAM,
RMS_PROP;

public static OptimizerType from(String value) {
try{
return OptimizerType.valueOf(value);
} catch (Exception e) {
throw new IllegalArgumentException("Wrong optimizer type");
}
}
}
}
1 change: 1 addition & 0 deletions ml-algorithms/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies {
implementation group: 'org.tribuo', name: 'tribuo-clustering-kmeans', version: '4.2.1'
implementation group: 'org.tribuo', name: 'tribuo-regression-sgd', version: '4.2.1'
implementation group: 'org.tribuo', name: 'tribuo-anomaly-libsvm', version: '4.2.1'
implementation group: 'org.tribuo', name: 'tribuo-classification-sgd', version: '4.2.1'
implementation group: 'commons-io', name: 'commons-io', version: '2.11.0'
implementation 'software.amazon.randomcutforest:randomcutforest-parkservices:3.0-rc3'
implementation 'software.amazon.randomcutforest:randomcutforest-core:3.0-rc3'
Expand Down
Loading