From 30c4d71f87c922acf9f55fa4af67ab515ab543b8 Mon Sep 17 00:00:00 2001 From: Timothy Brown Date: Wed, 17 Jan 2024 12:16:07 -0700 Subject: [PATCH 1/3] Refactor config classes --- .../io/onetable/client/ExternalTable.java | 67 ++++ ...HudiSourceConfig.java => SourceTable.java} | 19 +- .../java/io/onetable/client/TargetTable.java | 43 +++ .../io/onetable/spi/sync/TargetClient.java | 4 +- .../io/onetable/client/TestSourceTable.java | 58 ++++ .../io/onetable/client/TestTargetTable.java | 73 ++++ .../io/onetable/client/OneTableClient.java | 24 +- .../onetable/client/PerTableConfigImpl.java | 142 -------- .../onetable/client/SourceClientProvider.java | 14 +- .../client/TableFormatClientFactory.java | 9 +- .../io/onetable/client/TableSyncConfig.java | 30 +- .../java/io/onetable/delta/DeltaClient.java | 18 +- .../delta/DeltaSourceClientProvider.java | 13 +- .../hudi/HudiSourceClientProvider.java | 13 +- .../onetable/hudi/HudiSourceConfigImpl.java | 32 +- .../io/onetable/hudi/HudiTargetClient.java | 20 +- .../io/onetable/iceberg/IcebergClient.java | 22 +- .../onetable/iceberg/IcebergSourceClient.java | 10 +- .../iceberg/IcebergSourceClientProvider.java | 7 +- .../java/io/onetable/ITOneTableClient.java | 311 ++++++++++-------- .../onetable/client/TestOneTableClient.java | 100 ++++-- .../onetable/client/TestPerTableConfig.java | 118 ------- .../client/TestTableFormatClientFactory.java | 35 +- .../onetable/delta/ITDeltaSourceClient.java | 124 +++---- .../java/io/onetable/delta/TestDeltaSync.java | 12 +- .../io/onetable/hudi/ITHudiSourceClient.java | 4 +- .../io/onetable/hudi/ITHudiTargetClient.java | 12 +- .../iceberg/ITIcebergSourceClient.java | 74 +++-- .../iceberg/TestIcebergSourceClient.java | 29 +- .../io/onetable/iceberg/TestIcebergSync.java | 13 +- .../java/io/onetable/loadtest/LoadTest.java | 72 ++-- .../onetable/hudi/sync/OneTableSyncTool.java | 51 ++- .../java/io/onetable/utilities/RunSync.java | 58 +++- 33 files changed, 872 insertions(+), 759 deletions(-) create mode 100644 api/src/main/java/io/onetable/client/ExternalTable.java rename api/src/main/java/io/onetable/client/{HudiSourceConfig.java => SourceTable.java} (66%) create mode 100644 api/src/main/java/io/onetable/client/TargetTable.java create mode 100644 api/src/test/java/io/onetable/client/TestSourceTable.java create mode 100644 api/src/test/java/io/onetable/client/TestTargetTable.java delete mode 100644 core/src/main/java/io/onetable/client/PerTableConfigImpl.java rename api/src/main/java/io/onetable/client/PerTableConfig.java => core/src/main/java/io/onetable/client/TableSyncConfig.java (72%) delete mode 100644 core/src/test/java/io/onetable/client/TestPerTableConfig.java diff --git a/api/src/main/java/io/onetable/client/ExternalTable.java b/api/src/main/java/io/onetable/client/ExternalTable.java new file mode 100644 index 000000000..beedee58a --- /dev/null +++ b/api/src/main/java/io/onetable/client/ExternalTable.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.onetable.client; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NonNull; + +import org.apache.hadoop.fs.Path; + +import com.google.common.base.Preconditions; + +@Getter +@EqualsAndHashCode +public class ExternalTable { + @NonNull String name; + @NonNull String formatName; + @NonNull String basePath; + @NonNull String dataPath; + String[] namespace; + CatalogConfig catalogConfig; + + ExternalTable( + @NonNull String name, + @NonNull String formatName, + @NonNull String basePath, + String dataPath, + String[] namespace, + CatalogConfig catalogConfig) { + this.name = name; + this.formatName = formatName; + this.basePath = sanitizeBasePath(basePath); + this.dataPath = dataPath == null ? this.basePath : sanitizeBasePath(dataPath); + this.namespace = namespace; + this.catalogConfig = catalogConfig; + } + + private String sanitizeBasePath(String tableBasePath) { + Path path = new Path(tableBasePath); + Preconditions.checkArgument(path.isAbsolute(), "Table base path must be absolute"); + if (path.isAbsoluteAndSchemeAuthorityNull()) { + // assume this is local file system and append scheme + return "file://" + path; + } else if (path.toUri().getScheme().equals("file")) { + // add extra slashes + return "file://" + path.toUri().getPath(); + } else { + return path.toString(); + } + } +} diff --git a/api/src/main/java/io/onetable/client/HudiSourceConfig.java b/api/src/main/java/io/onetable/client/SourceTable.java similarity index 66% rename from api/src/main/java/io/onetable/client/HudiSourceConfig.java rename to api/src/main/java/io/onetable/client/SourceTable.java index 78d082314..61d7c45b1 100644 --- a/api/src/main/java/io/onetable/client/HudiSourceConfig.java +++ b/api/src/main/java/io/onetable/client/SourceTable.java @@ -18,10 +18,19 @@ package io.onetable.client; -import io.onetable.spi.extractor.SourcePartitionSpecExtractor; +import lombok.Builder; +import lombok.EqualsAndHashCode; -public interface HudiSourceConfig { - public String getPartitionSpecExtractorClass(); - - SourcePartitionSpecExtractor loadSourcePartitionSpecExtractor(); +@EqualsAndHashCode(callSuper = true) +public class SourceTable extends ExternalTable { + @Builder(toBuilder = true) + public SourceTable( + String name, + String formatName, + String basePath, + String dataPath, + String[] namespace, + CatalogConfig catalogConfig) { + super(name, formatName, basePath, dataPath, namespace, catalogConfig); + } } diff --git a/api/src/main/java/io/onetable/client/TargetTable.java b/api/src/main/java/io/onetable/client/TargetTable.java new file mode 100644 index 000000000..526451872 --- /dev/null +++ b/api/src/main/java/io/onetable/client/TargetTable.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.onetable.client; + +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; + +@Getter +@EqualsAndHashCode(callSuper = true) +public class TargetTable extends ExternalTable { + int metadataRetentionInHours; + + @Builder(toBuilder = true) + public TargetTable( + String name, + String formatName, + String basePath, + String dataPath, + String[] namespace, + CatalogConfig catalogConfig, + Integer metadataRetentionInHours) { + super(name, formatName, basePath, dataPath, namespace, catalogConfig); + this.metadataRetentionInHours = + metadataRetentionInHours == null ? 24 * 7 : metadataRetentionInHours; + } +} diff --git a/api/src/main/java/io/onetable/spi/sync/TargetClient.java b/api/src/main/java/io/onetable/spi/sync/TargetClient.java index bd9b4962b..3b9e0beb6 100644 --- a/api/src/main/java/io/onetable/spi/sync/TargetClient.java +++ b/api/src/main/java/io/onetable/spi/sync/TargetClient.java @@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configuration; -import io.onetable.client.PerTableConfig; +import io.onetable.client.TargetTable; import io.onetable.model.OneTable; import io.onetable.model.OneTableMetadata; import io.onetable.model.schema.OnePartitionField; @@ -89,5 +89,5 @@ public interface TargetClient { String getTableFormat(); /** Initializes the client with provided configuration */ - void init(PerTableConfig perTableConfig, Configuration configuration); + void init(TargetTable targetTable, Configuration configuration); } diff --git a/api/src/test/java/io/onetable/client/TestSourceTable.java b/api/src/test/java/io/onetable/client/TestSourceTable.java new file mode 100644 index 000000000..fb1daf7f7 --- /dev/null +++ b/api/src/test/java/io/onetable/client/TestSourceTable.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.onetable.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +public class TestSourceTable { + @Test + void sanitizePath() { + SourceTable tooManySlashes = + SourceTable.builder().basePath("s3://bucket//path").name("name").formatName("hudi").build(); + assertEquals("s3://bucket/path", tooManySlashes.getBasePath()); + + SourceTable localFilePath = + SourceTable.builder().basePath("/local/data//path").name("name").formatName("hudi").build(); + assertEquals("file:///local/data/path", localFilePath.getBasePath()); + + SourceTable properLocalFilePath = + SourceTable.builder() + .basePath("file:///local/data//path") + .name("name") + .formatName("hudi") + .build(); + assertEquals("file:///local/data/path", properLocalFilePath.getBasePath()); + } + + @Test + void errorIfRequiredArgsNotSet() { + assertThrows(NullPointerException.class, () -> SourceTable.builder().name("name").build()); + + assertThrows( + NullPointerException.class, + () -> SourceTable.builder().basePath("file://bucket/path").build()); + + assertThrows( + NullPointerException.class, + () -> SourceTable.builder().basePath("file://bucket/path").name("name").build()); + } +} diff --git a/api/src/test/java/io/onetable/client/TestTargetTable.java b/api/src/test/java/io/onetable/client/TestTargetTable.java new file mode 100644 index 000000000..a39157684 --- /dev/null +++ b/api/src/test/java/io/onetable/client/TestTargetTable.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.onetable.client; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.junit.jupiter.api.Test; + +public class TestTargetTable { + @Test + void sanitizePath() { + TargetTable tooManySlashes = + TargetTable.builder().basePath("s3://bucket//path").name("name").formatName("hudi").build(); + assertEquals("s3://bucket/path", tooManySlashes.getBasePath()); + + TargetTable localFilePath = + TargetTable.builder().basePath("/local/data//path").name("name").formatName("hudi").build(); + assertEquals("file:///local/data/path", localFilePath.getBasePath()); + + TargetTable properLocalFilePath = + TargetTable.builder() + .basePath("file:///local/data//path") + .name("name") + .formatName("hudi") + .build(); + assertEquals("file:///local/data/path", properLocalFilePath.getBasePath()); + } + + @Test + void defaultValueSet() { + TargetTable table = + TargetTable.builder() + .basePath("file://bucket/path") + .name("name") + .formatName("hudi") + .build(); + + assertEquals(24 * 7, table.getMetadataRetentionInHours()); + assertNull(table.getNamespace()); + assertNull(table.getCatalogConfig()); + } + + @Test + void errorIfRequiredArgsNotSet() { + assertThrows(NullPointerException.class, () -> SourceTable.builder().name("name").build()); + + assertThrows( + NullPointerException.class, + () -> SourceTable.builder().basePath("file://bucket/path").build()); + + assertThrows( + NullPointerException.class, + () -> SourceTable.builder().basePath("file://bucket/path").name("name").build()); + } +} diff --git a/core/src/main/java/io/onetable/client/OneTableClient.java b/core/src/main/java/io/onetable/client/OneTableClient.java index 0881546f8..b0f8a0681 100644 --- a/core/src/main/java/io/onetable/client/OneTableClient.java +++ b/core/src/main/java/io/onetable/client/OneTableClient.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -52,7 +51,7 @@ /** * Responsible for completing the entire lifecycle of the sync process given {@link - * PerTableConfigImpl}. This is done in three steps, + * TableSyncConfig}. This is done in three steps, * *