From 8e15482295c1324eefea020faeb11e4c686357c6 Mon Sep 17 00:00:00 2001 From: bering <70102274+lawofcycles@users.noreply.github.com> Date: Thu, 28 Dec 2023 05:23:09 +0900 Subject: [PATCH] feat(glue-alpha): add `cfn-glue-table-tableinput-parameters` to Glue table construct (#27643) Add [cfn-glue-table-tableinput-parameters](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-table-tableinput.html#cfn-glue-table-tableinput-parameters) to the Glue Table construct as an optional prop. Users can specify additional table parameters when creating a Glue Table. Any key/value pair can be set depending on each user's requirements, such as the table's additional metadata or statistics. Some parameters, like `skip.header.line.count`, are used when AWS services or 3rd-party tools read the table. Closes https://github.com/aws/aws-cdk/issues/14159. --- All Submissions: - [x] Have you followed the guidelines in our [Contributing guide?](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md) Adding new Unconventional Dependencies: - [ ] This PR adds new unconventional dependencies following the process described [here](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md/#adding-new-unconventional-dependencies) New Features - [x] Have you added the new feature to an [integration test](https://github.com/aws/aws-cdk/blob/main/INTEGRATION_TESTS.md)? - [x] Did you use yarn integ to deploy the infrastructure and generate the snapshot (i.e. yarn integ without --dry-run)? 
--- By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license --------- Co-authored-by: Vinayak Kukreja <78971045+vinayak-kukreja@users.noreply.github.com> Co-authored-by: Sumu Pitchayan <35242245+sumupitchayan@users.noreply.github.com> --- packages/@aws-cdk/aws-glue-alpha/README.md | 18 ++++ .../aws-glue-alpha/lib/external-table.ts | 1 + .../@aws-cdk/aws-glue-alpha/lib/s3-table.ts | 1 + .../@aws-cdk/aws-glue-alpha/lib/table-base.ts | 17 ++++ .../test/external-table.test.ts | 41 +++++++++ .../aws-cdk-glue.assets.json | 4 +- .../aws-cdk-glue.template.json | 66 +++++++++++++++ .../integ.table.js.snapshot/manifest.json | 28 ++----- .../test/integ.table.js.snapshot/tree.json | 84 +++++++++++++++++++ .../aws-glue-alpha/test/integ.table.ts | 12 +++ .../aws-glue-alpha/test/s3-table.test.ts | 30 +++++++ .../aws-glue-alpha/test/table-base.test.ts | 28 +++++++ 12 files changed, 309 insertions(+), 21 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index cbbea0b1dbf84..7da9ff427ecec 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -263,6 +263,24 @@ new glue.S3Table(this, 'MyTable', { }); ``` +Glue tables can also be configured to contain user-defined table properties through the [`parameters`](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-table-tableinput.html#cfn-glue-table-tableinput-parameters) property: + +```ts +declare const myDatabase: glue.Database; +new glue.S3Table(this, 'MyTable', { + parameters: { + key1: 'val1', + key2: 'val2', + }, + database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` + ### Partition Keys To improve query performance, a table can specify `partitionKeys` on which data is stored and queried separately. 
For example, you might partition a table by `year` and `month` to optimize queries based on a time window: diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts index eacfcdb99b38e..38bb5b28e5919 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/external-table.ts @@ -69,6 +69,7 @@ export class ExternalTable extends TableBase { 'has_encrypted_data': true, 'partition_filtering.enabled': props.enablePartitionFiltering, 'connectionName': props.connection.connectionName, + ...props.parameters, }, storageDescriptor: { location: props.externalDataLocation, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts index 56c81e4e473c4..49d56abdd5968 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/s3-table.ts @@ -141,6 +141,7 @@ export class S3Table extends TableBase { 'classification': props.dataFormat.classificationString?.value, 'has_encrypted_data': true, 'partition_filtering.enabled': props.enablePartitionFiltering, + ...this.parameters, }, storageDescriptor: { location: `s3://${this.bucket.bucketName}/${this.s3Prefix}`, diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts b/packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts index e77875c6c75e3..b313ae72885c2 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/table-base.ts @@ -147,6 +147,16 @@ export interface TableBaseProps { * @default - The parameter is not defined */ readonly storageParameters?: StorageParameter[]; + + /** + * Key/value pairs that define properties associated with the table. + * The key/value pairs that may be submitted are not restricted; however, their functionality is not guaranteed. 
+ * + * @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-table-tableinput.html#cfn-glue-table-tableinput-parameters + * + * @default - The parameter is not defined + */ + readonly parameters?: { [key: string]: string } } /** @@ -214,6 +224,12 @@ export abstract class TableBase extends Resource implements ITable { */ public readonly storageParameters?: StorageParameter[]; + /** + * The key/value properties (table parameters) associated with the table. + * @see https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-glue-table-tableinput.html#cfn-glue-table-tableinput-parameters + */ + protected readonly parameters: { [key: string]: string } + /** * Partition indexes must be created one at a time. To avoid * race conditions, we store the resource and add dependencies @@ -236,6 +252,7 @@ export abstract class TableBase extends Resource implements ITable { this.columns = props.columns; this.partitionKeys = props.partitionKeys; this.storageParameters = props.storageParameters; + this.parameters = props.parameters ?? {}; this.compressed = props.compressed ?? 
false; } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts index 012722e050ebc..68aac264d2d35 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/external-table.test.ts @@ -1066,6 +1066,47 @@ test('can associate an external location with the glue table', () => { }); }); +test('can specify table parameter', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const connection = new glue.Connection(stack, 'Connection', { + connectionName: 'my_connection', + type: glue.ConnectionType.JDBC, + properties: { + JDBC_CONNECTION_URL: 'jdbc:server://server:443/connection', + USERNAME: 'username', + PASSWORD: 'password', + }, + }); + new glue.ExternalTable(stack, 'Table', { + database, + tableName: 'my_table', + connection, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + externalDataLocation, + parameters: { + key1: 'val1', + key2: 'val2', + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Parameters: { + key1: 'val1', + key2: 'val2', + classification: 'json', + has_encrypted_data: true, + }, + }, + }); +}); + function createTable(props: Pick>): void { const stack = new cdk.Stack(); const connection = new glue.Connection(stack, 'Connection', { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json index e08d0476521bf..99d84e8b93cc1 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json @@ -1,7 +1,7 @@ { "version": "34.0.0", "files": { - 
"7a0cca4ed21fb1c6d9b050e5fd7c8d857b13af8ef7b8cead40cd08d2e25fc892": { + "b91bf4cf21d2d51d240c781968ceabd5c521d572397afa2f3e8ed032b9dab278": { "source": { "path": "aws-cdk-glue.template.json", "packaging": "file" @@ -9,7 +9,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "7a0cca4ed21fb1c6d9b050e5fd7c8d857b13af8ef7b8cead40cd08d2e25fc892.json", + "objectKey": "b91bf4cf21d2d51d240c781968ceabd5c521d572397afa2f3e8ed032b9dab278.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json index 75020f0d007ad..743215cd09bff 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json @@ -623,6 +623,72 @@ } } }, + "MyTableWithParametersTable39568AB8": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "table_with_parameters generated by CDK", + "Name": "table_with_parameters", + "Parameters": { + "classification": "json", + "has_encrypted_data": true, + "key1": "val1", + "key2": "val2" + }, + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": 
"DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, "MyDeprecatedTableAA0364FD": { "Type": "AWS::Glue::Table", "Properties": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json index d511e7b0795fb..dee2a80c834e3 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json @@ -14,10 +14,11 @@ "environment": "aws://unknown-account/unknown-region", "properties": { "templateFile": "aws-cdk-glue.template.json", + "terminationProtection": false, "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/7a0cca4ed21fb1c6d9b050e5fd7c8d857b13af8ef7b8cead40cd08d2e25fc892.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/b91bf4cf21d2d51d240c781968ceabd5c521d572397afa2f3e8ed032b9dab278.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ @@ -105,6 +106,12 @@ "data": "MyTableWithStorageDescriptorParametersTable1A347345" } ], + "/aws-cdk-glue/MyTableWithParameters/Table": [ + { + "type": "aws:cdk:logicalId", + "data": "MyTableWithParametersTable39568AB8" + } + ], "/aws-cdk-glue/MyDeprecatedTable/Table": [ { "type": "aws:cdk:logicalId", @@ -146,24 
+153,6 @@ "type": "aws:cdk:logicalId", "data": "CheckBootstrapVersion" } - ], - "MyConnection5621880D": [ - { - "type": "aws:cdk:logicalId", - "data": "MyConnection5621880D", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "MyTableWithCustomLocationTable43A19D42": [ - { - "type": "aws:cdk:logicalId", - "data": "MyTableWithCustomLocationTable43A19D42", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } ] }, "displayName": "aws-cdk-glue" @@ -181,6 +170,7 @@ "environment": "aws://unknown-account/unknown-region", "properties": { "templateFile": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json", + "terminationProtection": false, "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json index ca372333c5ca8..7d8e102d6d975 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json @@ -842,6 +842,90 @@ "version": "0.0.0" } }, + "MyTableWithParameters": { + "id": "MyTableWithParameters", + "path": "aws-cdk-glue/MyTableWithParameters", + "children": { + "Table": { + "id": "Table", + "path": "aws-cdk-glue/MyTableWithParameters/Table", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Table", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "tableInput": { + "name": "table_with_parameters", + "description": "table_with_parameters generated by CDK", + "parameters": { + "classification": "json", + "has_encrypted_data": true, + 
"key1": "val1", + "key2": "val2" + }, + "storageDescriptor": { + "location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "compressed": false, + "storedAsSubDirectories": false, + "columns": [ + { + "name": "col1", + "type": "string" + }, + { + "name": "col2", + "type": "string", + "comment": "col2 comment" + }, + { + "name": "col3", + "type": "array" + }, + { + "name": "col4", + "type": "map" + }, + { + "name": "col5", + "type": "struct" + } + ], + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + } + }, + "tableType": "EXTERNAL_TABLE" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.aws_glue.CfnTable", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.S3Table", + "version": "0.0.0" + } + }, "MyDeprecatedTable": { "id": "MyDeprecatedTable", "path": "aws-cdk-glue/MyDeprecatedTable", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts index 7c175b5bfa3bc..45d2ccb0110a1 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts @@ -124,6 +124,18 @@ new glue.S3Table(stack, 'MyTableWithStorageDescriptorParameters', { ], }); +new glue.S3Table(stack, 'MyTableWithParameters', { + database, + bucket, + tableName: 'table_with_parameters', + columns, + dataFormat: glue.DataFormat.JSON, + parameters: { + key1: 'val1', + key2: 'val2', + }, +}); + new glue.Table(stack, 'MyDeprecatedTable', { database, bucket, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts index c5e498ad61c41..dd228f1c8336a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts +++ 
b/packages/@aws-cdk/aws-glue-alpha/test/s3-table.test.ts @@ -1095,6 +1095,36 @@ describe('validate', () => { }); }); +test('can specify table parameter', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const dataFormat = glue.DataFormat.JSON; + new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat, + parameters: { + key1: 'val1', + key2: 'val2', + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Parameters: { + key1: 'val1', + key2: 'val2', + classification: 'json', + has_encrypted_data: true, + }, + }, + }); +}); + function createTable(props: Pick>): void { const stack = new cdk.Stack(); new glue.S3Table(stack, 'table', { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts index d39345c288f24..abc837d19f225 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/table-base.test.ts @@ -674,6 +674,34 @@ test('data format without classification string', () => { }); }); +test('can specify table parameter', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + const dataFormat = glue.DataFormat.JSON; + new glue.S3Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat, + parameters: { + key1: 'val1', + key2: 'val2', + }, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + Parameters: { + key1: 'val1', + key2: 'val2', + }, + }, + }); +}); + function createTable(props: Pick>): void { const stack = new cdk.Stack(); new glue.S3Table(stack, 'table', {