Skip to content

Commit

Permalink
[LoginNodes] Modify cli to add custom action configuration parameters…
Browse files Browse the repository at this point in the history
… to login nodes (#6319)

* Add CustomActions configuration for login nodes

- Added schema for login nodes custom actions
- Added CustomAction to LoginNodesPool resource in cluster_config.py

Signed-off-by: Chris Makin <cmakin@amazon.com>

* Add login node Pool Name to dna_json

* Add DescribeStack action to LoginNodesIamResources

This change allows OnNodeUpdated custom actions to be
performed on login nodes, which requires checking the cluster stack
status in custom_action_executor.py.

* Modify integration test for custom actions on Login Nodes

- Modified test_essential_features integration test to account
for custom actions on login nodes.

* Update unit tests to include login node custom action changes

- Modified login node dna_json unit test to include pool_name

- Ran tox autoformatter

* Merge head and login nodes custom action schema unit test

- Update changelog
- Fix DescribeStacks resource error in unit test

---------

Signed-off-by: Chris Makin <cmakin@amazon.com>
  • Loading branch information
cjmakin committed Jul 1, 2024
1 parent 937dcbc commit 2a13ae6
Show file tree
Hide file tree
Showing 15 changed files with 149 additions and 12 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
CHANGELOG
=========

3.11.0
------

**ENHANCEMENTS**

- Add support for custom actions on login nodes.

3.10.0
------

Expand Down
2 changes: 2 additions & 0 deletions cli/src/pcluster/config/cluster_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1346,6 +1346,7 @@ def __init__(
networking: LoginNodesNetworking = None,
count: int = None,
ssh: LoginNodesSsh = None,
custom_actions: CustomActions = None,
iam: LoginNodesIam = None,
gracetime_period: int = None,
**kwargs,
Expand All @@ -1357,6 +1358,7 @@ def __init__(
self.networking = networking
self.count = Resource.init_param(count, default=1)
self.ssh = ssh
self.custom_actions = custom_actions
self.iam = iam or LoginNodesIam(implied=True)
self.gracetime_period = Resource.init_param(gracetime_period, default=10)

Expand Down
14 changes: 14 additions & 0 deletions cli/src/pcluster/schemas/cluster_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,19 @@ def make_resource(self, data, **kwargs):
return CustomActions(**data)


class LoginNodesCustomActionsSchema(BaseSchema):
    """Represent the schema for all available custom actions in a login node pool.

    Exposes the same three lifecycle hooks as the head-node custom-action
    schema; each field accepts either a single custom action or a list of
    them (via OneOrManyCustomActionField). All hooks carry the UNSUPPORTED
    update policy — presumably they cannot be modified through a cluster
    update (NOTE(review): confirm against UpdatePolicy semantics).
    """

    # Hook run at node start (before configuration), per its name.
    on_node_start = OneOrManyCustomActionField(metadata={"update_policy": UpdatePolicy.UNSUPPORTED})
    # Hook run once the node has been configured, per its name.
    on_node_configured = OneOrManyCustomActionField(metadata={"update_policy": UpdatePolicy.UNSUPPORTED})
    # Hook run after a node update, per its name.
    on_node_updated = OneOrManyCustomActionField(metadata={"update_policy": UpdatePolicy.UNSUPPORTED})

    @post_load
    def make_resource(self, data, **kwargs):
        """Generate resource."""
        # Reuses the generic CustomActions resource; the loaded field names
        # match its constructor keywords.
        return CustomActions(**data)


class InstanceTypeSchema(BaseSchema):
"""Schema of a compute resource that supports a pool of instance types."""

Expand Down Expand Up @@ -1422,6 +1435,7 @@ class LoginNodesPoolSchema(BaseSchema):
metadata={"update_policy": UpdatePolicy.SUPPORTED},
)
ssh = fields.Nested(LoginNodesSshSchema, metadata={"update_policy": UpdatePolicy.LOGIN_NODES_STOP})
custom_actions = fields.Nested(LoginNodesCustomActionsSchema, metadata={"update_policy": UpdatePolicy.IGNORED})
iam = fields.Nested(LoginNodesIamSchema, metadata={"update_policy": UpdatePolicy.LOGIN_NODES_STOP})
gracetime_period = fields.Int(
validate=validate.Range(
Expand Down
11 changes: 10 additions & 1 deletion cli/src/pcluster/templates/cdk_builder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,9 +934,18 @@ def _build_policy(self) -> List[iam.PolicyStatement]:
sid="CloudFormation",
actions=[
"cloudformation:DescribeStackResource",
"cloudformation:DescribeStacks",
],
effect=iam.Effect.ALLOW,
resources=[core.Aws.STACK_ID],
resources=[
self._format_arn(
service="cloudformation",
resource=f"stack/{Stack.of(self).stack_name}/*",
region=Stack.of(self).region,
account=Stack.of(self).account,
),
core.Aws.STACK_ID,
],
),
iam.PolicyStatement(
sid="DynamoDBTable",
Expand Down
1 change: 1 addition & 0 deletions cli/src/pcluster/templates/login_nodes_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ def _add_login_nodes_pool_launch_template(self):
"hosted_zone": (str(self._cluster_hosted_zone.ref) if self._cluster_hosted_zone else ""),
"log_group_name": self._log_group.log_group_name,
"log_rotation_enabled": "true" if self._config.is_log_rotation_enabled else "false",
"pool_name": self._pool.name,
"node_type": "LoginNode",
"proxy": self._pool.networking.proxy.http_proxy_address if self._pool.networking.proxy else "NONE",
"raid_shared_dir": to_comma_separated_string(
Expand Down
16 changes: 16 additions & 0 deletions cli/tests/pcluster/example_configs/slurm.full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,22 @@ LoginNodes:
- subnet-12345678
Ssh:
KeyName: ec2-key-name
CustomActions:
OnNodeStart:
Script: https://test.tgz # s3:// | https://
Args:
- arg1
- arg2
OnNodeConfigured:
Script: https://test.tgz # s3:// | https://
Args:
- arg1
- arg2
OnNodeUpdated:
Script: https://test.tgz # s3:// | https://
Args:
- arg1
- arg2
Iam:
InstanceRole: arn:aws:iam::aws:role/LoginNodeRole
HeadNode:
Expand Down
11 changes: 8 additions & 3 deletions cli/tests/pcluster/schemas/test_cluster_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
HeadNodeIamSchema,
HeadNodeRootVolumeSchema,
ImageSchema,
LoginNodesCustomActionsSchema,
QueueCustomActionsSchema,
QueueIamSchema,
QueueTagSchema,
Expand Down Expand Up @@ -259,14 +260,18 @@ def test_head_node_root_volume_schema(mocker, config_dict, failure_message):
),
],
)
def test_head_node_custom_actions_schema(mocker, config_dict, failure_message):
def test_head_login_node_custom_actions_schema(mocker, config_dict, failure_message):
    """Validate head-node and login-node custom-action schemas against the same config.

    Both schemas must accept (load + dump round-trip) valid configs and
    reject invalid ones with the expected validation error.
    """
    mock_aws_api(mocker)
    schemas = (HeadNodeCustomActionsSchema(), LoginNodesCustomActionsSchema())
    if failure_message:
        # Check each schema in its own raises-context: with both loads inside
        # a single context, the first raise would skip the second schema's
        # validation entirely, leaving it untested.
        for schema in schemas:
            with pytest.raises(ValidationError, match=failure_message):
                schema.load(config_dict)
    else:
        for schema in schemas:
            # Round-trip to ensure the loaded resource also serializes cleanly.
            schema.dump(schema.load(config_dict))


@pytest.mark.parametrize(
Expand Down
22 changes: 18 additions & 4 deletions cli/tests/pcluster/templates/test_cluster_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,11 +756,25 @@ def assert_iam_policy_properties(self, template, resource_name: str):
"Sid": "Autoscaling",
},
{
"Action": "cloudformation:DescribeStackResource",
"Action": ["cloudformation:DescribeStackResource", "cloudformation:DescribeStacks"],
"Effect": "Allow",
"Resource": {
"Ref": "AWS::StackId",
},
"Resource": [
{
"Fn::Join": [
"",
[
"arn:",
{"Ref": "AWS::Partition"},
":cloudformation:",
{"Ref": "AWS::Region"},
":",
{"Ref": "AWS::AccountId"},
":stack/clustername/*",
],
]
},
{"Ref": "AWS::StackId"},
],
"Sid": "CloudFormation",
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,26 @@ LoginNodes:
HttpProxyAddress: https://proxy-address:port
Ssh:
KeyName: validate_key_name
CustomActions:
OnNodeStart:
Script: https://test.tgz
Args:
# Number of args doesn't impact number of resources, just the size of the template
{% for i in range(number_of_script_args) %}
- arg{{ i }}
{% endfor %}
OnNodeConfigured:
Script: https://test.tgz
Args:
{% for i in range(number_of_script_args) %}
- arg{{ i }}
{% endfor %}
OnNodeUpdated:
Script: https://test.tgz
Args:
{% for i in range(number_of_script_args) %}
- arg{{ i }}
{% endfor %}
Iam:
AdditionalIamPolicies:
- Policy: arn:aws:iam::aws:policy/AdministratorAccess
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ Imds:
LoginNodes:
Pools:
- Count: 1
CustomActions:
OnNodeConfigured:
Args:
- arg0
Script: https://test.tgz
OnNodeStart:
Args:
- arg0
Script: https://test.tgz
OnNodeUpdated:
Args:
- arg0
Script: https://test.tgz
GracetimePeriod: 10
Iam:
AdditionalIamPolicies:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"hosted_zone": "{\"Ref\": \"referencetoclusternameRoute53HostedZone2388733DRef\"}",
"log_group_name": "/aws/parallelcluster/clustername-202401151530",
"log_rotation_enabled": "true",
"pool_name": "login",
"node_type": "LoginNode",
"proxy": "NONE",
"raid_shared_dir": "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"hosted_zone": "{\"Ref\": \"referencetoclusternameRoute53HostedZone2388733DRef\"}",
"log_group_name": "/aws/parallelcluster/clustername-202401151530",
"log_rotation_enabled": "true",
"pool_name": "login",
"node_type": "LoginNode",
"proxy": "NONE",
"raid_shared_dir": "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,13 @@ def _test_custom_bootstrap_scripts_args_quotes(cluster):
The cluster should be created and running.
"""
# Check head node and compute node status
check_status(cluster, "CREATE_COMPLETE", head_node_status="running", compute_fleet_status="RUNNING")
check_status(
cluster,
"CREATE_COMPLETE",
head_node_status="running",
compute_fleet_status="RUNNING",
login_nodes_status="active",
)


def _test_disable_hyperthreading(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,30 @@ HeadNode:
Dcv:
Enabled: True
{% endif %}
LoginNodes:
Pools:
- Name: pool
InstanceType: {{ instance }}
Count: 3
Networking:
SubnetIds:
- {{ private_subnet_id }}
CustomActions:
OnNodeStart:
Script: s3://{{ bucket_name }}/scripts/pre_install.sh
Args:
- "R curl wget"
- arg2
- 'arg3 arg3'
OnNodeConfigured:
Script: s3://{{ bucket_name }}/scripts/post_install.sh
Args:
- "R curl wget"
- arg2
- 'arg3 arg3'
Iam:
AdditionalIamPolicies:
- Policy: "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess"
Scheduling:
Scheduler: {{ scheduler }}
{% if scheduler == "awsbatch" %}AwsBatchQueues:{% else %}SlurmQueues:{% endif %}
Expand Down Expand Up @@ -130,4 +154,4 @@ Monitoring:
SharedStorage:
- MountDir: /shared
Name: name1
StorageType: Ebs
StorageType: Ebs
8 changes: 6 additions & 2 deletions tests/integration-tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,15 +563,19 @@ def check_head_node_security_group(region, cluster, port, expected_cidr):
assert_that(target["IpRanges"][0]["CidrIp"]).is_equal_to(expected_cidr)


def check_status(cluster, cluster_status=None, head_node_status=None, compute_fleet_status=None):
"""Check the cluster's status and its head and compute status is as expected."""
def check_status(
    cluster, cluster_status=None, head_node_status=None, compute_fleet_status=None, login_nodes_status=None
):
    """Assert that the cluster, head node, compute fleet, and login nodes statuses match expectations.

    Each expected status is optional; only the ones provided (truthy) are checked
    against the output of ``cluster.describe_cluster()``.
    """
    description = cluster.describe_cluster()
    # Lazy accessors so a missing key (e.g. no login nodes) is only touched
    # when the corresponding expectation was actually supplied.
    expectations = (
        (cluster_status, lambda: description["clusterStatus"]),
        (head_node_status, lambda: description["headNode"]["state"]),
        (compute_fleet_status, lambda: description["computeFleetStatus"]),
        (login_nodes_status, lambda: description["loginNodes"]["status"]),
    )
    for expected, get_actual in expectations:
        if expected:
            assert_that(get_actual()).is_equal_to(expected)


@retry(wait_fixed=seconds(20), stop_max_delay=minutes(5))
Expand Down

0 comments on commit 2a13ae6

Please sign in to comment.