Skip to content

Commit

Permalink
[Core][Disk] High disk tier on Azure (#3921)
Browse files Browse the repository at this point in the history
* init

* address comments and fix tests

* format

* remove unused function & nit
  • Loading branch information
cblmemo committed Sep 7, 2024
1 parent 90a840f commit 363f27b
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 13 deletions.
29 changes: 18 additions & 11 deletions sky/clouds/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,11 @@ class Azure(clouds.Cloud):
# names, so the limit is 64 - 4 - 7 - 10 = 43.
# Reference: https://azure.github.io/PSRule.Rules.Azure/en/rules/Azure.ResourceGroup.Name/ # pylint: disable=line-too-long
_MAX_CLUSTER_NAME_LEN_LIMIT = 42
_BEST_DISK_TIER = resources_utils.DiskTier.MEDIUM
_BEST_DISK_TIER = resources_utils.DiskTier.HIGH
_DEFAULT_DISK_TIER = resources_utils.DiskTier.MEDIUM
# Azure does not support ultra disk tier.
_SUPPORTED_DISK_TIERS = (
set(resources_utils.DiskTier) -
{resources_utils.DiskTier.HIGH, resources_utils.DiskTier.ULTRA})
_SUPPORTED_DISK_TIERS = (set(resources_utils.DiskTier) -
{resources_utils.DiskTier.ULTRA})

_INDENT_PREFIX = ' ' * 4

Expand Down Expand Up @@ -361,7 +360,9 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]:
start_index += 1
assert False, 'Low disk tier should always be supported on Azure.'

return {
disk_tier = _failover_disk_tier()

resources_vars = {
'instance_type': r.instance_type,
'custom_resources': custom_resources,
'num_gpus': acc_count,
Expand All @@ -371,12 +372,18 @@ def _failover_disk_tier() -> Optional[resources_utils.DiskTier]:
'zones': None,
**image_config,
'need_nvidia_driver_extension': need_nvidia_driver_extension,
'disk_tier': Azure._get_disk_type(_failover_disk_tier()),
'disk_tier': Azure._get_disk_type(disk_tier),
'cloud_init_setup_commands': cloud_init_setup_commands,
'azure_subscription_id': self.get_project_id(dryrun),
'resource_group': f'{cluster_name.name_on_cloud}-{region_name}',
}

# Setting disk performance tier for high disk tier.
if disk_tier == resources_utils.DiskTier.HIGH:
resources_vars['disk_performance_tier'] = 'P50'

return resources_vars

def _get_feasible_launchable_resources(
self, resources: 'resources.Resources'
) -> 'resources_utils.FeasibleResources':
Expand Down Expand Up @@ -600,18 +607,18 @@ def check_disk_tier(
disk_tier: Optional[resources_utils.DiskTier]) -> Tuple[bool, str]:
if disk_tier is None or disk_tier == resources_utils.DiskTier.BEST:
return True, ''
if disk_tier == resources_utils.DiskTier.HIGH or disk_tier == resources_utils.DiskTier.ULTRA:
if disk_tier == resources_utils.DiskTier.ULTRA:
return False, (
'Azure disk_tier={high, ultra} is not supported now. '
'Please use disk_tier={low, medium, best} instead.')
'Azure disk_tier=ultra is not supported now. '
'Please use disk_tier={low, medium, high, best} instead.')
# Only S-series supported premium ssd
# see https://stackoverflow.com/questions/48590520/azure-requested-operation-cannot-be-performed-because-storage-account-type-pre # pylint: disable=line-too-long
if cls._get_disk_type(
disk_tier
) == 'Premium_LRS' and not Azure._is_s_series(instance_type):
return False, (
'Azure premium SSDs are only supported for S-series '
'instances. To use disk_tier=medium, please make sure '
'instances. To use disk_tier>=medium, please make sure '
'instance_type is specified to an S-series instance.')
return True, ''

Expand All @@ -631,7 +638,7 @@ def _get_disk_type(cls,
# cannot be used as OS disks so we might need data disk support
tier2name = {
resources_utils.DiskTier.ULTRA: 'Disabled',
resources_utils.DiskTier.HIGH: 'Disabled',
resources_utils.DiskTier.HIGH: 'Premium_LRS',
resources_utils.DiskTier.MEDIUM: 'Premium_LRS',
resources_utils.DiskTier.LOW: 'Standard_LRS',
}
Expand Down
12 changes: 12 additions & 0 deletions sky/provision/azure/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sky.provision import common
from sky.provision import constants
from sky.utils import common_utils
from sky.utils import subprocess_utils
from sky.utils import ux_utils

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -274,6 +275,17 @@ def _create_instances(
deployment_name=vm_name,
parameters=parameters,
).wait()

performance_tier = node_config.get('disk_performance_tier', None)
if performance_tier is not None:
disks = compute_client.disks.list_by_resource_group(resource_group)
for disk in disks:
name = disk.name
# TODO(tian): Investigate if we can use Python SDK to update this.
subprocess_utils.run_no_outputs(
f'az disk update -n {name} -g {resource_group} '
f'--set tier={performance_tier}')

filters = {
constants.TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud,
_TAG_SKYPILOT_VM_ID: vm_id
Expand Down
3 changes: 3 additions & 0 deletions sky/templates/azure-ray.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ available_node_types:
{{ cmd }}
{%- endfor %}
need_nvidia_driver_extension: {{need_nvidia_driver_extension}}
{%- if disk_performance_tier is not none %}
disk_performance_tier: {{disk_performance_tier}}
{%- endif %}
# TODO: attach disk

head_node_type: ray.head.default
Expand Down
4 changes: 2 additions & 2 deletions tests/test_optimizer_dryruns.py
Original file line number Diff line number Diff line change
Expand Up @@ -765,12 +765,12 @@ def _get_all_candidate_cloud(r: sky.Resources) -> Set[clouds.Cloud]:
map(clouds.CLOUD_REGISTRY.get,
['aws', 'gcp', 'azure', 'oci'])), low_tier_candidates

# Only AWS, GCP, OCI supports HIGH disk tier.
# Only AWS, GCP, Azure, and OCI support HIGH disk tier.
high_tier_resources = sky.Resources(disk_tier=resources_utils.DiskTier.HIGH)
high_tier_candidates = _get_all_candidate_cloud(high_tier_resources)
assert high_tier_candidates == set(
map(clouds.CLOUD_REGISTRY.get,
['aws', 'gcp', 'oci'])), high_tier_candidates
['aws', 'gcp', 'azure', 'oci'])), high_tier_candidates

# Only AWS and GCP support ULTRA disk tier.
ultra_tier_resources = sky.Resources(
Expand Down

0 comments on commit 363f27b

Please sign in to comment.