Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Containerapp] Support debug console #7945

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions src/containerapp/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Release History
===============
upcoming
++++++
* 'az containerapp debug': Open an SSH-like interactive shell within a container app debug console.

1.0.0b2
++++++
Expand Down
9 changes: 9 additions & 0 deletions src/containerapp/azext_containerapp/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -2054,3 +2054,12 @@
az containerapp job registry set -n my-containerapp-job -g MyResourceGroup \\
--server MyContainerappJobRegistry.azurecr.io --identity system-environment
"""

# Help entry for `az containerapp debug`: a one-line summary plus a single usage example
# that selects the target by revision, replica and container.
helps['containerapp debug'] = """
type: command
short-summary: Open an SSH-like interactive shell within a container app debug console.
examples:
- name: debug by connecting to a container app's debug console by replica, revision and container
text: |
az containerapp debug -n MyContainerapp -g MyResourceGroup --revision MyRevision --replica MyReplica --container MyContainer
"""
10 changes: 10 additions & 0 deletions src/containerapp/azext_containerapp/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,3 +440,13 @@ def load_arguments(self, _):
c.argument('logger_name', help="The logger name.")
c.argument('logger_level', arg_type=get_enum_type(["off", "error", "info", "debug", "trace", "warn"]), help="Set the log level for the specific logger name.")
c.argument('all', help="The flag to indicate all logger settings.", action="store_true")

with self.argument_context('containerapp debug') as c:
c.argument('container',
help="The container name that the debug console will connect to. Defaults to the first container of first replica.")
fangjian0423 marked this conversation as resolved.
Show resolved Hide resolved
c.argument('replica',
help="The name of the replica. List replicas with 'az containerapp replica list'. A replica may be not found when it's scaled to zero if there is no traffic to your app. Defaults to the first replica of 'az containerapp replica list'.")
c.argument('revision',
help="The name of the container app revision. Defaults to the latest revision.")
c.argument('name', name_type, id_part=None, help="The name of the Containerapp.")
c.argument('resource_group_name', arg_type=resource_group_name_type, id_part=None)
43 changes: 43 additions & 0 deletions src/containerapp/azext_containerapp/_ssh_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
# pylint: disable=logging-fstring-interpolation
# pylint: disable=possibly-used-before-assignment

from azure.cli.command_modules.containerapp._ssh_utils import WebSocketConnection, SSH_TERM_RESIZE_PREFIX, \
SSH_DEFAULT_ENCODING, read_ssh
from azure.cli.core.commands.client_factory import get_subscription_id

from knack.log import get_logger

logger = get_logger(__name__)


class DebugWebSocketConnection(WebSocketConnection):
    """WebSocket connection whose URL points at a replica's ``/debug`` endpoint.

    Everything except URL construction is inherited from ``WebSocketConnection``.
    """

    def __init__(self, cmd, resource_group_name, name, revision, replica, container):
        # The base initializer takes one more trailing argument than this class
        # exposes; an empty string is always passed for it.
        super().__init__(cmd, resource_group_name, name, revision, replica, container, "")

    def _get_url(self, cmd, resource_group_name, name, revision, replica, container, startup_command):
        """Return the ``wss://`` URL of the debug endpoint for the given replica.

        ``startup_command`` is part of the overridden signature but is not used here.
        Raises ``ValueError`` if the endpoint does not contain ``/subscriptions/``.
        """
        subscription_id = get_subscription_id(cmd.cli_ctx)
        # NOTE(review): `_logstream_endpoint` is presumably populated by the base class - confirm.
        endpoint = self._logstream_endpoint
        # Keep only what precedes "/subscriptions/" and strip the https scheme to get the host part.
        cut = endpoint.index("/subscriptions/")
        host = endpoint[:cut].replace("https://", "")

        path = (
            f"/subscriptions/{subscription_id}"
            f"/resourceGroups/{resource_group_name}"
            f"/containerApps/{name}"
            f"/revisions/{revision}"
            f"/replicas/{replica}"
            "/debug"
        )
        return f"wss://{host}{path}?targetContainer={container}"


def read_debug_ssh(connection: WebSocketConnection, response_encodings):
    """Send an initial terminal-resize message, then delegate to ``read_ssh``.

    :param connection: the WebSocket connection to the debug console.
    :param response_encodings: passed through unchanged to ``read_ssh``.
    """
    from shutil import get_terminal_size

    terminal = get_terminal_size()
    if connection.is_connected:
        # The resize message has to be sent twice per session: once here and once
        # more inside `read_ssh`. This mirrors the `kubectl debug` implementation:
        # https://github.com/kubernetes/kubectl/blob/14f6a11dd84315dc5179ff04156b338def935eaa/pkg/cmd/attach/attach.go#L296
        # This first message reports the width as one column more than the real terminal.
        payload = f'{{"Width": {terminal.columns + 1}, "Height": {terminal.lines}}}'
        message = b"".join((SSH_TERM_RESIZE_PREFIX, payload.encode(SSH_DEFAULT_ENCODING)))
        connection.send(message)

    read_ssh(connection, response_encodings)
57 changes: 54 additions & 3 deletions src/containerapp/azext_containerapp/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@
from knack.log import get_logger
from urllib.parse import urlparse

from azure.cli.core.azclierror import (ValidationError, InvalidArgumentValueError,
MutuallyExclusiveArgumentError, RequiredArgumentMissingError)
from azure.cli.command_modules.containerapp._utils import is_registry_msi_system
from azure.mgmt.core.tools import is_valid_resource_id

from azure.cli.command_modules.containerapp._utils import is_registry_msi_system, safe_get
from azure.cli.command_modules.containerapp._validators import _validate_revision_exists, _validate_replica_exists, \
_validate_container_exists
from azure.cli.core.azclierror import (InvalidArgumentValueError,
MutuallyExclusiveArgumentError, RequiredArgumentMissingError,
ResourceNotFoundError, ValidationError)

from ._clients import ContainerAppPreviewClient
from ._utils import is_registry_msi_system_environment

from ._constants import ACR_IMAGE_SUFFIX, \
Expand Down Expand Up @@ -215,3 +220,49 @@ def validate_timeout_in_seconds(cmd, namespace):
if timeout_in_seconds is not None:
if timeout_in_seconds < 0 or timeout_in_seconds > 60:
raise ValidationError("timeout in seconds must be in range [0, 60].")


def validate_debug(cmd, namespace):
    """Validator for ``az containerapp debug``.

    Fills in defaults for any of ``revision``, ``replica`` and ``container`` the
    user omitted, then runs the existence check only for the values the user
    supplied explicitly (values filled in by ``_set_debug_defaults`` are not
    re-validated).

    :param cmd: the CLI command context.
    :param namespace: parsed arguments; ``revision``, ``replica`` and ``container``
        may be updated in place by ``_set_debug_defaults``.
    """
    # Emit status through the CLI logger instead of print() so stdout is not polluted.
    get_logger(__name__).warning("Validating...")

    # Record what the user passed before the defaults overwrite the empty values.
    revision_already_set = bool(namespace.revision)
    replica_already_set = bool(namespace.replica)
    container_already_set = bool(namespace.container)

    _set_debug_defaults(cmd, namespace)

    if revision_already_set:
        _validate_revision_exists(cmd, namespace)
    if replica_already_set:
        _validate_replica_exists(cmd, namespace)
    if container_already_set:
        _validate_container_exists(cmd, namespace)


def _set_debug_defaults(cmd, namespace):
    """Fill in whichever of ``revision``, ``replica`` and ``container`` are unset.

    Defaults applied, in order:
      * revision  - the app's ``properties.latestRevisionName``.
      * replica   - the first replica listed for the revision.
      * container - the first container of that replica when the replica was
        defaulted here; otherwise the first container in the revision's
        ``properties.template.containers``. If neither yields a container,
        ``namespace.container`` is left unset.

    :param cmd: the CLI command context.
    :param namespace: parsed arguments, updated in place.
    :raises ResourceNotFoundError: if the container app, a revision, or a replica
        cannot be found.

    NOTE(review): during review it was suggested to list revisions instead of
    fetching the app (a 404/None would mean the app does not exist, an empty list
    would mean it has no revisions, and the list could be reused to check the
    container argument). The author's concern was that the list call appears to
    return every revision, active or not, so the app lookup was kept.
    """
    app = ContainerAppPreviewClient.show(cmd, namespace.resource_group_name, namespace.name)
    if not app:
        raise ResourceNotFoundError("Could not find a container app")

    if not namespace.revision:
        namespace.revision = app.get("properties", {}).get("latestRevisionName")
        if not namespace.revision:
            raise ResourceNotFoundError("Could not find a revision")

    if not namespace.replica:
        replicas = ContainerAppPreviewClient.list_replicas(
            cmd=cmd,
            resource_group_name=namespace.resource_group_name,
            container_app_name=namespace.name,
            revision_name=namespace.revision
        )
        if not replicas:
            raise ResourceNotFoundError("Could not find an active replica")
        first_replica = replicas[0]
        namespace.replica = first_replica["name"]
        # Tolerate a replica payload without container details; the revision
        # template lookup below then supplies the default container.
        replica_containers = safe_get(first_replica, "properties", "containers")
        if not namespace.container and replica_containers:
            namespace.container = replica_containers[0]["name"]

    if not namespace.container:
        revision = ContainerAppPreviewClient.show_revision(
            cmd,
            resource_group_name=namespace.resource_group_name,
            container_app_name=namespace.name,
            name=namespace.revision
        )
        revision_containers = safe_get(revision, "properties", "template", "containers")
        if revision_containers:
            namespace.container = revision_containers[0]["name"]
3 changes: 3 additions & 0 deletions src/containerapp/azext_containerapp/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@
# from azure.cli.core.commands import CliCommandType
# from azure.mgmt.core.tools import is_valid_resource_id, parse_resource_id
from azure.cli.command_modules.containerapp._transformers import (transform_containerapp_output, transform_containerapp_list_output)

from azext_containerapp._client_factory import ex_handler_factory
from ._transformers import (transform_sensitive_values,
transform_telemetry_data_dog_values,
transform_telemetry_app_insights_values,
transform_telemetry_otlp_values,
transform_telemetry_otlp_values_by_name_wrapper)
from ._utils import is_cloud_supported_by_connected_env
from ._validators import validate_debug


def load_command_table(self, args):
Expand All @@ -24,6 +26,7 @@ def load_command_table(self, args):
g.custom_command('update', 'update_containerapp', supports_no_wait=True, exception_handler=ex_handler_factory(), table_transformer=transform_containerapp_output, transform=transform_sensitive_values)
g.custom_command('delete', 'delete_containerapp', supports_no_wait=True, confirmation=True, exception_handler=ex_handler_factory())
g.custom_command('up', 'containerapp_up', supports_no_wait=False, exception_handler=ex_handler_factory())
g.custom_command('debug', 'containerapp_debug', is_preview=True, validator=validate_debug)

with self.command_group('containerapp replica') as g:
g.custom_show_command('show', 'get_replica') # TODO implement the table transformer
Expand Down
37 changes: 37 additions & 0 deletions src/containerapp/azext_containerapp/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
# --------------------------------------------------------------------------------------------
# pylint: disable=line-too-long, unused-argument, logging-fstring-interpolation, logging-not-lazy, consider-using-f-string, logging-format-interpolation, inconsistent-return-statements, broad-except, bare-except, too-many-statements, too-many-locals, too-many-boolean-expressions, too-many-branches, too-many-nested-blocks, pointless-statement, expression-not-assigned, unbalanced-tuple-unpacking, unsupported-assignment-operation

import threading
import time
from urllib.parse import urlparse
import json
import requests
import subprocess
from concurrent.futures import ThreadPoolExecutor

from azure.cli.command_modules.containerapp._ssh_utils import SSH_BACKUP_ENCODING, SSH_CTRL_C_MSG, get_stdin_writer
from azure.cli.core import telemetry as telemetry_core

from azure.cli.core.azclierror import (
Expand Down Expand Up @@ -112,6 +114,8 @@
AzureFileProperties as AzureFilePropertiesModel
)

from ._ssh_utils import (SSH_DEFAULT_ENCODING, DebugWebSocketConnection, read_debug_ssh)

from ._utils import connected_env_check_cert_name_availability, get_oryx_run_image_tags, patchable_check, get_pack_exec_path, is_docker_running, parse_build_env_vars, env_has_managed_identity

from ._constants import (CONTAINER_APPS_RP,
Expand Down Expand Up @@ -3224,3 +3228,36 @@ def set_registry_job(cmd, name, resource_group_name, server, username=None, pass
containerapp_job_registry_set_decorator.construct_payload()
r = containerapp_job_registry_set_decorator.set()
return r


def containerapp_debug(cmd, resource_group_name, name, container=None, revision=None, replica=None):
    """Open an interactive session against a container app replica's debug console.

    Opens a ``DebugWebSocketConnection``, starts one daemon thread that reads from
    the socket and one that forwards stdin, and then polls until the connection
    closes.

    :param cmd: the CLI command context.
    :param resource_group_name: resource group of the container app.
    :param name: container app name.
    :param container: target container name.
    :param revision: revision name.
    :param replica: replica name.
    """
    # Emit status through the CLI logger instead of print() so stdout is not polluted.
    logger.warning("Connecting...")
    conn = DebugWebSocketConnection(
        cmd=cmd,
        resource_group_name=resource_group_name,
        name=name,
        revision=revision,
        replica=replica,
        container=container
    )

    encodings = [SSH_DEFAULT_ENCODING, SSH_BACKUP_ENCODING]
    reader = threading.Thread(target=read_debug_ssh, args=(conn, encodings))
    reader.daemon = True
    reader.start()

    writer = get_stdin_writer(conn)
    writer.daemon = True
    writer.start()

    while conn.is_connected:
        # If either worker thread has exited, the session cannot continue.
        if not reader.is_alive() or not writer.is_alive():
            logger.warning("Reader or Writer for WebSocket is not alive. Closing the connection.")
            conn.disconnect()

        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            # Forward Ctrl+C to the remote shell instead of terminating the CLI.
            if conn.is_connected:
                logger.info("Caught KeyboardInterrupt. Sending ctrl+c to server")
                conn.send(SSH_CTRL_C_MSG)
Loading
Loading