Skip to content

Commit

Permalink
Add retry functionality to monitor status request
Browse files Browse the repository at this point in the history
  • Loading branch information
aeshub committed Apr 24, 2024
1 parent 9c48a1c commit c19f69d
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/isar/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def __init__(self) -> None:
# Number of attempts to initiate a step or mission before cancelling
INITIATE_FAILURE_COUNTER_LIMIT: int = Field(default=10)

# Number of attempts to request a step status in monitor before cancelling
REQUEST_STATUS_FAILURE_COUNTER_LIMIT: int = Field(default=3)

# Number of attempts to stop the robot before giving up
STOP_ROBOT_ATTEMPTS_LIMIT: int = Field(default=10)

Expand Down
37 changes: 36 additions & 1 deletion src/isar/state_machine/states/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from transitions import State

from isar.mission_planner.task_selector_interface import TaskSelectorStop
from isar.config.settings import settings
from isar.services.utilities.threaded_request import (
ThreadedRequest,
ThreadedRequestNotFinishedError,
Expand All @@ -17,6 +18,7 @@
RobotMissionStatusException,
RobotRetrieveInspectionException,
RobotStepStatusException,
RobotCommunicationTimeoutException,
)
from robot_interface.models.inspection.inspection import Inspection
from robot_interface.models.mission.mission import Mission
Expand All @@ -32,6 +34,10 @@ class Monitor(State):
def __init__(self, state_machine: "StateMachine") -> None:
super().__init__(name="monitor", on_enter=self.start, on_exit=self.stop)
self.state_machine: "StateMachine" = state_machine
self.request_status_failure_counter: int = 0
self.request_status_failure_counter_limit: int = (
settings.REQUEST_STATUS_FAILURE_COUNTER_LIMIT
)

self.logger = logging.getLogger("state_machine")
self.step_status_thread: Optional[ThreadedRequest] = None
Expand Down Expand Up @@ -61,7 +67,6 @@ def _run(self) -> None:
status_function=self.state_machine.robot.step_status,
thread_name="State Machine Monitor Get Step Status",
)

try:
status: Union[StepStatus, MissionStatus] = (
self.step_status_thread.get_output()
Expand All @@ -70,6 +75,34 @@ def _run(self) -> None:
time.sleep(self.state_machine.sleep_time)
continue

except RobotCommunicationTimeoutException as e:
self.state_machine.current_mission.error_message = ErrorMessage(
error_reason=e.error_reason, error_description=e.error_description
)
self.step_status_thread = None
self.request_status_failure_counter += 1
self.logger.warning(
f"Monitoring step {self.state_machine.current_step.id} failed #: "
f"{self.request_status_failure_counter} failed because: {e.error_description}"
)

if (
self.request_status_failure_counter
>= self.request_status_failure_counter_limit
):
self.state_machine.current_step.error_message = ErrorMessage(
error_reason=e.error_reason,
error_description=e.error_description,
)
self.logger.error(
f"Step will be cancelled after failing to get step status "
f"{self.request_status_failure_counter} times because: "
f"{e.error_description}"
)
status = StepStatus.Failed
else:
continue

except RobotStepStatusException as e:
self.state_machine.current_step.error_message = ErrorMessage(
error_reason=e.error_reason, error_description=e.error_description
Expand Down Expand Up @@ -98,6 +131,8 @@ def _run(self) -> None:
f"Retrieving the status failed because: {e.error_description}"
)

self.request_status_failure_counter = 0

if isinstance(status, StepStatus):
self.state_machine.current_step.status = status
elif isinstance(status, MissionStatus):
Expand Down
13 changes: 13 additions & 0 deletions src/robot_interface/models/exceptions/robot_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

class ErrorReason(str, Enum):
RobotCommunicationException: str = "robot_communication_exception"
RobotCommunicationTimeoutException: str = "robot_communication_timeout_exception"
RobotInfeasibleStepException: str = "robot_infeasible_step_exception"
RobotInfeasibleMissionException: str = "robot_infeasible_mission_exception"
RobotMissionStatusException: str = "robot_mission_status_exception"
Expand Down Expand Up @@ -49,6 +50,18 @@ def __init__(self, error_description: str) -> None:
pass


# An exception which should be thrown by the robot package if the communication has timed
# out and ISAR should retry the request.
class RobotCommunicationTimeoutException(RobotException):
def __init__(self, error_description: str) -> None:
super().__init__(
error_reason=ErrorReason.RobotCommunicationTimeoutException,
error_description=error_description,
)

pass


# An exception which should be thrown by the robot package if it is unable to start the
# current step.
class RobotInfeasibleStepException(RobotException):
Expand Down

0 comments on commit c19f69d

Please sign in to comment.