Cherry pick master changes to IBM MQ

I did not cherry-pick release PRs (#9708, #9492, #9456) or documentation-only PRs (#9418, #9616, #9642) from the git history, in order to keep the commit as small as possible. PR #9400 is also relevant, but it was already in the git history of this branch (run `git log -- ibm_mq` to double-check).

Don't emit any warnings if NO_MSG_AVAILABLE is received (#9452)
(cherry picked from commit 13c10d9)

Properly close internal reply queues (#9488)
* Properly close internal reply queues
* Define pcf
(cherry picked from commit 9d395c0)

Add debug line when there are no messages available (#9702)
* Add debug line when there are no messages available
(cherry picked from commit 3055228)

Do not submit critical service check when there are no messages (#9703)
(cherry picked from commit f0568c1)
DataDog · Jul 27, 2021 · 6f4e2ea · 6f4e2ea
1 parent 9462fe5
commit 6f4e2ea
Show file tree

Hide file tree

Showing 4 changed files with 77 additions and 11 deletions.
diff --git a/ibm_mq/datadog_checks/ibm_mq/collectors/channel_metric_collector.py b/ibm_mq/datadog_checks/ibm_mq/collectors/channel_metric_collector.py
@@ -52,7 +52,9 @@ def get_pcf_channel_metrics(self, queue_manager):
         except pymqi.MQMIError as e:
             # Don't warn if no messages, see:
             # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
-            if not (e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE):
+            if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                self.log.debug("There are no messages available for PCF channel")
+            else:
                 self.log.warning("Error getting CHANNEL stats %s", e)
         else:
             channels = len(response)
@@ -91,10 +93,14 @@ def _submit_channel_status(self, queue_manager, search_channel_name, tags, chann
             response = pcf.MQCMD_INQUIRE_CHANNEL_STATUS(args)
             self.service_check(self.CHANNEL_SERVICE_CHECK, AgentCheck.OK, search_channel_tags)
         except pymqi.MQMIError as e:
-            self.service_check(self.CHANNEL_SERVICE_CHECK, AgentCheck.CRITICAL, search_channel_tags)
             if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQCFC.MQRCCF_CHL_STATUS_NOT_FOUND:
+                self.service_check(self.CHANNEL_SERVICE_CHECK, AgentCheck.CRITICAL, search_channel_tags)
                 self.log.debug("Channel status not found for channel %s: %s", search_channel_name, e)
+            elif e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                self.service_check(self.CHANNEL_SERVICE_CHECK, AgentCheck.UNKNOWN, search_channel_tags)
+                self.log.debug("There are no messages available for channel %s", search_channel_name)
             else:
+                self.service_check(self.CHANNEL_SERVICE_CHECK, AgentCheck.CRITICAL, search_channel_tags)
                 self.log.warning("Error getting CHANNEL status for channel %s: %s", search_channel_name, e)
         else:
             for channel_info in response:

diff --git a/ibm_mq/datadog_checks/ibm_mq/collectors/queue_metric_collector.py b/ibm_mq/datadog_checks/ibm_mq/collectors/queue_metric_collector.py
@@ -82,18 +82,27 @@ def _discover_queues(self, queue_manager, mq_pattern_filter):
 
         for queue_type in SUPPORTED_QUEUE_TYPES:
             args = {pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(mq_pattern_filter), pymqi.CMQC.MQIA_Q_TYPE: queue_type}
+            pcf = None
             try:
                 pcf = pymqi.PCFExecute(queue_manager, convert=self.config.convert_endianness)
                 response = pcf.MQCMD_INQUIRE_Q(args)
             except pymqi.MQMIError as e:
                 # Don't warn if no messages, see:
                 # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
-                if not (e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE):
+                if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                    self.log.debug("No queue info available")
+                else:
                     self.warning("Error discovering queue: %s", e)
             else:
                 for queue_info in response:
                     queue = queue_info[pymqi.CMQC.MQCA_Q_NAME]
                     queues.append(to_string(queue).strip())
+            finally:
+                # Close internal reply queue to prevent filling up a dead-letter queue.
+                # https://github.com/dsuch/pymqi/blob/084ab0b2638f9d27303a2844badc76635c4ad6de/code/pymqi/__init__.py#L2892-L2902
+                # https://dsuch.github.io/pymqi/examples.html#how-to-specify-dynamic-reply-to-queues
+                if pcf is not None:
+                    pcf.disconnect()
 
         return queues
 
@@ -115,19 +124,25 @@ def queue_stats(self, queue_manager, queue_name, tags):
         """
         Grab stats from queues
         """
+        pcf = None
         try:
             args = {pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(queue_name), pymqi.CMQC.MQIA_Q_TYPE: pymqi.CMQC.MQQT_ALL}
             pcf = pymqi.PCFExecute(queue_manager, convert=self.config.convert_endianness)
             response = pcf.MQCMD_INQUIRE_Q(args)
         except pymqi.MQMIError as e:
             # Don't warn if no messages, see:
             # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
-            if not (e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE):
+            if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                self.log.debug("No stat messages available for queue %s", queue_name)
+            else:
                 self.warning("Error getting queue stats for %s: %s", queue_name, e)
         else:
             # Response is a list. It likely has only one member in it.
             for queue_info in response:
                 self._submit_queue_stats(queue_info, queue_name, tags)
+        finally:
+            if pcf is not None:
+                pcf.disconnect()
 
     def _submit_queue_stats(self, queue_info, queue_name, tags):
         for metric_suffix, mq_attr in iteritems(metrics.queue_metrics()):
@@ -146,6 +161,7 @@ def _submit_queue_stats(self, queue_info, queue_name, tags):
                     self.log.debug("Attribute %s (%s) not found for queue %s", metric_suffix, mq_attr, queue_name)
 
     def get_pcf_queue_status_metrics(self, queue_manager, queue_name, tags):
+        pcf = None
         try:
             args = {
                 pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(queue_name),
@@ -157,8 +173,10 @@ def get_pcf_queue_status_metrics(self, queue_manager, queue_name, tags):
         except pymqi.MQMIError as e:
             # Don't warn if no messages, see:
             # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
-            if not (e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE):
-                self.warning("Error getting pcf queue stats for %s: %s", queue_name, e)
+            if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                self.log.debug("No PCF queue status messages available for queue %s", queue_name)
+            else:
+                self.warning("Error getting pcf queue status for %s: %s", queue_name, e)
         else:
             # Response is a list. It likely has only one member in it.
             for queue_info in response:
@@ -174,20 +192,29 @@ def get_pcf_queue_status_metrics(self, queue_manager, queue_name, tags):
                         msg = "Unable to get {}, turn on queue level monitoring to access these metrics for {}"
                         msg = msg.format(mname, queue_name)
                         self.log.debug(msg)
+        finally:
+            if pcf is not None:
+                pcf.disconnect()
 
     def get_pcf_queue_reset_metrics(self, queue_manager, queue_name, tags):
+        pcf = None
         try:
             args = {pymqi.CMQC.MQCA_Q_NAME: pymqi.ensure_bytes(queue_name)}
             pcf = pymqi.PCFExecute(queue_manager, convert=self.config.convert_endianness)
             response = pcf.MQCMD_RESET_Q_STATS(args)
         except pymqi.MQMIError as e:
             # Don't warn if no messages, see:
             # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
-            if not (e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE):
-                self.warning("Error getting pcf queue stats for %s: %s", queue_name, e)
+            if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                self.log.debug("No PCF queue reset metrics messages available for queue %s", queue_name)
+            else:
+                self.warning("Error getting pcf queue reset metrics for %s: %s", queue_name, e)
         else:
             # Response is a list. It likely has only one member in it.
             for queue_info in response:
                 metrics_map = metrics.pcf_status_reset_metrics()
                 prefix = "{}.queue".format(metrics.METRIC_PREFIX)
                 self.send_metrics_from_properties(queue_info, metrics_map, prefix, tags)
+        finally:
+            if pcf is not None:
+                pcf.disconnect()
diff --git a/ibm_mq/datadog_checks/ibm_mq/collectors/stats_collector.py b/ibm_mq/datadog_checks/ibm_mq/collectors/stats_collector.py
@@ -68,7 +68,9 @@ def collect(self, queue_manager):
         except pymqi.MQMIError as e:
             # Don't warn if no messages, see:
             # https://github.com/dsuch/pymqi/blob/v1.12.0/docs/examples.rst#how-to-wait-for-multiple-messages
-            if not (e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE):
+            if e.comp == pymqi.CMQC.MQCC_FAILED and e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
+                self.log.debug("No messages available")
+            else:
                 raise
         finally:
             queue.close()

diff --git a/ibm_mq/tests/test_ibm_mq_int.py b/ibm_mq/tests/test_ibm_mq_int.py
@@ -2,12 +2,13 @@
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)
 import datetime as dt
+import logging
 import os
 
 import mock
 import pytest
-from pymqi import MQMIError
-from pymqi.CMQC import MQCC_FAILED, MQRC_NO_MSG_AVAILABLE
+from pymqi import MQMIError, PCFExecute
+from pymqi.CMQC import MQCC_FAILED, MQRC_BUFFER_ERROR, MQRC_NO_MSG_AVAILABLE
 from six import iteritems
 
 from datadog_checks.base import AgentCheck
@@ -21,6 +22,36 @@
 pytestmark = [pytest.mark.usefixtures("dd_environment"), pytest.mark.integration]
 
 
+def test_no_msg_errors_are_caught(aggregator, instance, caplog):
+    caplog.set_level(logging.WARNING)
+    m = mock.MagicMock()
+    with mock.patch('datadog_checks.ibm_mq.collectors.channel_metric_collector.pymqi.PCFExecute', new=m), mock.patch(
+        'datadog_checks.ibm_mq.collectors.queue_metric_collector.pymqi.PCFExecute', new=m
+    ), mock.patch('datadog_checks.ibm_mq.collectors.stats_collector.pymqi.PCFExecute', new=m):
+        error = MQMIError(MQCC_FAILED, MQRC_NO_MSG_AVAILABLE)
+        m.side_effect = error
+        m.unpack = PCFExecute.unpack
+        check = IbmMqCheck('ibm_mq', {}, [instance])
+        check.check(instance)
+
+        assert not caplog.records
+
+
+def test_errors_are_loogged(aggregator, instance, caplog):
+    caplog.set_level(logging.WARNING)
+    m = mock.MagicMock()
+    with mock.patch('datadog_checks.ibm_mq.collectors.channel_metric_collector.pymqi.PCFExecute', new=m), mock.patch(
+        'datadog_checks.ibm_mq.collectors.queue_metric_collector.pymqi.PCFExecute', new=m
+    ), mock.patch('datadog_checks.ibm_mq.collectors.stats_collector.pymqi.PCFExecute', new=m):
+        error = MQMIError(MQCC_FAILED, MQRC_BUFFER_ERROR)
+        m.side_effect = error
+        m.unpack = PCFExecute.unpack
+        check = IbmMqCheck('ibm_mq', {}, [instance])
+        check.check(instance)
+
+        assert caplog.records
+
+
 def test_check_metrics_and_service_checks(aggregator, instance, seed_data):
     instance['mqcd_version'] = os.getenv('IBM_MQ_VERSION')
     check = IbmMqCheck('ibm_mq', {}, [instance])