From be1098915169cfb6a87c5fbad7b90cdb078b8257 Mon Sep 17 00:00:00 2001
From: romner <roman@skotnica.com>
Date: Tue, 6 Jun 2023 19:47:07 +0200
Subject: [PATCH] Parallelize NVML PCIe TX/RX data collection

---
 src/linux/btop_collect.cpp | 45 +++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/src/linux/btop_collect.cpp b/src/linux/btop_collect.cpp
index d9ef56bd..14b7a1a1 100644
--- a/src/linux/btop_collect.cpp
+++ b/src/linux/btop_collect.cpp
@@ -1011,7 +1011,8 @@ namespace Gpu {
 			if (!initialized) return false;
 
 			nvmlReturn_t result;
-			// DebugTimer gpu_nvidia("Nvidia Total");
+			std::thread pcie_tx_thread, pcie_rx_thread;
+			// DebugTimer nvTotalTimer("Nvidia Total");
 			for (unsigned int i = 0; i < device_count; ++i) {
 				if constexpr(is_init) {
 					//? Device Handle
@@ -1053,6 +1054,27 @@ namespace Gpu {
     				else gpus[i].temp_max = (long long)temp_max;
 				}
 
+				//? PCIe link speeds, the data collection takes >=20ms each call so they run on separate threads
+				if (gpus_slice[i].supported_functions.pcie_txrx) {
+					pcie_tx_thread = std::thread([gpus_slice, i]() {
+						unsigned int tx;
+						nvmlReturn_t result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
+    					if (result != NVML_SUCCESS) {
+							Logger::warning(std::string("NVML: Failed to get PCIe TX throughput: ") + nvmlErrorString(result));
+							if constexpr(is_init) gpus_slice[i].supported_functions.pcie_txrx = false;
+						} else gpus_slice[i].pcie_tx = (long long)tx;
+					});
+
+					pcie_rx_thread = std::thread([gpus_slice, i]() {
+						unsigned int rx;
+						nvmlReturn_t result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_RX_BYTES, &rx);
+    					if (result != NVML_SUCCESS) {
+							Logger::warning(std::string("NVML: Failed to get PCIe RX throughput: ") + nvmlErrorString(result));
+						} else gpus_slice[i].pcie_rx = (long long)rx;
+					});
+				}
+
+				// DebugTimer nvTimer("Nv utilization");
 				//? GPU & memory utilization
 				if (gpus_slice[i].supported_functions.gpu_utilization) {
 					nvmlUtilization_t utilization;
@@ -1142,23 +1164,6 @@ namespace Gpu {
 					}
 				}
 
-				//nvTimer.stop_rename_reset("Nv pcie");
-				//? PCIe link speeds
-				if (gpus_slice[i].supported_functions.pcie_txrx) {
-					unsigned int tx,rx;
-					result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_TX_BYTES, &tx);
-    				if (result != NVML_SUCCESS) {
-						Logger::warning(std::string("NVML: Failed to get PCIe TX throughput: ") + nvmlErrorString(result));
-						if constexpr(is_init) gpus_slice[i].supported_functions.pcie_txrx = false;
-					} else gpus_slice[i].pcie_tx = (long long)tx;
-
-					result = nvmlDeviceGetPcieThroughput(devices[i], NVML_PCIE_UTIL_RX_BYTES, &rx);
-    				if (result != NVML_SUCCESS) {
-						Logger::warning(std::string("NVML: Failed to get PCIe RX throughput: ") + nvmlErrorString(result));
-						if constexpr(is_init) gpus_slice[i].supported_functions.pcie_txrx = false;
-					} else gpus_slice[i].pcie_rx = (long long)rx;
-				}
-
     			//? TODO: Processes using GPU
     				/*unsigned int proc_info_len;
     				nvmlProcessInfo_t* proc_info = 0;
@@ -1169,6 +1174,10 @@ namespace Gpu {
     					for (unsigned int i = 0; i < proc_info_len; ++i)
     						gpus_slice[i].graphics_processes.push_back({proc_info[i].pid, proc_info[i].usedGpuMemory});
     				}*/
+
+				// nvTimer.stop_rename_reset("Nv pcie thread join");
+				//? Join PCIE TX/RX threads
+				pcie_tx_thread.join(); pcie_rx_thread.join();
     		}
 
 			return true;