apollographql · aaronArinder · Aug 12, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
@@ -72,7 +72,7 @@ assert_cmd = "2"
 assert-json-diff = "2"
 anyhow = "1"
 backtrace = "0.3"
-backoff = "0.4"
+backoff = { version = "0.4", features = [ "tokio" ]}
 base64 = "0.22"
 billboard = "0.2"
 buildstructor = "0.5.4"
@@ -195,6 +195,8 @@ tracing = { workspace = true }
 which = { workspace = true }
 uuid = { workspace = true }
 url = { workspace = true, features = ["serde"] }
+tokio = { workspace = true, features = ["rt", "rt-multi-thread", "macros"] }
+futures.workspace = true
 
 [dev-dependencies]
 assert_cmd = { workspace = true }

@@ -12,7 +12,7 @@ ariadne = { workspace = true }
 apollo-federation-types = { workspace = true }
 apollo-parser = { workspace = true }
 apollo-encoder = { workspace = true }
-backoff = { workspace = true }
+backoff = { workspace = true, features = ["tokio", "futures"] }
 buildstructor = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 derive-getters = { workspace = true }
@@ -40,6 +40,7 @@ serde_json = { workspace = true }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 regex = { workspace = true }
+tokio = { workspace = true, features = ["rt", "macros"] }
 
 [build-dependencies]
 anyhow = { workspace = true }

@@ -0,0 +1,42 @@
+# Rover Client
+
+This is the client used by rover to make network requests. This README covers aspects of the client that are useful to know when developing and using it.
+
+The rover client uses [Reqwest](https://docs.rs/reqwest/latest/reqwest/) and some familiarity with that crate is useful in both developing and using it.
+
+# Development :: WIP
+
+We're in the midst of transitioning from a synchronous, blocking client used by threads to an asynchronous one used by an event loop (a tokio runtime that also uses threads, but is non-blocking). Because of that, some of the naming and ergonomics might feel weird.
+
+# Using the client
+
+## Timeouts
+
+By default, the timeout is 10s. This is set _not_ by the `MAX_ELAPSED_TIME` const in the `rover-client/src/blocking/client.rs` file, but in the `Default` implementation for `ClientTimeout` in `rover/src/utils/client.rs`. Users can pass the flag `--client-timeout` with an integer representing seconds to control the overall client timeout.
+
+## Retries
+
+### Overview
+Retries can happen for two broad reasons: either the client failed or the server failed. Retries are enabled by default, but can be disabled by passing `false` for the `should_retry` argument (a boolean) of the client's `execute` method.
+
+Retries are also only part of the story. The interval of time you place between retries matters. If you retry all at once, you might get rate-limited or otherwise fail. It's best to spread them out exponentially, adding big chunks of time so that the server can complete its work and get ready for more work. Spreading out retries is a good idea, but if you have multiple calls happening at the same time with the same spread, they might fail if the server is overloaded. It's better to spread them out with some added noise, meaning that you spread them out with some randomly generated bit of time added or subtracted so that calls are received by the server in a somewhat distributed fashion.
+
+#### `backoff` crate
+
+We use the [backoff](https://docs.rs/backoff/latest/backoff/) crate for retries. It builds in both the exponential spreading-out of retries and the little bit of jitter that helps the server handle many requests.
+
+The crate is interesting in that it limits retries not by a total number of attempts, but by a total amount of elapsed time. The `MAX_ELAPSED_TIME` in the client file sets this value and defaults to 10s.
+
+#### Client failures
+
+Retries happen when the client times out (there's a flag for setting the timeout, but by default it's 10s), when there's a connection error, or when incomplete messages are received. Errors about the request or response body, decoding, building the client, or redirecting the network call aren't retried.
+
+#### Server failures
+
+Retries happen for general server errors (notably, _all_ statuses in the 500-599 range), but not when the request is ill-formed as identified by the server (that is, a 400).
+
+
+
+
+
+
@@ -2,9 +2,8 @@ use std::time::Duration;
 
 use graphql_client::{Error as GraphQLError, GraphQLQuery, Response as GraphQLResponse};
 use reqwest::{
-    blocking::{Client as ReqwestClient, Response},
     header::{HeaderMap, HeaderValue},
-    StatusCode,
+    Client as ReqwestClient, Response, StatusCode,
 };
 
 use crate::error::{EndpointKind, RoverClientError};
@@ -37,7 +36,7 @@ impl GraphQLClient {
     ///
     /// Takes one argument, `variables`. Returns an optional response.
     /// Automatically retries requests.
-    pub fn post<Q>(
+    pub async fn post<Q>(
         &self,
         variables: Q::Variables,
         header_map: &mut HeaderMap,
@@ -48,15 +47,17 @@ impl GraphQLClient {
     {
         let request_body = self.get_request_body::<Q>(variables)?;
         header_map.append("Content-Type", HeaderValue::from_str(JSON_CONTENT_TYPE)?);
-        let response = self.execute(request_body, header_map, true, endpoint_kind);
-        GraphQLClient::handle_response::<Q>(response?, endpoint_kind)
+        let response = self
+            .execute(request_body, header_map, true, endpoint_kind)
+            .await;
+        GraphQLClient::handle_response::<Q>(response?, endpoint_kind).await
     }
 
     /// Client method for making a GraphQL request.
     ///
     /// Takes one argument, `variables`. Returns an optional response.
     /// Does not automatically retry requests.
-    pub fn post_no_retry<Q>(
+    pub async fn post_no_retry<Q>(
         &self,
         variables: Q::Variables,
         header_map: &mut HeaderMap,
@@ -67,8 +68,10 @@ impl GraphQLClient {
     {
         let request_body = self.get_request_body::<Q>(variables)?;
         header_map.append("Content-Type", HeaderValue::from_str(JSON_CONTENT_TYPE)?);
-        let response = self.execute(request_body, header_map, false, endpoint_kind);
-        GraphQLClient::handle_response::<Q>(response?, endpoint_kind)
+        let response = self
+            .execute(request_body, header_map, false, endpoint_kind)
+            .await;
+        GraphQLClient::handle_response::<Q>(response?, endpoint_kind).await
     }
 
     fn get_request_body<Q: GraphQLQuery>(
@@ -79,24 +82,25 @@ impl GraphQLClient {
         Ok(serde_json::to_string(&body)?)
     }
 
-    fn execute(
+    async fn execute(
         &self,
         request_body: String,
         header_map: &HeaderMap,
         should_retry: bool,
         endpoint_kind: EndpointKind,
     ) -> Result<Response, RoverClientError> {
-        use backoff::{retry, Error as BackoffError, ExponentialBackoff};
+        use backoff::{future::retry, Error as BackoffError, ExponentialBackoff};
 
         tracing::trace!(request_headers = ?header_map);
         tracing::debug!("Request Body: {}", request_body);
-        let graphql_operation = || {
+        let graphql_operation = || async {
             let response = self
                 .client
                 .post(&self.graphql_endpoint)
                 .headers(header_map.clone())
                 .body(request_body.clone())
-                .send();
+                .send()
+                .await;
 
             match response {
                 Err(client_error) => {
@@ -132,7 +136,7 @@ impl GraphQLClient {
                                 || response_status.is_redirection()
                             {
                                 if matches!(response_status, StatusCode::BAD_REQUEST) {
-                                    if let Ok(text) = success.text() {
+                                    if let Ok(text) = success.text().await {
                                         tracing::debug!("{}", text);
                                     }
                                     Err(BackoffError::Permanent(status_error))
@@ -158,18 +162,14 @@ impl GraphQLClient {
                 ..Default::default()
             };
 
-            retry(backoff_strategy, graphql_operation).map_err(|e| match e {
-                BackoffError::Permanent(reqwest_error)
-                | BackoffError::Transient {
-                    err: reqwest_error,
-                    retry_after: _,
-                } => RoverClientError::SendRequest {
-                    source: reqwest_error,
+            retry(backoff_strategy, graphql_operation)
+                .await
+                .map_err(|e| RoverClientError::SendRequest {
+                    source: e,
                     endpoint_kind,
-                },
-            })
+                })
         } else {
-            graphql_operation().map_err(|e| match e {
+            graphql_operation().await.map_err(|e| match e {
                 BackoffError::Permanent(reqwest_error)
                 | BackoffError::Transient {
                     err: reqwest_error,
@@ -190,13 +190,13 @@ impl GraphQLClient {
     /// body.data, it will also error, as this shouldn't be possible.
     ///
     /// If successful, it will return body.data, unwrapped
-    pub(crate) fn handle_response<Q: GraphQLQuery>(
+    pub(crate) async fn handle_response<Q: GraphQLQuery>(
         response: Response,
         endpoint_kind: EndpointKind,
     ) -> Result<Q::ResponseData, RoverClientError> {
         let response_status = response.status();
         tracing::debug!(response_status = ?response_status, response_headers = ?response.headers());
-        match response.json::<GraphQLResponse<Q::ResponseData>>() {
+        match response.json::<GraphQLResponse<Q::ResponseData>>().await {
             Ok(response_body) => {
                 if let Some(response_body_errors) = response_body.errors {
                     handle_graphql_body_errors(response_body_errors)?;
@@ -316,8 +316,8 @@ mod tests {
         assert_eq!(actual_error, expected_error);
     }
 
-    #[test]
-    fn test_successful_response() {
+    #[tokio::test]
+    async fn test_successful_response() {
         let server = MockServer::start();
         let success_path = "/throw-me-a-frickin-bone-here";
         let success_mock = server.mock(|when, then| {
@@ -332,21 +332,23 @@ mod tests {
             Some(Duration::from_secs(3)),
         );
 
-        let response = graphql_client.execute(
-            "{}".to_string(),
-            &HeaderMap::new(),
-            true,
-            EndpointKind::ApolloStudio,
-        );
+        let response = graphql_client
+            .execute(
+                "{}".to_string(),
+                &HeaderMap::new(),
+                true,
+                EndpointKind::ApolloStudio,
+            )
+            .await;
 
         let mock_hits = success_mock.hits();
 
         assert_eq!(mock_hits, 1);
         assert!(response.is_ok())
     }
 
-    #[test]
-    fn test_unrecoverable_server_error() {
+    #[tokio::test]
+    async fn test_unrecoverable_server_error() {
         let server = MockServer::start();
         let internal_server_error_path = "/this-is-me-in-a-nutshell";
         let internal_server_error_mock = server.mock(|when, then| {
@@ -361,21 +363,23 @@ mod tests {
             Some(Duration::from_secs(3)),
         );
 
-        let response = graphql_client.execute(
-            "{}".to_string(),
-            &HeaderMap::new(),
-            true,
-            EndpointKind::ApolloStudio,
-        );
+        let response = graphql_client
+            .execute(
+                "{}".to_string(),
+                &HeaderMap::new(),
+                true,
+                EndpointKind::ApolloStudio,
+            )
+            .await;
 
         let mock_hits = internal_server_error_mock.hits();
 
         assert!(mock_hits > 1);
         assert!(response.is_err());
     }
 
-    #[test]
-    fn test_unrecoverable_client_error() {
+    #[tokio::test]
+    async fn test_unrecoverable_client_error() {
         let server = MockServer::start();
         let not_found_path = "/austin-powers-the-musical";
         let not_found_mock = server.mock(|when, then| {
@@ -390,12 +394,14 @@ mod tests {
             Some(Duration::from_secs(3)),
         );
 
-        let response = graphql_client.execute(
-            "{}".to_string(),
-            &HeaderMap::new(),
-            true,
-            EndpointKind::ApolloStudio,
-        );
+        let response = graphql_client
+            .execute(
+                "{}".to_string(),
+                &HeaderMap::new(),
+                true,
+                EndpointKind::ApolloStudio,
+            )
+            .await;
 
         let mock_hits = not_found_mock.hits();
 
@@ -405,8 +411,8 @@ mod tests {
         assert!(error.to_string().contains("Not Found"));
     }
 
-    #[test]
-    fn test_timeout_error() {
+    #[tokio::test]
+    async fn test_timeout_error() {
         let server = MockServer::start();
         let timeout_path = "/i-timeout-easily";
         let timeout_mock = server.mock(|when, then| {
@@ -416,7 +422,7 @@ mod tests {
                 .delay(Duration::from_secs(3));
         });
 
-        let client = reqwest::blocking::ClientBuilder::new()
+        let client = reqwest::ClientBuilder::new()
             .timeout(Duration::from_secs(1))
             .build()
             .unwrap();
@@ -426,12 +432,14 @@ mod tests {
             Some(Duration::from_secs(3)),
         );
 
-        let response = graphql_client.execute(
-            "{}".to_string(),
-            &HeaderMap::new(),
-            true,
-            EndpointKind::ApolloStudio,
-        );
+        let response = graphql_client
+            .execute(
+                "{}".to_string(),
+                &HeaderMap::new(),
+                true,
+                EndpointKind::ApolloStudio,
+            )
+            .await;
 
         let mock_hits = timeout_mock.hits();