Receive: Don't rebatch already replicated requests #5818

Status: Closed · 3 commits
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -18,6 +18,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Changed

- [#5818] Receive: Don't re-batch requests that are already replicated; this improves performance by removing an unnecessary operation. *breaking :warning: If you have replication enabled, ensure all your receive nodes are running at least `v0.28.0` before updating to this version.*

## [v0.29.0](https://github.com/thanos-io/thanos/tree/release-0.29) - Release in progress

### Fixed
14 changes: 12 additions & 2 deletions pkg/receive/handler.go
@@ -552,14 +552,24 @@ func (h *Handler) forward(ctx context.Context, tenant string, r replica, wreq *p
return errors.New("hashring is not ready")
}

wreqs := make(map[endpointReplica]*prompb.WriteRequest)

// If the request was already replicated, we know it's intended for the
// current endpoint, so we can go to the local write directly (taken care
// of in fanoutForward).
if r.replicated {
wreqs[endpointReplica{endpoint: h.options.Endpoint, replica: r}] = wreq
h.mtx.RUnlock()
return h.fanoutForward(ctx, tenant, wreqs, 1)
}

// Batch all of the time series in the write request
// into several smaller write requests that are
// grouped by target endpoint. This ensures that
// for any incoming write request to a node,
// at most one outgoing write request will be made
// to every other node in the hashring, rather than
// one request per time series.
wreqs := make(map[endpointReplica]*prompb.WriteRequest)
for i := range wreq.Timeseries {
endpoint, err := h.hashring.GetN(tenant, &wreq.Timeseries[i], r.n)
if err != nil {
@@ -573,8 +583,8 @@ func (h *Handler) forward(ctx context.Context, tenant string, r replica, wreq *p
wr := wreqs[key]
wr.Timeseries = append(wr.Timeseries, wreq.Timeseries[i])
}
h.mtx.RUnlock()

h.mtx.RUnlock()
return h.fanoutForward(ctx, tenant, wreqs, len(wreqs))
}

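For readers skimming the hunk above, here is a self-contained, simplified sketch of the control flow this PR introduces. The types and the `pickEndpoint` callback are stand-ins invented for illustration; the real handler works with `prompb.WriteRequest`, `endpointReplica`, and the hashring's `GetN`.

```go
package main

import "fmt"

// Simplified stand-ins for the real types: the actual handler works with
// prompb.WriteRequest, endpointReplica, and the hashring's GetN lookup.
type timeSeries struct{ labels string }

type writeRequest struct{ Timeseries []timeSeries }

type endpointReplica struct {
	endpoint string
	replica  int
}

// batchForForward mirrors the control flow added in this PR: an already
// replicated request is addressed to the local endpoint as a single batch,
// while a fresh request is still split into at most one batch per target
// endpoint in the hashring.
func batchForForward(localEndpoint string, replicated bool, replica int,
	req writeRequest, pickEndpoint func(timeSeries, int) string) map[endpointReplica]writeRequest {

	wreqs := make(map[endpointReplica]writeRequest)

	if replicated {
		// Fast path: the sender already replicated this request, so it is
		// intended for this node only; no per-series hashring lookups needed.
		wreqs[endpointReplica{endpoint: localEndpoint, replica: replica}] = req
		return wreqs
	}

	// Slow path: group series by target endpoint, as before this change.
	for _, ts := range req.Timeseries {
		key := endpointReplica{endpoint: pickEndpoint(ts, replica), replica: replica}
		wr := wreqs[key]
		wr.Timeseries = append(wr.Timeseries, ts)
		wreqs[key] = wr
	}
	return wreqs
}

func main() {
	req := writeRequest{Timeseries: []timeSeries{{labels: "a"}, {labels: "b"}}}

	// Already replicated: one batch, addressed to the local node.
	fmt.Println(len(batchForForward("node-1", true, 1, req, nil)))

	// Not yet replicated: batched per endpoint chosen by the hashring stand-in.
	pick := func(ts timeSeries, _ int) string {
		if ts.labels == "a" {
			return "node-2"
		}
		return "node-3"
	}
	fmt.Println(len(batchForForward("node-1", false, 0, req, pick)))
}
```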
34 changes: 34 additions & 0 deletions pkg/receive/handler_test.go
@@ -1425,6 +1425,40 @@ func benchmarkHandlerMultiTSDBReceiveRemoteWrite(b testutil.TB) {
}
})

handler.options.DefaultTenantID = fmt.Sprintf("%v-ok-w-replicated", tcase.name)
handler.options.ReplicaHeader = "test-header"
handler.writer.multiTSDB = &tsOverrideTenantStorage{TenantStorage: m, interval: 1}

// It takes time to create a new tenant, wait for it.
{
app, err := m.TenantAppendable(handler.options.DefaultTenantID)
testutil.Ok(b, err)

ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

testutil.Ok(b, runutil.Retry(1*time.Second, ctx.Done(), func() error {
_, err = app.Appender(ctx)
return err
}))
}

b.Run("OK-with-replicated", func(b testutil.TB) {
n := b.N()
headers := make(http.Header)
headers.Set(handler.options.ReplicaHeader, "1")
b.ResetTimer()
for i := 0; i < n; i++ {
r := httptest.NewRecorder()
handler.receiveHTTP(r, &http.Request{
ContentLength: int64(len(tcase.writeRequest)),
Body: io.NopCloser(bytes.NewReader(tcase.writeRequest)),
Header: headers,
})
testutil.Equals(b, http.StatusOK, r.Code, "got non 200 error: %v", r.Body.String())
}
})

handler.options.DefaultTenantID = fmt.Sprintf("%v-conflicting", tcase.name)
handler.writer.multiTSDB = &tsOverrideTenantStorage{TenantStorage: m, interval: -1} // Timestamp can't go down, which will cause conflict error.

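For completeness, a hedged example of how a request marked as already replicated could be sent to a receiver over HTTP, mirroring what the new benchmark does with `handler.options.ReplicaHeader`. The header name `THANOS-REPLICA`, the port, and the path are the commonly used defaults, but treat them as assumptions here since all of them are configurable and none of them are part of this diff.

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// A real request body would be a snappy-compressed prompb.WriteRequest;
	// the empty body here only illustrates how the replica header is set.
	body := bytes.NewReader(nil)

	// URL and header name are assumptions for illustration: both the remote
	// write port/path and the replica header are configurable on the receiver.
	req, err := http.NewRequest(http.MethodPost, "http://localhost:19291/api/v1/receive", body)
	if err != nil {
		panic(err)
	}
	// Setting the replica header tells the receiver this request was already
	// replicated, so it takes the new fast path instead of re-batching.
	req.Header.Set("THANOS-REPLICA", "1")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("request failed:", err)
		return
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```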