From ed2aa399ec92bb90803c165a19e1d66309ab139b Mon Sep 17 00:00:00 2001
From: Matthijs Douze <matthijs@meta.com>
Date: Tue, 23 May 2023 07:20:35 -0700
Subject: [PATCH] move by_residual to IndexIVF (#2870)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2870

Move the by_residual field, previously duplicated in the IndexIVF subclasses, into IndexIVF itself.
The shared training code (subsampling and residual computation) moves to IndexIVF::train, and `train_residual` is replaced with `train_encoder`.

This will be used for the IndependentQuantizer work.

Reviewed By: alexanderguzhva

Differential Revision: D45987304

fbshipit-source-id: c79e9f21e66e185480b93b6c149841ad90bb92a1
---
 c_api/IndexIVF_c.cpp                        | 11 ++++
 c_api/IndexIVF_c.h                          |  6 ++
 c_api/IndexScalarQuantizer_c.cpp            | 10 ---
 c_api/IndexScalarQuantizer_c.h              |  5 --
 faiss/IndexIVF.cpp                          | 42 ++++++++++--
 faiss/IndexIVF.h                            | 19 ++++--
 faiss/IndexIVFAdditiveQuantizer.cpp         | 26 +++-----
 faiss/IndexIVFAdditiveQuantizer.h           |  5 +-
 faiss/IndexIVFAdditiveQuantizerFastScan.cpp | 46 +++----------
 faiss/IndexIVFAdditiveQuantizerFastScan.h   |  4 +-
 faiss/IndexIVFFastScan.cpp                  |  3 +
 faiss/IndexIVFFastScan.h                    |  1 -
 faiss/IndexIVFFlat.cpp                      |  7 ++
 faiss/IndexIVFFlat.h                        |  2 +-
 faiss/IndexIVFPQ.cpp                        | 72 +++------------------
 faiss/IndexIVFPQ.h                          |  7 +-
 faiss/IndexIVFPQFastScan.cpp                | 52 +++------------
 faiss/IndexIVFPQFastScan.h                  |  4 +-
 faiss/IndexIVFPQR.cpp                       | 37 ++++++++---
 faiss/IndexIVFPQR.h                         |  4 +-
 faiss/IndexIVFSpectralHash.cpp              | 22 +++----
 faiss/IndexIVFSpectralHash.h                | 12 ++--
 faiss/IndexScalarQuantizer.cpp              | 23 ++++---
 faiss/IndexScalarQuantizer.h                |  7 +-
 faiss/gpu/GpuIndexIVFScalarQuantizer.cu     | 12 +++-
 faiss/impl/FaissException.h                 | 18 ++++++
 faiss/impl/ScalarQuantizer.cpp              | 26 --------
 faiss/impl/ScalarQuantizer.h                |  8 ---
 faiss/impl/index_write.cpp                  |  2 +
 29 files changed, 224 insertions(+), 269 deletions(-)

diff --git a/c_api/IndexIVF_c.cpp b/c_api/IndexIVF_c.cpp
index 5c54fb6e29..9a6f39dfa1 100644
--- a/c_api/IndexIVF_c.cpp
+++ b/c_api/IndexIVF_c.cpp
@@ -165,6 +165,17 @@ void faiss_IndexIVF_invlists_get_ids(
     memcpy(invlist, list, list_size * sizeof(idx_t));
 }
 
+int faiss_IndexIVF_train_encoder(
+        FaissIndexIVF* index,
+        idx_t n,
+        const float* x,
+        const idx_t* assign) {
+    try {
+        reinterpret_cast<IndexIVF*>(index)->train_encoder(n, x, assign);
+    }
+    CATCH_AND_HANDLE
+}
+
 void faiss_IndexIVFStats_reset(FaissIndexIVFStats* stats) {
     reinterpret_cast<IndexIVFStats*>(stats)->reset();
 }
diff --git a/c_api/IndexIVF_c.h b/c_api/IndexIVF_c.h
index 3ef8a9ad7f..98a09c2668 100644
--- a/c_api/IndexIVF_c.h
+++ b/c_api/IndexIVF_c.h
@@ -154,6 +154,12 @@ void faiss_IndexIVF_invlists_get_ids(
         size_t list_no,
         idx_t* invlist);
 
+int faiss_IndexIVF_train_encoder(
+        FaissIndexIVF* index,
+        idx_t n,
+        const float* x,
+        const idx_t* assign);
+
 typedef struct FaissIndexIVFStats {
     size_t nq;                // nb of queries run
     size_t nlist;             // nb of inverted lists scanned
diff --git a/c_api/IndexScalarQuantizer_c.cpp b/c_api/IndexScalarQuantizer_c.cpp
index 5c00e342c3..9f3393e831 100644
--- a/c_api/IndexScalarQuantizer_c.cpp
+++ b/c_api/IndexScalarQuantizer_c.cpp
@@ -110,13 +110,3 @@ int faiss_IndexIVFScalarQuantizer_add_core(
     }
     CATCH_AND_HANDLE
 }
-
-int faiss_IndexIVFScalarQuantizer_train_residual(
-        FaissIndexIVFScalarQuantizer* index,
-        idx_t n,
-        const float* x) {
-    try {
-        reinterpret_cast<IndexIVFScalarQuantizer*>(index)->train_residual(n, x);
-    }
-    CATCH_AND_HANDLE
-}
diff --git a/c_api/IndexScalarQuantizer_c.h b/c_api/IndexScalarQuantizer_c.h
index becdb201e0..2c5e3f2942 100644
--- a/c_api/IndexScalarQuantizer_c.h
+++ b/c_api/IndexScalarQuantizer_c.h
@@ -88,11 +88,6 @@ int faiss_IndexIVFScalarQuantizer_add_core(
         const idx_t* xids,
         const idx_t* precomputed_idx);
 
-int faiss_IndexIVFScalarQuantizer_train_residual(
-        FaissIndexIVFScalarQuantizer* index,
-        idx_t n,
-        const float* x);
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/faiss/IndexIVF.cpp b/faiss/IndexIVF.cpp
index 19e18e4666..aeaca78011 100644
--- a/faiss/IndexIVF.cpp
+++ b/faiss/IndexIVF.cpp
@@ -1061,22 +1061,52 @@ void IndexIVF::update_vectors(int n, const idx_t* new_ids, const float* x) {
 }
 
 void IndexIVF::train(idx_t n, const float* x) {
-    if (verbose)
+    if (verbose) {
         printf("Training level-1 quantizer\n");
+    }
 
     train_q1(n, x, verbose, metric_type);
 
-    if (verbose)
+    if (verbose) {
         printf("Training IVF residual\n");
+    }
+
+    // optional subsampling
+    idx_t max_nt = train_encoder_num_vectors();
+    if (max_nt <= 0) {
+        max_nt = (size_t)1 << 35;
+    }
+
+    TransformedVectors tv(
+            x, fvecs_maybe_subsample(d, (size_t*)&n, max_nt, x, verbose));
+
+    if (by_residual) {
+        std::vector<idx_t> assign(n);
+        quantizer->assign(n, tv.x, assign.data());
+
+        std::vector<float> residuals(n * d);
+        quantizer->compute_residual_n(n, tv.x, residuals.data(), assign.data());
+
+        train_encoder(n, residuals.data(), assign.data());
+    } else {
+        train_encoder(n, tv.x, nullptr);
+    }
 
-    train_residual(n, x);
     is_trained = true;
 }
 
-void IndexIVF::train_residual(idx_t /*n*/, const float* /*x*/) {
-    if (verbose)
-        printf("IndexIVF: no residual training\n");
+idx_t IndexIVF::train_encoder_num_vectors() const {
+    return 0;
+}
+
+void IndexIVF::train_encoder(
+        idx_t /*n*/,
+        const float* /*x*/,
+        const idx_t* /*assign*/) {
     // does nothing by default
+    if (verbose) {
+        printf("IndexIVF: no residual training\n");
+    }
 }
 
 bool check_compatible_for_merge_expensive_check = true;
diff --git a/faiss/IndexIVF.h b/faiss/IndexIVF.h
index ade8b5113d..a4a40194f9 100644
--- a/faiss/IndexIVF.h
+++ b/faiss/IndexIVF.h
@@ -177,6 +177,7 @@ struct IndexIVF : Index, IndexIVFInterface {
     bool own_invlists = false;
 
     size_t code_size = 0; ///< code size per vector in bytes
+
     /** Parallel mode determines how queries are parallelized with OpenMP
      *
      * 0 (default): split over queries
@@ -194,6 +195,10 @@ struct IndexIVF : Index, IndexIVFInterface {
      *  enables reconstruct() */
     DirectMap direct_map;
 
+    /// do the codes in the invlists encode the vectors relative to the
+    /// centroids?
+    bool by_residual = true;
+
     /** The Inverted file takes a quantizer (an Index) on input,
      * which implements the function mapping a vector to a list
      * identifier.
@@ -207,7 +212,7 @@ struct IndexIVF : Index, IndexIVFInterface {
 
     void reset() override;
 
-    /// Trains the quantizer and calls train_residual to train sub-quantizers
+    /// Trains the quantizer and calls train_encoder to train sub-quantizers
     void train(idx_t n, const float* x) override;
 
     /// Calls add_with_ids with NULL ids
@@ -252,9 +257,15 @@ struct IndexIVF : Index, IndexIVFInterface {
      */
     void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
 
-    /// Sub-classes that encode the residuals can train their encoders here
-    /// does nothing by default
-    virtual void train_residual(idx_t n, const float* x);
+    /** Train the encoder for the vectors.
+     *
+     * If by_residual then it is called with residuals and corresponding assign
+     * array, otherwise x is the raw training vectors and assign=nullptr */
+    virtual void train_encoder(idx_t n, const float* x, const idx_t* assign);
+
+    /// can be redefined by subclasses to indicate how many training vectors
+    /// they need
+    virtual idx_t train_encoder_num_vectors() const;
 
     void search_preassigned(
             idx_t n,
diff --git a/faiss/IndexIVFAdditiveQuantizer.cpp b/faiss/IndexIVFAdditiveQuantizer.cpp
index 0fa836aa08..54779792b8 100644
--- a/faiss/IndexIVFAdditiveQuantizer.cpp
+++ b/faiss/IndexIVFAdditiveQuantizer.cpp
@@ -37,30 +37,20 @@ IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
 IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq)
         : IndexIVF(), aq(aq) {}
 
-void IndexIVFAdditiveQuantizer::train_residual(idx_t n, const float* x) {
-    const float* x_in = x;
+void IndexIVFAdditiveQuantizer::train_encoder(
+        idx_t n,
+        const float* x,
+        const idx_t* assign) {
+    aq->train(n, x);
+}
 
+idx_t IndexIVFAdditiveQuantizer::train_encoder_num_vectors() const {
     size_t max_train_points = 1024 * ((size_t)1 << aq->nbits[0]);
     // we need more data to train LSQ
     if (dynamic_cast<LocalSearchQuantizer*>(aq)) {
         max_train_points = 1024 * aq->M * ((size_t)1 << aq->nbits[0]);
     }
-
-    x = fvecs_maybe_subsample(
-            d, (size_t*)&n, max_train_points, x, verbose, 1234);
-    ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
-
-    if (by_residual) {
-        std::vector<idx_t> idx(n);
-        quantizer->assign(n, x, idx.data());
-
-        std::vector<float> residuals(n * d);
-        quantizer->compute_residual_n(n, x, residuals.data(), idx.data());
-
-        aq->train(n, residuals.data());
-    } else {
-        aq->train(n, x);
-    }
+    return max_train_points;
 }
 
 void IndexIVFAdditiveQuantizer::encode_vectors(
diff --git a/faiss/IndexIVFAdditiveQuantizer.h b/faiss/IndexIVFAdditiveQuantizer.h
index 483f5e4b6e..d065947d09 100644
--- a/faiss/IndexIVFAdditiveQuantizer.h
+++ b/faiss/IndexIVFAdditiveQuantizer.h
@@ -26,7 +26,6 @@ namespace faiss {
 struct IndexIVFAdditiveQuantizer : IndexIVF {
     // the quantizer
     AdditiveQuantizer* aq;
-    bool by_residual = true;
     int use_precomputed_table = 0; // for future use
 
     using Search_type_t = AdditiveQuantizer::Search_type_t;
@@ -40,7 +39,9 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
 
     explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
 
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
+
+    idx_t train_encoder_num_vectors() const override;
 
     void encode_vectors(
             idx_t n,
diff --git a/faiss/IndexIVFAdditiveQuantizerFastScan.cpp b/faiss/IndexIVFAdditiveQuantizerFastScan.cpp
index e44e70c77c..2f41f87bcc 100644
--- a/faiss/IndexIVFAdditiveQuantizerFastScan.cpp
+++ b/faiss/IndexIVFAdditiveQuantizerFastScan.cpp
@@ -131,45 +131,20 @@ IndexIVFAdditiveQuantizerFastScan::~IndexIVFAdditiveQuantizerFastScan() {}
  * Training
  *********************************************************/
 
-void IndexIVFAdditiveQuantizerFastScan::train_residual(
+idx_t IndexIVFAdditiveQuantizerFastScan::train_encoder_num_vectors() const {
+    return max_train_points;
+}
+
+void IndexIVFAdditiveQuantizerFastScan::train_encoder(
         idx_t n,
-        const float* x_in) {
+        const float* x,
+        const idx_t* assign) {
     if (aq->is_trained) {
         return;
     }
 
-    const int seed = 0x12345;
-    size_t nt = n;
-    const float* x = fvecs_maybe_subsample(
-            d, &nt, max_train_points, x_in, verbose, seed);
-    n = nt;
     if (verbose) {
-        printf("training additive quantizer on %zd vectors\n", nt);
-    }
-    aq->verbose = verbose;
-
-    std::unique_ptr<float[]> del_x;
-    if (x != x_in) {
-        del_x.reset((float*)x);
-    }
-
-    const float* trainset;
-    std::vector<float> residuals(n * d);
-    std::vector<idx_t> assign(n);
-
-    if (by_residual) {
-        if (verbose) {
-            printf("computing residuals\n");
-        }
-        quantizer->assign(n, x, assign.data());
-        residuals.resize(n * d);
-        for (idx_t i = 0; i < n; i++) {
-            quantizer->compute_residual(
-                    x + i * d, residuals.data() + i * d, assign[i]);
-        }
-        trainset = residuals.data();
-    } else {
-        trainset = x;
+        printf("training additive quantizer on %d vectors\n", int(n));
     }
 
     if (verbose) {
@@ -181,17 +156,16 @@ void IndexIVFAdditiveQuantizerFastScan::train_residual(
                d);
     }
     aq->verbose = verbose;
-    aq->train(n, trainset);
+    aq->train(n, x);
 
     // train norm quantizer
     if (by_residual && metric_type == METRIC_L2) {
         std::vector<float> decoded_x(n * d);
         std::vector<uint8_t> x_codes(n * aq->code_size);
-        aq->compute_codes(residuals.data(), x_codes.data(), n);
+        aq->compute_codes(x, x_codes.data(), n);
         aq->decode(x_codes.data(), decoded_x.data(), n);
 
         // add coarse centroids
-        FAISS_THROW_IF_NOT(assign.size() == n);
         std::vector<float> centroid(d);
         for (idx_t i = 0; i < n; i++) {
             auto xi = decoded_x.data() + i * d;
diff --git a/faiss/IndexIVFAdditiveQuantizerFastScan.h b/faiss/IndexIVFAdditiveQuantizerFastScan.h
index 7a70a3ba46..24ce7287ec 100644
--- a/faiss/IndexIVFAdditiveQuantizerFastScan.h
+++ b/faiss/IndexIVFAdditiveQuantizerFastScan.h
@@ -63,7 +63,9 @@ struct IndexIVFAdditiveQuantizerFastScan : IndexIVFFastScan {
             const IndexIVFAdditiveQuantizer& orig,
             int bbs = 32);
 
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
+
+    idx_t train_encoder_num_vectors() const override;
 
     void estimate_norm_scale(idx_t n, const float* x);
 
diff --git a/faiss/IndexIVFFastScan.cpp b/faiss/IndexIVFFastScan.cpp
index 701edc1fd7..800172cc9e 100644
--- a/faiss/IndexIVFFastScan.cpp
+++ b/faiss/IndexIVFFastScan.cpp
@@ -43,6 +43,8 @@ IndexIVFFastScan::IndexIVFFastScan(
         size_t code_size,
         MetricType metric)
         : IndexIVF(quantizer, d, nlist, code_size, metric) {
+    // unlike other indexes, we prefer no residuals for performance reasons.
+    by_residual = false;
     FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
 }
 
@@ -50,6 +52,7 @@ IndexIVFFastScan::IndexIVFFastScan() {
     bbs = 0;
     M2 = 0;
     is_trained = false;
+    by_residual = false;
 }
 
 void IndexIVFFastScan::init_fastscan(
diff --git a/faiss/IndexIVFFastScan.h b/faiss/IndexIVFFastScan.h
index fd7d021137..c1a6b506c1 100644
--- a/faiss/IndexIVFFastScan.h
+++ b/faiss/IndexIVFFastScan.h
@@ -45,7 +45,6 @@ struct IndexIVFFastScan : IndexIVF {
     int implem = 0;
     // skip some parts of the computation (for timing)
     int skip = 0;
-    bool by_residual = false;
 
     // batching factors at search time (0 = default)
     int qbs = 0;
diff --git a/faiss/IndexIVFFlat.cpp b/faiss/IndexIVFFlat.cpp
index 03f8cb0dc4..a6f090c2c9 100644
--- a/faiss/IndexIVFFlat.cpp
+++ b/faiss/IndexIVFFlat.cpp
@@ -36,6 +36,11 @@ IndexIVFFlat::IndexIVFFlat(
         MetricType metric)
         : IndexIVF(quantizer, d, nlist, sizeof(float) * d, metric) {
     code_size = sizeof(float) * d;
+    by_residual = false;
+}
+
+IndexIVFFlat::IndexIVFFlat() {
+    by_residual = false;
 }
 
 void IndexIVFFlat::add_core(
@@ -45,6 +50,7 @@ void IndexIVFFlat::add_core(
         const int64_t* coarse_idx) {
     FAISS_THROW_IF_NOT(is_trained);
     FAISS_THROW_IF_NOT(coarse_idx);
+    FAISS_THROW_IF_NOT(!by_residual);
     assert(invlists);
     direct_map.check_can_add(xids);
 
@@ -89,6 +95,7 @@ void IndexIVFFlat::encode_vectors(
         const idx_t* list_nos,
         uint8_t* codes,
         bool include_listnos) const {
+    FAISS_THROW_IF_NOT(!by_residual);
     if (!include_listnos) {
         memcpy(codes, x, code_size * n);
     } else {
diff --git a/faiss/IndexIVFFlat.h b/faiss/IndexIVFFlat.h
index 1ecc6ffc74..a0233052fa 100644
--- a/faiss/IndexIVFFlat.h
+++ b/faiss/IndexIVFFlat.h
@@ -50,7 +50,7 @@ struct IndexIVFFlat : IndexIVF {
 
     void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 
-    IndexIVFFlat() {}
+    IndexIVFFlat();
 };
 
 struct IndexIVFFlatDedup : IndexIVFFlat {
diff --git a/faiss/IndexIVFPQ.cpp b/faiss/IndexIVFPQ.cpp
index fd91738ad1..60633cc41b 100644
--- a/faiss/IndexIVFPQ.cpp
+++ b/faiss/IndexIVFPQ.cpp
@@ -64,74 +64,17 @@ IndexIVFPQ::IndexIVFPQ(
 /****************************************************************
  * training                                                     */
 
-void IndexIVFPQ::train_residual(idx_t n, const float* x) {
-    train_residual_o(n, x, nullptr);
-}
-
-void IndexIVFPQ::train_residual_o(idx_t n, const float* x, float* residuals_2) {
-    const float* x_in = x;
-
-    x = fvecs_maybe_subsample(
-            d,
-            (size_t*)&n,
-            pq.cp.max_points_per_centroid * pq.ksub,
-            x,
-            verbose,
-            pq.cp.seed);
-
-    ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
-
-    const float* trainset;
-    ScopeDeleter<float> del_residuals;
-    if (by_residual) {
-        if (verbose)
-            printf("computing residuals\n");
-        idx_t* assign = new idx_t[n]; // assignement to coarse centroids
-        ScopeDeleter<idx_t> del(assign);
-        quantizer->assign(n, x, assign);
-        float* residuals = new float[n * d];
-        del_residuals.set(residuals);
-        for (idx_t i = 0; i < n; i++)
-            quantizer->compute_residual(
-                    x + i * d, residuals + i * d, assign[i]);
-
-        trainset = residuals;
-    } else {
-        trainset = x;
-    }
-    if (verbose)
-        printf("training %zdx%zd product quantizer on %" PRId64
-               " vectors in %dD\n",
-               pq.M,
-               pq.ksub,
-               n,
-               d);
-    pq.verbose = verbose;
-    pq.train(n, trainset);
+void IndexIVFPQ::train_encoder(idx_t n, const float* x, const idx_t* assign) {
+    pq.verbose = verbose; // propagate verbosity, as train_residual_o did
+    pq.train(n, x);
 
     if (do_polysemous_training) {
         if (verbose)
             printf("doing polysemous training for PQ\n");
         PolysemousTraining default_pt;
-        PolysemousTraining* pt = polysemous_training;
-        if (!pt)
-            pt = &default_pt;
-        pt->optimize_pq_for_hamming(pq, n, trainset);
-    }
-
-    // prepare second-level residuals for refine PQ
-    if (residuals_2) {
-        uint8_t* train_codes = new uint8_t[pq.code_size * n];
-        ScopeDeleter<uint8_t> del(train_codes);
-        pq.compute_codes(trainset, train_codes, n);
-
-        for (idx_t i = 0; i < n; i++) {
-            const float* xx = trainset + i * d;
-            float* res = residuals_2 + i * d;
-            pq.decode(train_codes + i * pq.code_size, res);
-            for (int j = 0; j < d; j++)
-                res[j] = xx[j] - res[j];
-        }
+        PolysemousTraining* pt =
+                polysemous_training ? polysemous_training : &default_pt;
+        pt->optimize_pq_for_hamming(pq, n, x);
     }
 
     if (by_residual) {
@@ -139,6 +82,10 @@ void IndexIVFPQ::train_residual_o(idx_t n, const float* x, float* residuals_2) {
     }
 }
 
+idx_t IndexIVFPQ::train_encoder_num_vectors() const {
+    return pq.cp.max_points_per_centroid * pq.ksub;
+}
+
 /****************************************************************
  * IVFPQ as codec                                               */
 
diff --git a/faiss/IndexIVFPQ.h b/faiss/IndexIVFPQ.h
index 58c85fa27b..ab49f1e549 100644
--- a/faiss/IndexIVFPQ.h
+++ b/faiss/IndexIVFPQ.h
@@ -32,8 +32,6 @@ FAISS_API extern size_t precomputed_table_max_bytes;
  * vector is encoded as a product quantizer code.
  */
 struct IndexIVFPQ : IndexIVF {
-    bool by_residual; ///< Encode residual or plain vector?
-
     ProductQuantizer pq; ///< produces the codes
 
     bool do_polysemous_training; ///< reorder PQ centroids after training?
@@ -86,10 +84,9 @@ struct IndexIVFPQ : IndexIVF {
             const idx_t* precomputed_idx = nullptr);
 
     /// trains the product quantizer
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
 
-    /// same as train_residual, also output 2nd level residuals
-    void train_residual_o(idx_t n, const float* x, float* residuals_2);
+    idx_t train_encoder_num_vectors() const override;
 
     void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
             const override;
diff --git a/faiss/IndexIVFPQFastScan.cpp b/faiss/IndexIVFPQFastScan.cpp
index 07d88bf50e..b44b71ec67 100644
--- a/faiss/IndexIVFPQFastScan.cpp
+++ b/faiss/IndexIVFPQFastScan.cpp
@@ -44,7 +44,7 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(
         MetricType metric,
         int bbs)
         : IndexIVFFastScan(quantizer, d, nlist, 0, metric), pq(d, M, nbits) {
-    by_residual = false; // set to false by default because it's much faster
+    by_residual = false; // set to false by default because it's faster
 
     init_fastscan(M, nbits, nlist, metric, bbs);
 }
@@ -106,54 +106,22 @@ IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs)
  * Training
  *********************************************************/
 
-void IndexIVFPQFastScan::train_residual(idx_t n, const float* x_in) {
-    const float* x = fvecs_maybe_subsample(
-            d,
-            (size_t*)&n,
-            pq.cp.max_points_per_centroid * pq.ksub,
-            x_in,
-            verbose,
-            pq.cp.seed);
-
-    std::unique_ptr<float[]> del_x;
-    if (x != x_in) {
-        del_x.reset((float*)x);
-    }
-
-    const float* trainset;
-    AlignedTable<float> residuals;
-
-    if (by_residual) {
-        if (verbose)
-            printf("computing residuals\n");
-        std::vector<idx_t> assign(n);
-        quantizer->assign(n, x, assign.data());
-        residuals.resize(n * d);
-        for (idx_t i = 0; i < n; i++) {
-            quantizer->compute_residual(
-                    x + i * d, residuals.data() + i * d, assign[i]);
-        }
-        trainset = residuals.data();
-    } else {
-        trainset = x;
-    }
-
-    if (verbose) {
-        printf("training %zdx%zd product quantizer on "
-               "%" PRId64 " vectors in %dD\n",
-               pq.M,
-               pq.ksub,
-               n,
-               d);
-    }
+void IndexIVFPQFastScan::train_encoder(
+        idx_t n,
+        const float* x,
+        const idx_t* assign) {
     pq.verbose = verbose;
-    pq.train(n, trainset);
+    pq.train(n, x);
 
     if (by_residual && metric_type == METRIC_L2) {
         precompute_table();
     }
 }
 
+idx_t IndexIVFPQFastScan::train_encoder_num_vectors() const {
+    return pq.cp.max_points_per_centroid * pq.ksub;
+}
+
 void IndexIVFPQFastScan::precompute_table() {
     initialize_IVFPQ_precomputed_table(
             use_precomputed_table,
diff --git a/faiss/IndexIVFPQFastScan.h b/faiss/IndexIVFPQFastScan.h
index 55c5430b64..9a79833591 100644
--- a/faiss/IndexIVFPQFastScan.h
+++ b/faiss/IndexIVFPQFastScan.h
@@ -54,7 +54,9 @@ struct IndexIVFPQFastScan : IndexIVFFastScan {
     // built from an IndexIVFPQ
     explicit IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs = 32);
 
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
+
+    idx_t train_encoder_num_vectors() const override;
 
     /// build precomputed table, possibly updating use_precomputed_table
     void precompute_table();
diff --git a/faiss/IndexIVFPQR.cpp b/faiss/IndexIVFPQR.cpp
index f60302396d..5a5b88d94d 100644
--- a/faiss/IndexIVFPQR.cpp
+++ b/faiss/IndexIVFPQR.cpp
@@ -35,10 +35,12 @@ IndexIVFPQR::IndexIVFPQR(
           refine_pq(d, M_refine, nbits_per_idx_refine),
           k_factor(4) {
     by_residual = true;
+    refine_pq.cp.max_points_per_centroid = 1000;
 }
 
 IndexIVFPQR::IndexIVFPQR() : k_factor(1) {
     by_residual = true;
+    refine_pq.cp.max_points_per_centroid = 1000;
 }
 
 void IndexIVFPQR::reset() {
@@ -46,24 +48,39 @@ void IndexIVFPQR::reset() {
     refine_codes.clear();
 }
 
-void IndexIVFPQR::train_residual(idx_t n, const float* x) {
-    float* residual_2 = new float[n * d];
-    ScopeDeleter<float> del(residual_2);
-
-    train_residual_o(n, x, residual_2);
-
-    if (verbose)
+void IndexIVFPQR::train_encoder(idx_t n, const float* x, const idx_t* assign) {
+    IndexIVFPQ::train_encoder(n, x, assign);
+    if (verbose) {
         printf("training %zdx%zd 2nd level PQ quantizer on %" PRId64
                " %dD-vectors\n",
                refine_pq.M,
                refine_pq.ksub,
                n,
                d);
-
-    refine_pq.cp.max_points_per_centroid = 1000;
+    }
     refine_pq.cp.verbose = verbose;
 
-    refine_pq.train(n, residual_2);
+    // 2nd level residual
+    std::vector<float> residual_2(n * d);
+    std::vector<uint8_t> train_codes(pq.code_size * n);
+    pq.compute_codes(x, train_codes.data(), n);
+
+    for (idx_t i = 0; i < n; i++) {
+        const float* xx = x + i * d;
+        float* res = residual_2.data() + i * d;
+        pq.decode(train_codes.data() + i * pq.code_size, res);
+        for (int j = 0; j < d; j++) {
+            res[j] = xx[j] - res[j];
+        }
+    }
+
+    refine_pq.train(n, residual_2.data());
+}
+
+idx_t IndexIVFPQR::train_encoder_num_vectors() const {
+    return std::max(
+            pq.cp.max_points_per_centroid * pq.ksub,
+            refine_pq.cp.max_points_per_centroid * refine_pq.ksub);
 }
 
 void IndexIVFPQR::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
diff --git a/faiss/IndexIVFPQR.h b/faiss/IndexIVFPQR.h
index 55756f59f9..73502879f2 100644
--- a/faiss/IndexIVFPQR.h
+++ b/faiss/IndexIVFPQR.h
@@ -37,7 +37,9 @@ struct IndexIVFPQR : IndexIVFPQ {
     size_t remove_ids(const IDSelector& sel) override;
 
     /// trains the two product quantizers
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
+
+    idx_t train_encoder_num_vectors() const override;
 
     void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
 
diff --git a/faiss/IndexIVFSpectralHash.cpp b/faiss/IndexIVFSpectralHash.cpp
index 61de5aecb3..443c45dee6 100644
--- a/faiss/IndexIVFSpectralHash.cpp
+++ b/faiss/IndexIVFSpectralHash.cpp
@@ -31,22 +31,17 @@ IndexIVFSpectralHash::IndexIVFSpectralHash(
         float period)
         : IndexIVF(quantizer, d, nlist, (nbit + 7) / 8, METRIC_L2),
           nbit(nbit),
-          period(period),
-          threshold_type(Thresh_global) {
+          period(period) {
     RandomRotationMatrix* rr = new RandomRotationMatrix(d, nbit);
     rr->init(1234);
     vt = rr;
-    own_fields = true;
     is_trained = false;
+    by_residual = false;
 }
 
-IndexIVFSpectralHash::IndexIVFSpectralHash()
-        : IndexIVF(),
-          vt(nullptr),
-          own_fields(false),
-          nbit(0),
-          period(0),
-          threshold_type(Thresh_global) {}
+IndexIVFSpectralHash::IndexIVFSpectralHash() : IndexIVF() {
+    by_residual = false;
+}
 
 IndexIVFSpectralHash::~IndexIVFSpectralHash() {
     if (own_fields) {
@@ -67,10 +62,14 @@ float median(size_t n, float* x) {
 
 } // namespace
 
-void IndexIVFSpectralHash::train_residual(idx_t n, const float* x) {
+void IndexIVFSpectralHash::train_encoder(
+        idx_t n,
+        const float* x,
+        const idx_t* assign) {
+    FAISS_THROW_IF_NOT(!by_residual); // reject before vt gets trained
     if (!vt->is_trained) {
         vt->train(n, x);
     }
 
     if (threshold_type == Thresh_global) {
         // nothing to do
@@ -167,6 +166,7 @@ void IndexIVFSpectralHash::encode_vectors(
         uint8_t* codes,
         bool include_listnos) const {
     FAISS_THROW_IF_NOT(is_trained);
+    FAISS_THROW_IF_NOT(!by_residual);
     float freq = 2.0 / period;
     size_t coarse_size = include_listnos ? coarse_code_size() : 0;
 
diff --git a/faiss/IndexIVFSpectralHash.h b/faiss/IndexIVFSpectralHash.h
index ee464859c3..ae7df58e40 100644
--- a/faiss/IndexIVFSpectralHash.h
+++ b/faiss/IndexIVFSpectralHash.h
@@ -30,14 +30,14 @@ struct IndexPreTransform;
  */
 struct IndexIVFSpectralHash : IndexIVF {
     /// transformation from d to nbit dim
-    VectorTransform* vt;
+    VectorTransform* vt = nullptr;
     /// own the vt
-    bool own_fields;
+    bool own_fields = true;
 
     /// nb of bits of the binary signature
-    int nbit;
+    int nbit = 0;
     /// interval size for 0s and 1s
-    float period;
+    float period = 0;
 
     enum ThresholdType {
         Thresh_global,        ///< global threshold at 0
@@ -45,7 +45,7 @@ struct IndexIVFSpectralHash : IndexIVF {
         Thresh_centroid_half, ///< central interval around centroid
         Thresh_median         ///< median of training set
     };
-    ThresholdType threshold_type;
+    ThresholdType threshold_type = Thresh_global;
 
     /// Trained threshold.
     /// size nlist * nbit or 0 if Thresh_global
@@ -60,7 +60,7 @@ struct IndexIVFSpectralHash : IndexIVF {
 
     IndexIVFSpectralHash();
 
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
 
     void encode_vectors(
             idx_t n,
diff --git a/faiss/IndexScalarQuantizer.cpp b/faiss/IndexScalarQuantizer.cpp
index 4189bcd034..acd3592bf9 100644
--- a/faiss/IndexScalarQuantizer.cpp
+++ b/faiss/IndexScalarQuantizer.cpp
@@ -122,21 +122,28 @@ IndexIVFScalarQuantizer::IndexIVFScalarQuantizer(
         size_t nlist,
         ScalarQuantizer::QuantizerType qtype,
         MetricType metric,
-        bool encode_residual)
-        : IndexIVF(quantizer, d, nlist, 0, metric),
-          sq(d, qtype),
-          by_residual(encode_residual) {
+        bool by_residual)
+        : IndexIVF(quantizer, d, nlist, 0, metric), sq(d, qtype) {
     code_size = sq.code_size;
+    this->by_residual = by_residual;
     // was not known at construction time
     invlists->code_size = code_size;
     is_trained = false;
 }
 
-IndexIVFScalarQuantizer::IndexIVFScalarQuantizer()
-        : IndexIVF(), by_residual(true) {}
+IndexIVFScalarQuantizer::IndexIVFScalarQuantizer() : IndexIVF() {
+    by_residual = true;
+}
+
+void IndexIVFScalarQuantizer::train_encoder(
+        idx_t n,
+        const float* x,
+        const idx_t* assign) {
+    sq.train(n, x);
+}
 
-void IndexIVFScalarQuantizer::train_residual(idx_t n, const float* x) {
-    sq.train_residual(n, x, quantizer, by_residual, verbose);
+idx_t IndexIVFScalarQuantizer::train_encoder_num_vectors() const {
+    return 100000;
 }
 
 void IndexIVFScalarQuantizer::encode_vectors(
diff --git a/faiss/IndexScalarQuantizer.h b/faiss/IndexScalarQuantizer.h
index c1e6b34f2c..c064bbeeb3 100644
--- a/faiss/IndexScalarQuantizer.h
+++ b/faiss/IndexScalarQuantizer.h
@@ -65,7 +65,6 @@ struct IndexScalarQuantizer : IndexFlatCodes {
 
 struct IndexIVFScalarQuantizer : IndexIVF {
     ScalarQuantizer sq;
-    bool by_residual;
 
     IndexIVFScalarQuantizer(
             Index* quantizer,
@@ -73,11 +72,13 @@ struct IndexIVFScalarQuantizer : IndexIVF {
             size_t nlist,
             ScalarQuantizer::QuantizerType qtype,
             MetricType metric = METRIC_L2,
-            bool encode_residual = true);
+            bool by_residual = true);
 
     IndexIVFScalarQuantizer();
 
-    void train_residual(idx_t n, const float* x) override;
+    void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
+
+    idx_t train_encoder_num_vectors() const override;
 
     void encode_vectors(
             idx_t n,
diff --git a/faiss/gpu/GpuIndexIVFScalarQuantizer.cu b/faiss/gpu/GpuIndexIVFScalarQuantizer.cu
index f58c72889b..7c21a770d0 100644
--- a/faiss/gpu/GpuIndexIVFScalarQuantizer.cu
+++ b/faiss/gpu/GpuIndexIVFScalarQuantizer.cu
@@ -219,7 +219,17 @@ void GpuIndexIVFScalarQuantizer::reset() {
 
 void GpuIndexIVFScalarQuantizer::trainResiduals_(idx_t n, const float* x) {
     // The input is already guaranteed to be on the CPU
-    sq.train_residual(n, x, quantizer, by_residual, verbose);
+    if (!by_residual) {
+        sq.train(n, x);
+    } else {
+        std::vector<idx_t> assign(n);
+        quantizer->assign(n, x, assign.data());
+
+        std::vector<float> residuals(n * d);
+        quantizer->compute_residual_n(n, x, residuals.data(), assign.data());
+
+        sq.train(n, residuals.data());
+    }
 }
 
 void GpuIndexIVFScalarQuantizer::train(idx_t n, const float* x) {
diff --git a/faiss/impl/FaissException.h b/faiss/impl/FaissException.h
index bc8bb9aca6..5e5bcf1a30 100644
--- a/faiss/impl/FaissException.h
+++ b/faiss/impl/FaissException.h
@@ -1,3 +1,4 @@
+
 /**
  * Copyright (c) Facebook, Inc. and its affiliates.
  *
@@ -79,6 +80,23 @@ struct ScopeDeleter1 {
     }
 };
 
+/** RAII object for a set of possibly transformed vectors (deallocated only if
+ * they are indeed transformed). Not copyable, to prevent a double delete[].
+ */
+struct TransformedVectors {
+    const float* x; ///< vectors to use; may be the same pointer as x_orig
+    bool own_x;     ///< true iff x != x_orig, in which case x is delete[]d
+    TransformedVectors(const float* x_orig, const float* x)
+            : x(x), own_x(x_orig != x) {}
+    TransformedVectors(const TransformedVectors&) = delete;
+    TransformedVectors& operator=(const TransformedVectors&) = delete;
+    ~TransformedVectors() {
+        if (own_x) {
+            delete[] x;
+        }
+    }
+};
+
 /// make typeids more readable
 std::string demangle_cpp_symbol(const char* name);
 
diff --git a/faiss/impl/ScalarQuantizer.cpp b/faiss/impl/ScalarQuantizer.cpp
index a3cf4c744e..8d18907875 100644
--- a/faiss/impl/ScalarQuantizer.cpp
+++ b/faiss/impl/ScalarQuantizer.cpp
@@ -1115,32 +1115,6 @@ void ScalarQuantizer::train(size_t n, const float* x) {
     }
 }
 
-void ScalarQuantizer::train_residual(
-        size_t n,
-        const float* x,
-        Index* quantizer,
-        bool by_residual,
-        bool verbose) {
-    const float* x_in = x;
-
-    // 100k points more than enough
-    x = fvecs_maybe_subsample(d, (size_t*)&n, 100000, x, verbose, 1234);
-
-    ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
-
-    if (by_residual) {
-        std::vector<idx_t> idx(n);
-        quantizer->assign(n, x, idx.data());
-
-        std::vector<float> residuals(n * d);
-        quantizer->compute_residual_n(n, x, residuals.data(), idx.data());
-
-        train(n, residuals.data());
-    } else {
-        train(n, x);
-    }
-}
-
 ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
 #ifdef USE_F16C
     if (d % 8 == 0) {
diff --git a/faiss/impl/ScalarQuantizer.h b/faiss/impl/ScalarQuantizer.h
index e29a1420c9..550a979092 100644
--- a/faiss/impl/ScalarQuantizer.h
+++ b/faiss/impl/ScalarQuantizer.h
@@ -65,14 +65,6 @@ struct ScalarQuantizer : Quantizer {
 
     void train(size_t n, const float* x) override;
 
-    /// Used by an IVF index to train based on the residuals
-    void train_residual(
-            size_t n,
-            const float* x,
-            Index* quantizer,
-            bool by_residual,
-            bool verbose);
-
     /** Encode a set of vectors
      *
      * @param x      vectors to encode, size n * d
diff --git a/faiss/impl/index_write.cpp b/faiss/impl/index_write.cpp
index 6ea40e1be7..d40f651c56 100644
--- a/faiss/impl/index_write.cpp
+++ b/faiss/impl/index_write.cpp
@@ -385,6 +385,8 @@ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) {
     write_index_header(ivf, f);
     WRITE1(ivf->nlist);
     WRITE1(ivf->nprobe);
+    // subclasses write by_residual (some of them support only one setting of
+    // by_residual).
     write_index(ivf->quantizer, f);
     write_direct_map(&ivf->direct_map, f);
 }