From d45ad26e4be401bb605394f6c614d04973a90bee Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Tue, 6 Dec 2016 16:29:16 -0500 Subject: [PATCH] distsql: don't use VALUE encodings in HashJoiner It looks like VALUE encodings are not unique (they contain a column ID, which can vary), so they shouldn't be used for equality testing in hash joins. --- pkg/sql/distsql/hashjoiner.go | 7 ++++++- pkg/sql/sqlbase/encoded_datum.go | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pkg/sql/distsql/hashjoiner.go b/pkg/sql/distsql/hashjoiner.go index 7113d95ef895..b92bad7feecd 100644 --- a/pkg/sql/distsql/hashjoiner.go +++ b/pkg/sql/distsql/hashjoiner.go @@ -240,7 +240,12 @@ func (h *hashJoiner) encode( if row[colIdx].IsNull() { return nil, true, nil } - appendTo, err = row[colIdx].Encode(&h.datumAlloc, sqlbase.DatumEncoding_VALUE, appendTo) + // Note: we cannot compare VALUE encodings because they contain column IDs + // which can vary. + // TODO(radu): we should figure out what encoding is readily available and + // use that (though it needs to be consistent across all rows). We could add + // functionality to compare VALUE encodings ignoring the column ID. + appendTo, err = row[colIdx].Encode(&h.datumAlloc, sqlbase.DatumEncoding_ASCENDING_KEY, appendTo) if err != nil { return appendTo, false, err } diff --git a/pkg/sql/sqlbase/encoded_datum.go b/pkg/sql/sqlbase/encoded_datum.go index 85a9bb3d6bf7..f1278dd72b07 100644 --- a/pkg/sql/sqlbase/encoded_datum.go +++ b/pkg/sql/sqlbase/encoded_datum.go @@ -197,6 +197,8 @@ func (ed *EncDatum) Encoding() (DatumEncoding, bool) { // Encode appends the encoded datum to the given slice using the requested // encoding. +// Note: DatumEncoding_VALUE encodings are not unique because they can contain +// a column ID so they should not be used to test for equality. 
func (ed *EncDatum) Encode(a *DatumAlloc, enc DatumEncoding, appendTo []byte) ([]byte, error) { if ed.encoded != nil && enc == ed.encoding { // We already have an encoding that matches