-
Notifications
You must be signed in to change notification settings - Fork 149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Draft] Implement vector field type #1899
base: main
Are you sure you want to change the base?
Changes from 7 commits
36a50cb
415c94e
3551acb
248af30
6042c4d
764800b
93ae1d7
2f1b964
9ebd42a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,49 @@ import { | |
|
||
import api = proto.google.firestore.v1; | ||
|
||
export class VectorValue implements firestore.VectorValue { | ||
private readonly values: number[]; | ||
constructor(values: number[] | undefined) { | ||
this.values = values || []; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider making a copy of the argument so the internal There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
} | ||
|
||
public toArray(): number[] { | ||
return this.values; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider returning a copy of the internal data so the values array cannot be modified. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. This is usually a good suggestion, I am wondering if it is as good with vectors, given the size and the likelihood of people modifying vectors. Nevertheless, I am OK with making copies for now, and later optimize. |
||
} | ||
|
||
/** | ||
* @private | ||
*/ | ||
toProto(serializer: Serializer): api.IValue { | ||
return serializer.encodeVector({ | ||
arrayValue: { | ||
values: this.values.map(value => { | ||
return { | ||
doubleValue: value, | ||
}; | ||
}), | ||
}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: it's a little challenging to read this proto serialization since it is spread across this fie and the Serializer. Would it be cleaner to move the array serialization into encodeVector so it's all in one place? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or move the whole implementation here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
}); | ||
} | ||
|
||
/** | ||
* @private | ||
*/ | ||
static fromProto(valueArray: api.IValue): VectorValue { | ||
const values = valueArray.arrayValue?.values?.map(v => { | ||
return v.doubleValue!; | ||
}); | ||
return new VectorValue(values); | ||
} | ||
|
||
/** | ||
* @private | ||
*/ | ||
isEqual(other: VectorValue): boolean { | ||
return this.values === other.values; | ||
} | ||
} | ||
|
||
/** | ||
* Sentinel values that can be used when writing documents with set(), create() | ||
* or update(). | ||
|
@@ -40,6 +83,10 @@ export class FieldValue implements firestore.FieldValue { | |
/** @private */ | ||
constructor() {} | ||
|
||
static vector(values?: number[]): VectorValue { | ||
return new VectorValue(values); | ||
} | ||
|
||
/** | ||
* Returns a sentinel for use with update() or set() with {merge:true} to mark | ||
* a field for deletion. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -625,6 +625,11 @@ | |
'Initialized Firestore GAPIC Client (useFallback: %s)', | ||
useFallback | ||
); | ||
logger( | ||
'client', | ||
'init', | ||
`${JSON.stringify((client as any)._opts.servicePath)}` | ||
); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it intentional to leave this logger statement here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like something you may have used for debugging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. Deleted. |
||
return client; | ||
}, | ||
/* clientDestructor= */ client => client.close() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ import {DocumentData} from '@google-cloud/firestore'; | |
import * as proto from '../protos/firestore_v1_proto_api'; | ||
|
||
import {detectValueType} from './convert'; | ||
import {DeleteTransform, FieldTransform} from './field-value'; | ||
import {DeleteTransform, FieldTransform, VectorValue} from './field-value'; | ||
import {GeoPoint} from './geo-point'; | ||
import {DocumentReference, Firestore} from './index'; | ||
import {FieldPath, QualifiedResourcePath} from './path'; | ||
|
@@ -38,6 +38,10 @@ import api = proto.google.firestore.v1; | |
*/ | ||
const MAX_DEPTH = 20; | ||
|
||
const RESERVED_MAP_KEY = '__type__'; | ||
const RESERVED_MAP_KEY_VECTOR_VALUE = '__vector__'; | ||
const RESERVED_VECTOR_MAP_VECTORS_KEY = 'value'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: "RESERVED_VECTOR_MAP_VECTORS_KEY" may not need the "RESERVED_" prefix since the value is not surrounded with "__" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
/** | ||
* An interface for Firestore types that can be serialized to Protobuf. | ||
* | ||
|
@@ -168,6 +172,10 @@ export class Serializer { | |
}; | ||
} | ||
|
||
if (val instanceof VectorValue) { | ||
return val.toProto(this); | ||
} | ||
|
||
if (isObject(val)) { | ||
const toProto = val['toProto']; | ||
if (typeof toProto === 'function') { | ||
|
@@ -217,6 +225,22 @@ export class Serializer { | |
throw new Error(`Cannot encode value: ${val}`); | ||
} | ||
|
||
/** | ||
* @private | ||
*/ | ||
encodeVector(vectorValue: api.IValue): api.IValue { | ||
return { | ||
mapValue: { | ||
fields: { | ||
[RESERVED_MAP_KEY]: { | ||
stringValue: RESERVED_MAP_KEY_VECTOR_VALUE, | ||
}, | ||
[RESERVED_VECTOR_MAP_VECTORS_KEY]: vectorValue, | ||
}, | ||
}, | ||
}; | ||
} | ||
|
||
/** | ||
* Decodes a single Firestore 'Value' Protobuf. | ||
* | ||
|
@@ -263,15 +287,27 @@ export class Serializer { | |
return null; | ||
} | ||
case 'mapValue': { | ||
const obj: DocumentData = {}; | ||
const fields = proto.mapValue!.fields; | ||
if (fields) { | ||
for (const prop of Object.keys(fields)) { | ||
obj[prop] = this.decodeValue(fields[prop]); | ||
const props = Object.keys(fields); | ||
if ( | ||
props.indexOf(RESERVED_MAP_KEY) !== -1 && | ||
this.decodeValue(fields[RESERVED_MAP_KEY]) === | ||
RESERVED_MAP_KEY_VECTOR_VALUE | ||
) { | ||
return VectorValue.fromProto( | ||
fields[RESERVED_VECTOR_MAP_VECTORS_KEY] | ||
); | ||
} else { | ||
const obj: DocumentData = {}; | ||
for (const prop of Object.keys(fields)) { | ||
obj[prop] = this.decodeValue(fields[prop]); | ||
} | ||
return obj; | ||
} | ||
} else { | ||
return {}; | ||
} | ||
|
||
return obj; | ||
} | ||
case 'geoPointValue': { | ||
return GeoPoint.fromProto(proto.geoPointValue!); | ||
|
@@ -367,6 +403,8 @@ export function validateUserInput( | |
'If you want to ignore undefined values, enable `ignoreUndefinedProperties`.' | ||
); | ||
} | ||
} else if (value instanceof VectorValue) { | ||
// OK | ||
} else if (value instanceof DeleteTransform) { | ||
if (inArray) { | ||
throw new Error( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,6 +62,8 @@ | |
|
||
const version = require('../../package.json').version; | ||
|
||
setLogFunction(console.log); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure if this statement is in the PR intentionally There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another debug line. Deleted. |
||
|
||
class DeferredPromise<T> { | ||
resolve: Function; | ||
reject: Function; | ||
|
@@ -104,6 +106,8 @@ | |
internalSettings.databaseId = process.env.FIRESTORE_NAMED_DATABASE; | ||
} | ||
|
||
settings.host = 'test-firestore.sandbox.googleapis.com'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FWIW. You can also set environment variable I think this statement needs to be removed before committing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deleted. |
||
|
||
const firestore = new Firestore({ | ||
...internalSettings, | ||
...settings, // caller settings take precedent over internal settings | ||
|
@@ -1018,6 +1022,30 @@ | |
return promise; | ||
}); | ||
|
||
it.only('can write and read vector embeddings', async () => { | ||
const ref = randomCol.doc(); | ||
await ref.create({ | ||
vectorEmpty: FieldValue.vector(), | ||
vector1: FieldValue.vector([1, 2, 3.99]), | ||
}); | ||
await ref.set({ | ||
vectorEmpty: FieldValue.vector(), | ||
vector1: FieldValue.vector([1, 2, 3.99]), | ||
vector2: FieldValue.vector([0, 0, 0]), | ||
}); | ||
await ref.update({ | ||
vector3: FieldValue.vector([-1, -200, -999]), | ||
}); | ||
|
||
const snap1 = await ref.get(); | ||
expect(snap1.get('vectorEmpty')).to.deep.equal(FieldValue.vector()); | ||
expect(snap1.get('vector1')).to.deep.equal(FieldValue.vector([1, 2, 3.99])); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I want to change this in the other PR to save myself from conflict resolving, hopefully that is OK with you. |
||
expect(snap1.get('vector2')).to.deep.equal(FieldValue.vector([0, 0, 0])); | ||
expect(snap1.get('vector3')).to.deep.equal( | ||
FieldValue.vector([-1, -200, -999]) | ||
); | ||
}); | ||
|
||
describe('watch', () => { | ||
const currentDeferred = new DeferredPromise<DocumentSnapshot>(); | ||
|
||
|
@@ -1311,6 +1339,91 @@ | |
const result2 = await ref2.get(); | ||
expect(result2.data()).to.deep.equal([1, 2, 3]); | ||
}); | ||
|
||
it.only('can listen to documents with vectors', async () => { | ||
const ref = randomCol.doc(); | ||
const initialDeferred = new Deferred<void>(); | ||
const createDeferred = new Deferred<void>(); | ||
const setDeferred = new Deferred<void>(); | ||
const updateDeferred = new Deferred<void>(); | ||
const deleteDeferred = new Deferred<void>(); | ||
|
||
const expected = [ | ||
initialDeferred, | ||
createDeferred, | ||
setDeferred, | ||
updateDeferred, | ||
deleteDeferred, | ||
]; | ||
let idx = 0; | ||
let document: DocumentSnapshot | null = null; | ||
|
||
const unlisten = randomCol | ||
.where('purpose', '==', 'vector tests') | ||
.onSnapshot(snap => { | ||
expected[idx].resolve(); | ||
idx += 1; | ||
if (snap.docs.length > 0) { | ||
document = snap.docs[0]; | ||
} else { | ||
document = null; | ||
} | ||
}); | ||
|
||
await initialDeferred.promise; | ||
expect(document).to.be.null; | ||
|
||
await ref.create({ | ||
purpose: 'vector tests', | ||
vectorEmpty: FieldValue.vector(), | ||
vector1: FieldValue.vector([1, 2, 3.99]), | ||
}); | ||
|
||
await createDeferred.promise; | ||
expect(document).to.be.not.null; | ||
expect(document!.get('vectorEmpty')).to.deep.equal(FieldValue.vector()); | ||
expect(document!.get('vector1')).to.deep.equal( | ||
FieldValue.vector([1, 2, 3.99]) | ||
); | ||
|
||
await ref.set({ | ||
purpose: 'vector tests', | ||
vectorEmpty: FieldValue.vector(), | ||
vector1: FieldValue.vector([1, 2, 3.99]), | ||
vector2: FieldValue.vector([0, 0, 0]), | ||
}); | ||
await setDeferred.promise; | ||
expect(document).to.be.not.null; | ||
expect(document!.get('vectorEmpty')).to.deep.equal(FieldValue.vector()); | ||
expect(document!.get('vector1')).to.deep.equal( | ||
FieldValue.vector([1, 2, 3.99]) | ||
); | ||
expect(document!.get('vector2')).to.deep.equal( | ||
FieldValue.vector([0, 0, 0]) | ||
); | ||
|
||
await ref.update({ | ||
vector3: FieldValue.vector([-1, -200, -999]), | ||
}); | ||
await updateDeferred.promise; | ||
expect(document).to.be.not.null; | ||
expect(document!.get('vectorEmpty')).to.deep.equal(FieldValue.vector()); | ||
expect(document!.get('vector1')).to.deep.equal( | ||
FieldValue.vector([1, 2, 3.99]) | ||
); | ||
expect(document!.get('vector2')).to.deep.equal( | ||
FieldValue.vector([0, 0, 0]) | ||
); | ||
expect(document!.get('vector3')).to.deep.equal( | ||
FieldValue.vector([-1, -200, -999]) | ||
); | ||
|
||
await ref.delete(); | ||
await deleteDeferred.promise; | ||
expect(document).to.be.null; | ||
|
||
unlisten(); | ||
}); | ||
}); | ||
|
||
describe('Query class', () => { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider prefixing the variable with underscore to discourage use by JS developers
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.