From 9dbd1cd4177c43fcaac961a3b16fb2d044c9940a Mon Sep 17 00:00:00 2001 From: Robert Kieffer Date: Thu, 18 Jul 2024 12:05:45 -1000 Subject: [PATCH] fix!: refactor v7 internal state and options logic, fixes #764 (#779) Co-authored-by: Robert Kieffer Co-authored-by: Robin Pokorny --- README.md | 4 +- README_js.md | 4 +- package.json | 4 +- src/test/v7.test.ts | 233 +++++++++++++++++++++++--------------------- src/v7.ts | 204 +++++++++++++++----------------------- 5 files changed, 207 insertions(+), 242 deletions(-) diff --git a/README.md b/README.md index b9079b69..3720dadb 100644 --- a/README.md +++ b/README.md @@ -338,10 +338,10 @@ Create an RFC version 7 (random) UUID | | | | --- | --- | | [`options`] | `Object` with one or more of the following properties: | -| [`options.msecs`] | RFC "timestamp" field (`Number` of milliseconds, unix epoch) | +| [`options.msecs`] | RFC "timestamp" field (`Number` of milliseconds, unix epoch). Default = `Date.now()` | | [`options.random`] | `Array` of 16 random bytes (0-255) | | [`options.rng`] | Alternative to `options.random`, a `Function` that returns an `Array` of 16 random bytes (0-255) | -| [`options.seq`] | 31 bit monotonic sequence counter as `Number` between 0 - 0x7fffffff | +| [`options.seq`] | 32-bit sequence `Number` between 0 - 0xffffffff. This may be provided to help insure uniqueness for UUIDs generated within the same millisecond time interval. Default = random value. | | [`buffer`] | `Array \| Buffer` If specified, uuid will be written here in byte-form, starting at `offset` | | [`offset` = 0] | `Number` Index to start writing UUID bytes in `buffer` | | _returns_ | UUID `String` if no `buffer` is specified, otherwise returns `buffer` | diff --git a/README_js.md b/README_js.md index fc7a1e4c..1bc5bea4 100644 --- a/README_js.md +++ b/README_js.md @@ -346,10 +346,10 @@ Create an RFC version 7 (random) UUID | | | | --- | --- | | [`options`] | `Object` with one or more of the following properties: | -| [`options.msecs`] | RFC "timestamp" field (`Number` of milliseconds, unix epoch) | +| [`options.msecs`] | RFC "timestamp" field (`Number` of milliseconds, unix epoch). Default = `Date.now()` | | [`options.random`] | `Array` of 16 random bytes (0-255) | | [`options.rng`] | Alternative to `options.random`, a `Function` that returns an `Array` of 16 random bytes (0-255) | -| [`options.seq`] | 31 bit monotonic sequence counter as `Number` between 0 - 0x7fffffff | +| [`options.seq`] | 32-bit sequence `Number` between 0 - 0xffffffff. This may be provided to help insure uniqueness for UUIDs generated within the same millisecond time interval. Default = random value. | | [`buffer`] | `Array \| Buffer` If specified, uuid will be written here in byte-form, starting at `offset` | | [`offset` = 0] | `Number` Index to start writing UUID bytes in `buffer` | | _returns_ | UUID `String` if no `buffer` is specified, otherwise returns `buffer` | diff --git a/package.json b/package.json index 6b5e4d3e..4b0e241e 100644 --- a/package.json +++ b/package.json @@ -113,8 +113,8 @@ "test:browser": "wdio run ./wdio.conf.js", "test:node": "npm-run-all --parallel examples:node:**", "test:pack": "./scripts/testpack.sh", - "test:watch": "node --test --watch dist/esm/test", - "test": "node --test dist/esm/test" + "test:watch": "node --test --enable-source-maps --watch dist/esm/test", + "test": "node --test --enable-source-maps dist/esm/test" }, "repository": { "type": "git", diff --git a/src/test/v7.test.ts b/src/test/v7.test.ts index 98bf2bf5..5b7d280d 100644 --- a/src/test/v7.test.ts +++ b/src/test/v7.test.ts @@ -1,72 +1,40 @@ import * as assert from 'assert'; import test, { describe } from 'node:test'; import { Version7Options } from '../_types.js'; -import v7 from '../v7.js'; +import parse from '../parse.js'; import stringify from '../stringify.js'; +import v7, { updateV7State } from '../v7.js'; -/** - * fixture bit layout: - * ref: https://www.rfc-editor.org/rfc/rfc9562.html#name-example-of-a-uuidv7-value - * - * expectedBytes was calculated using this script: - * https://gist.github.com/d5382ac3a1ce4ba9ba40a90d9da8cbf1 - * - * ------------------------------- - * field bits value - * ------------------------------- - * unix_ts_ms 48 0x17F22E279B0 - * ver 4 0x7 - * rand_a 12 0xCC3 - * var 2 b10 - * rand_b 62 b01, 0x8C4DC0C0C07398F - * ------------------------------- - * total 128 - * ------------------------------- - * final: 017f22e2-79b0-7cc3-98c4-dc0c0c07398f - */ +// Fixture values for testing with the rfc v7 UUID example: +// https://www.rfc-editor.org/rfc/rfc9562.html#name-example-of-a-uuidv7-value +const RFC_V7 = '017f22e2-79b0-7cc3-98c4-dc0c0c07398f'; +const RFC_V7_BYTES = parse('017f22e2-79b0-7cc3-98c4-dc0c0c07398f'); +const RFC_MSECS = 0x17f22e279b0; -describe('v7', () => { - const msecsFixture = 1645557742000; - const seqFixture = 0x661b189b; - - const randomBytesFixture = Uint8Array.of( - 0x10, - 0x91, - 0x56, - 0xbe, - 0xc4, - 0xfb, - 0x0c, - 0xc3, - 0x18, - 0xc4, - 0xdc, - 0x0c, - 0x0c, - 0x07, - 0x39, - 0x8f - ); +// `option.seq` for the above RFC uuid +const RFC_SEQ = (0x0cc3 << 20) | (0x98c4dc >> 2); - const expectedBytes = Uint8Array.of( - 1, - 127, - 34, - 226, - 121, - 176, - 124, - 195, - 152, - 196, - 220, - 12, - 12, - 7, - 57, - 143 - ); +// `option,random` for the above RFC uuid +const RFC_RANDOM = Uint8Array.of( + 0x10, + 0x91, + 0x56, + 0xbe, + 0xc4, + 0xfb, + 0x0c, + 0xc3, + 0x18, + 0xc4, + 0x6c, + 0x0c, + 0x0c, + 0x07, + 0x39, + 0x8f +); +describe('v7', () => { test('subsequent UUIDs are different', () => { const id1 = v7(); const id2 = v7(); @@ -75,25 +43,25 @@ describe('v7', () => { test('explicit options.random and options.msecs produces expected result', () => { const id = v7({ - random: randomBytesFixture, - msecs: msecsFixture, - seq: seqFixture, + random: RFC_RANDOM, + msecs: RFC_MSECS, + seq: RFC_SEQ, }); - assert.strictEqual(id, '017f22e2-79b0-7cc3-98c4-dc0c0c07398f'); + assert.strictEqual(id, RFC_V7); }); test('explicit options.rng produces expected result', () => { const id = v7({ - rng: () => randomBytesFixture, - msecs: msecsFixture, - seq: seqFixture, + rng: () => RFC_RANDOM, + msecs: RFC_MSECS, + seq: RFC_SEQ, }); - assert.strictEqual(id, '017f22e2-79b0-7cc3-98c4-dc0c0c07398f'); + assert.strictEqual(id, RFC_V7); }); test('explicit options.msecs produces expected result', () => { const id = v7({ - msecs: msecsFixture, + msecs: RFC_MSECS, }); assert.strictEqual(id.indexOf('017f22e2'), 0); }); @@ -102,13 +70,15 @@ describe('v7', () => { const buffer = new Uint8Array(16); const result = v7( { - random: randomBytesFixture, - msecs: msecsFixture, - seq: seqFixture, + random: RFC_RANDOM, + msecs: RFC_MSECS, + seq: RFC_SEQ, }, buffer ); - assert.deepEqual(buffer, expectedBytes); + stringify(buffer); + + assert.deepEqual(buffer, RFC_V7_BYTES); assert.strictEqual(buffer, result); }); @@ -117,25 +87,25 @@ describe('v7', () => { v7( { - random: randomBytesFixture, - msecs: msecsFixture, - seq: seqFixture, + random: RFC_RANDOM, + msecs: RFC_MSECS, + seq: RFC_SEQ, }, buffer, 0 ); v7( { - random: randomBytesFixture, - msecs: msecsFixture, - seq: seqFixture, + random: RFC_RANDOM, + msecs: RFC_MSECS, + seq: RFC_SEQ, }, buffer, 16 ); const expected = new Uint8Array(32); - expected.set(expectedBytes); - expected.set(expectedBytes, 16); + expected.set(RFC_V7_BYTES); + expected.set(RFC_V7_BYTES, 16); assert.deepEqual(buffer, expected); }); @@ -146,7 +116,7 @@ describe('v7', () => { test('lexicographical sorting is preserved', () => { let id; let prior; - let msecs = msecsFixture; + let msecs = RFC_MSECS; for (let i = 0; i < 20000; ++i) { if (i % 1500 === 0) { // every 1500 runs increment msecs so seq is @@ -154,7 +124,7 @@ describe('v7', () => { msecs += 1; } - id = v7({ msecs }); + id = v7({ msecs, seq: i }); if (prior !== undefined) { assert.ok(prior < id, `${prior} < ${id}`); @@ -164,46 +134,89 @@ describe('v7', () => { } }); - test('handles seq rollover', () => { - const msecs = msecsFixture; - const a = v7({ - msecs, - seq: 0x7fffffff, - }); - - v7({ msecs }); - - const c = v7({ msecs }); - - assert.ok(a < c, `${a} < ${c}`); + test('internal state updates properly', () => { + const tests = [ + { + title: 'new time interval', + state: { msecs: 1, seq: 123 }, + now: 2, + expected: { + msecs: 2, // time interval should update + seq: 0x6c318c4, // sequence should be randomized + }, + }, + { + title: 'same time interval', + state: { msecs: 1, seq: 123 }, + now: 1, + expected: { + msecs: 1, // timestamp unchanged + seq: 124, // sequence increments + }, + }, + { + title: 'same time interval (sequence rollover)', + state: { msecs: 1, seq: 0xffffffff }, + now: 1, + expected: { + msecs: 2, // timestamp increments + seq: 0, // sequence rolls over + }, + }, + { + title: 'time regression', + state: { msecs: 2, seq: 123 }, + now: 1, + expected: { + msecs: 2, // timestamp unchanged + seq: 124, // sequence increments + }, + }, + { + title: 'time regression (sequence rollover)', + state: { msecs: 2, seq: 0xffffffff }, + now: 1, + expected: { + // timestamp increments (crazy, right? The system clock goes backwards + // but the UUID timestamp moves forward? Weird, but it's what's + // required to maintain monotonicity... and this is why we have unit + // tests!) + msecs: 3, + seq: 0, // sequence rolls over + }, + }, + ]; + for (const { title, state, now, expected } of tests) { + assert.deepStrictEqual(updateV7State(state, now, RFC_RANDOM), expected, `Failed: ${title}`); + } }); test('can supply seq', () => { let seq = 0x12345; let uuid = v7({ - msecs: msecsFixture, + msecs: RFC_MSECS, seq, }); - assert.strictEqual(uuid.substr(0, 25), '017f22e2-79b0-7000-891a-2'); + assert.strictEqual(uuid.substr(0, 25), '017f22e2-79b0-7000-848d-1'); seq = 0x6fffffff; uuid = v7({ - msecs: msecsFixture, + msecs: RFC_MSECS, seq, }); - assert.strictEqual(uuid.substr(0, 25), '017f22e2-79b0-7dff-bfff-f'); + assert.strictEqual(uuid.substring(0, 25), '017f22e2-79b0-76ff-bfff-f'); }); test('internal seq is reset upon timestamp change', () => { v7({ - msecs: msecsFixture, + msecs: RFC_MSECS, seq: 0x6fffffff, }); const uuid = v7({ - msecs: msecsFixture + 1, + msecs: RFC_MSECS + 1, }); assert.ok(uuid.indexOf('fff') !== 15); @@ -216,18 +229,18 @@ describe('v7', () => { // convert the given number of bits (LE) to number const asNumber = (bits: number, data: bigint) => Number(BigInt.asUintN(bits, data)); - // flip the nth bit (BE) in a BigInt + // flip the nth bit (BE) in a BigInt const flip = (data: bigint, n: number) => data ^ (1n << BigInt(127 - n)); // Extract v7 `options` from a (BigInt) UUID const optionsFrom = (data: bigint): Version7Options => { - const ms = asNumber(48, data >> (128n - 48n)); - const hi = asNumber(12, data >> (43n + 19n + 2n)); - const lo = asNumber(19, data >> 43n); - const r = BigInt.asUintN(43, data); + const ms = asNumber(48, data >> 80n); + const hi = asNumber(12, data >> 64n); + const lo = asNumber(20, data >> 42n); + const r = BigInt.asUintN(42, data); return { msecs: ms, - seq: (hi << 19) | lo, + seq: (hi << 20) | lo, random: Uint8Array.from([ ...Array(10).fill(0), ...Array(6) @@ -247,8 +260,8 @@ describe('v7', () => { } const flipped = flip(data, i); assert.strictEqual( - asBigInt(v7(optionsFrom(flipped), buf)), - flipped, + asBigInt(v7(optionsFrom(flipped), buf)).toString(16), + flipped.toString(16), `Unequal uuids at bit ${i}` ); assert.notStrictEqual(stringify(buf), id); diff --git a/src/v7.ts b/src/v7.ts index fdfedfce..f4fe5bba 100644 --- a/src/v7.ts +++ b/src/v7.ts @@ -2,154 +2,106 @@ import { UUIDTypes, Version7Options } from './_types.js'; import rng from './rng.js'; import { unsafeStringify } from './stringify.js'; -/** - * UUID V7 - Unix Epoch time-based UUID - * - * The IETF has published RFC9562, introducing 3 new UUID versions (6,7,8). This - * implementation of V7 is based on the accepted, though not yet approved, - * revisions. - * - * RFC 9562:https://www.rfc-editor.org/rfc/rfc9562.html Universally Unique - * IDentifiers (UUIDs) - - * - * Sample V7 value: - * https://www.rfc-editor.org/rfc/rfc9562.html#name-example-of-a-uuidv7-value - * - * Monotonic Bit Layout: RFC rfc9562.6.2 Method 1, Dedicated Counter Bits ref: - * https://www.rfc-editor.org/rfc/rfc9562.html#section-6.2-5.1 - * - * 0 1 2 3 0 1 2 3 4 5 6 - * 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | unix_ts_ms | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | unix_ts_ms | ver | seq_hi | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * |var| seq_low | rand | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | rand | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * - * seq is a 31 bit serialized counter; comprised of 12 bit seq_hi and 19 bit - * seq_low, and randomly initialized upon timestamp change. 31 bit counter size - * was selected as any bitwise operations in node are done as _signed_ 32 bit - * ints. we exclude the sign bit. - */ - -let _seqLow: number | null = null; -let _seqHigh: number | null = null; -let _msecs = 0; +type V7State = { + msecs: number; // time, milliseconds + seq: number; // sequence number (32-bits) +}; + +const _state: V7State = { + msecs: -Infinity, + seq: 0, +}; function v7(options?: Version7Options, buf?: undefined, offset?: number): string; function v7(options?: Version7Options, buf?: Uint8Array, offset?: number): Uint8Array; function v7(options?: Version7Options, buf?: Uint8Array, offset?: number): UUIDTypes { - options ??= {}; - - // initialize buffer and pointer - let i = (buf && offset) || 0; - const b = buf || new Uint8Array(16); - - // rnds is Uint8Array(16) filled with random bytes - const rnds = options.random || (options.rng || rng)(); - - // milliseconds since unix epoch, 1970-01-01 00:00 - const msecs = options.msecs !== undefined ? options.msecs : Date.now(); - - // seq is user provided 31 bit counter - let seq = options.seq !== undefined ? options.seq : null; - - // initialize local seq high/low parts - let seqHigh = _seqHigh; - let seqLow = _seqLow; - - // check if clock has advanced and user has not provided msecs - if (msecs > _msecs && options.msecs === undefined) { - _msecs = msecs; - - // unless user provided seq, reset seq parts - if (seq !== null) { - seqHigh = null; - seqLow = null; - } - } + let bytes: Uint8Array; + + if (options) { + // With options: Make UUID independent of internal state + bytes = v7Bytes( + options.random ?? options.rng?.() ?? rng(), + options.msecs, + options.seq, + buf, + offset + ); + } else { + // No options: Use internal state + const now = Date.now(); + const rnds = rng(); - // if we have a user provided seq - if (seq !== null) { - // trim provided seq to 31 bits of value, avoiding overflow - if (seq > 0x7fffffff) { - seq = 0x7fffffff; - } + updateV7State(_state, now, rnds); - // split provided seq into high/low parts - seqHigh = (seq >>> 19) & 0xfff; - seqLow = seq & 0x7ffff; + bytes = v7Bytes(rnds, _state.msecs, _state.seq, buf, offset); } - // randomly initialize seq - if (seqHigh === null || seqLow === null) { - seqHigh = rnds[6] & 0x7f; - seqHigh = (seqHigh << 8) | rnds[7]; + return buf ? bytes : unsafeStringify(bytes); +} - seqLow = rnds[8] & 0x3f; // pad for var - seqLow = (seqLow << 8) | rnds[9]; - seqLow = (seqLow << 5) | (rnds[10] >>> 3); +// (Private!) Do not use. This method is only exported for testing purposes +// and may change without notice. +export function updateV7State(state: V7State, now: number, rnds: Uint8Array) { + if (now > state.msecs) { + // Time has moved on! Pick a new random sequence number + state.seq = (rnds[6] << 23) | (rnds[7] << 16) | (rnds[8] << 8) | rnds[9]; + state.msecs = now; + } else { + // Bump sequence counter w/ 32-bit rollover + state.seq = (state.seq + 1) | 0; + + // In case of rollover, bump timestamp to preserve monotonicity. This is + // allowed by the RFC and should self-correct as the system clock catches + // up. See https://www.rfc-editor.org/rfc/rfc9562.html#section-6.2-9.4 + if (state.seq === 0) { + state.msecs++; + } } - // increment seq if within msecs window - if (msecs + 10000 > _msecs && seq === null) { - if (++seqLow > 0x7ffff) { - seqLow = 0; - - if (++seqHigh > 0xfff) { - seqHigh = 0; + return state; +} - // increment internal _msecs. this allows us to continue incrementing - // while staying monotonic. Note, once we hit 10k milliseconds beyond system - // clock, we will reset breaking monotonicity (after (2^31)*10000 generations) - _msecs++; - } - } - } else { - // resetting; we have advanced more than - // 10k milliseconds beyond system clock - _msecs = msecs; +function v7Bytes(rnds: Uint8Array, msecs?: number, seq?: number, buf?: Uint8Array, offset = 0) { + if (!buf) { + buf = new Uint8Array(16); + offset = 0; } - _seqHigh = seqHigh; - _seqLow = seqLow; + // Defaults + msecs ??= Date.now(); + seq ??= ((rnds[6] * 0x7f) << 24) | (rnds[7] << 16) | (rnds[8] << 8) | rnds[9]; - // [bytes 0-5] 48 bits of local timestamp - b[i++] = (_msecs / 0x10000000000) & 0xff; - b[i++] = (_msecs / 0x100000000) & 0xff; - b[i++] = (_msecs / 0x1000000) & 0xff; - b[i++] = (_msecs / 0x10000) & 0xff; - b[i++] = (_msecs / 0x100) & 0xff; - b[i++] = _msecs & 0xff; + // byte 0-5: timestamp (48 bits) + buf[offset++] = (msecs / 0x10000000000) & 0xff; + buf[offset++] = (msecs / 0x100000000) & 0xff; + buf[offset++] = (msecs / 0x1000000) & 0xff; + buf[offset++] = (msecs / 0x10000) & 0xff; + buf[offset++] = (msecs / 0x100) & 0xff; + buf[offset++] = msecs & 0xff; - // [byte 6] - set 4 bits of version (7) with first 4 bits seq_hi - b[i++] = ((seqHigh >>> 8) & 0x0f) | 0x70; + // byte 6: `version` (4 bits) | sequence bits 28-31 (4 bits) + buf[offset++] = 0x70 | ((seq >>> 28) & 0x0f); - // [byte 7] remaining 8 bits of seq_hi - b[i++] = seqHigh & 0xff; + // byte 7: sequence bits 20-27 (8 bits) + buf[offset++] = (seq >>> 20) & 0xff; - // [byte 8] - variant (2 bits), first 6 bits seq_low - b[i++] = ((seqLow >>> 13) & 0x3f) | 0x80; + // byte 8: `variant` (2 bits) | sequence bits 14-19 (6 bits) + buf[offset++] = 0x80 | ((seq >>> 14) & 0x3f); - // [byte 9] 8 bits seq_low - b[i++] = (seqLow >>> 5) & 0xff; + // byte 9: sequence bits 6-13 (8 bits) + buf[offset++] = (seq >>> 6) & 0xff; - // [byte 10] remaining 5 bits seq_low, 3 bits random - b[i++] = ((seqLow << 3) & 0xff) | (rnds[10] & 0x07); + // byte 10: sequence bits 0-5 (6 bits) | random (2 bits) + buf[offset++] = ((seq << 2) & 0xff) | (rnds[10] & 0x03); - // [bytes 11-15] always random - b[i++] = rnds[11]; - b[i++] = rnds[12]; - b[i++] = rnds[13]; - b[i++] = rnds[14]; - b[i++] = rnds[15]; + // bytes 11-15: random (40 bits) + buf[offset++] = rnds[11]; + buf[offset++] = rnds[12]; + buf[offset++] = rnds[13]; + buf[offset++] = rnds[14]; + buf[offset++] = rnds[15]; - return buf || unsafeStringify(b); + return buf; } export default v7;