From 6ce8b24c6d9fdbcb3ed6ab640a0c972f1771e468 Mon Sep 17 00:00:00 2001 From: Seth Brenith Date: Thu, 25 Jan 2018 09:48:38 -0800 Subject: [PATCH] http: simplify checkInvalidHeaderChar In the spirit of [17399](https://github.com/nodejs/node/pull/17399), we can also simplify checkInvalidHeaderChar to use regex matching instead of a loop. This makes it faster on long matches and slower on short matches or non-matches. This change also includes some sample data from an AcmeAir benchmark run, as a rough proxy for real-world data. PR-URL: https://github.com/nodejs/node/pull/18381 Reviewed-By: Ruben Bridgewater Reviewed-By: Matteo Collina Reviewed-By: Joyee Cheung Reviewed-By: Benedikt Meurer Reviewed-By: Tiancheng "Timothy" Gu --- benchmark/http/check_invalid_header_char.js | 81 +++++++++++++-------- lib/_http_common.js | 46 +----------- 2 files changed, 54 insertions(+), 73 deletions(-) diff --git a/benchmark/http/check_invalid_header_char.js b/benchmark/http/check_invalid_header_char.js index 399e71b2dfcc88..c70b0d39db2ffc 100644 --- a/benchmark/http/check_invalid_header_char.js +++ b/benchmark/http/check_invalid_header_char.js @@ -3,43 +3,66 @@ const common = require('../common.js'); const _checkInvalidHeaderChar = require('_http_common')._checkInvalidHeaderChar; -// Put it here so the benchmark result lines will not be super long. -const LONG_AND_INVALID = 'Here is a value that is really a folded header ' + - 'value\r\n this should be supported, but it is not currently'; +const groupedInputs = { + // Representative set of inputs from an AcmeAir benchmark run: + // all valid strings, average length 14.4, stdev 13.0 + group_acmeair: [ + 'W/"2-d4cbb29"', 'OK', 'Express', 'X-HTTP-Method-Override', 'Express', + 'application/json', 'application/json; charset=utf-8', '206', 'OK', + 'sessionid=; Path=/', 'text/html; charset=utf-8', + 'text/html; charset=utf-8', '10', 'W/"a-eda64de5"', 'OK', 'Express', + 'application/json', 'application/json; charset=utf-8', '2', 'W/"2-d4cbb29"', + 'OK', 'Express', 'X-HTTP-Method-Override', 'sessionid=; Path=/', 'Express', + 'sessionid=; Path=/,sessionid=6b059402-d62f-4e6f-b3dd-ce5b9e487c39; Path=/', + 'text/html; charset=utf-8', 'text/html; charset=utf-8', '9', 'OK', + 'sessionid=; Path=/', 'text/html; charset=utf-8', + 'text/html; charset=utf-8', '10', 'W/"a-eda64de5"', 'OK', 'Express', + 'Express', 'X-HTTP-Method-Override', 'sessionid=; Path=/', + 'application/json' + ], + + // Put it here so the benchmark result lines will not be super long. + LONG_AND_INVALID: ['Here is a value that is really a folded header ' + + 'value\r\n this should be supported, but it is not currently'] +}; + +const inputs = [ + // Valid + '', + '1', + '\t\t\t\t\t\t\t\t\t\tFoo bar baz', + 'keep-alive', + 'close', + 'gzip', + '20091', + 'private', + 'text/html; charset=utf-8', + 'text/plain', + 'Sat, 07 May 2016 16:54:48 GMT', + 'SAMEORIGIN', + 'en-US', + + // Invalid + '中文呢', // unicode + 'foo\nbar', + '\x7F' +]; const bench = common.createBenchmark(main, { - key: [ - // Valid - '', - '1', - '\t\t\t\t\t\t\t\t\t\tFoo bar baz', - 'keep-alive', - 'close', - 'gzip', - '20091', - 'private', - 'text/html; charset=utf-8', - 'text/plain', - 'Sat, 07 May 2016 16:54:48 GMT', - 'SAMEORIGIN', - 'en-US', - - // Invalid - 'LONG_AND_INVALID', - '中文呢', // unicode - 'foo\nbar', - '\x7F' - ], + input: inputs.concat(Object.keys(groupedInputs)), n: [1e6], }); -function main({ n, key }) { - if (key === 'LONG_AND_INVALID') { - key = LONG_AND_INVALID; +function main({ n, input }) { + let inputs = [input]; + if (groupedInputs.hasOwnProperty(input)) { + inputs = groupedInputs[input]; } + + const len = inputs.length; bench.start(); for (var i = 0; i < n; i++) { - _checkInvalidHeaderChar(key); + _checkInvalidHeaderChar(inputs[i % len]); } bench.end(n); } diff --git a/lib/_http_common.js b/lib/_http_common.js index a7e8b0c59b8854..ffb90407c62175 100644 --- a/lib/_http_common.js +++ b/lib/_http_common.js @@ -242,57 +242,15 @@ function checkIsHttpToken(val) { return tokenRegExp.test(val); } +const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/; /** * True if val contains an invalid field-vchar * field-value = *( field-content / obs-fold ) * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] * field-vchar = VCHAR / obs-text - * - * checkInvalidHeaderChar() is currently designed to be inlinable by v8, - * so take care when making changes to the implementation so that the source - * code size does not exceed v8's default max_inlined_source_size setting. **/ -var validHdrChars = [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, // 0 - 15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32 - 47 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 48 - 63 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64 - 79 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80 - 95 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 96 - 111 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 112 - 127 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 128 ... - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // ... 255 -]; function checkInvalidHeaderChar(val) { - val += ''; - if (val.length < 1) - return false; - if (!validHdrChars[val.charCodeAt(0)]) - return true; - if (val.length < 2) - return false; - if (!validHdrChars[val.charCodeAt(1)]) - return true; - if (val.length < 3) - return false; - if (!validHdrChars[val.charCodeAt(2)]) - return true; - if (val.length < 4) - return false; - if (!validHdrChars[val.charCodeAt(3)]) - return true; - for (var i = 4; i < val.length; ++i) { - if (!validHdrChars[val.charCodeAt(i)]) - return true; - } - return false; + return headerCharRegex.test(val); } module.exports = {