From 83ca5c4b0f76cf7b307dd1be1dc934e1e8199863 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 21 Jan 2024 06:11:42 +0900 Subject: [PATCH] Reduce calls to `Source#buffer`(`StringScanner#rest`) (#106) Reduce calls to `Source#buffer`(`StringScanner#rest`) ## Why `Source#buffer` calls `StringScanner#rest`. `StringScanner#rest` is slow. Reduce calls to `Source#buffer`. ## Benchmark ``` RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] Calculating ------------------------------------- before after before(YJIT) after(YJIT) dom 10.639 10.985 16.213 16.221 i/s - 100.000 times in 9.399033s 9.103461s 6.167962s 6.164794s sax 28.357 29.440 42.900 44.375 i/s - 100.000 times in 3.526479s 3.396688s 2.331024s 2.253511s pull 32.852 34.210 48.976 51.273 i/s - 100.000 times in 3.043965s 2.923140s 2.041816s 1.950344s stream 30.821 31.908 43.953 44.697 i/s - 100.000 times in 3.244539s 3.134020s 2.275172s 2.237310s Comparison: dom after(YJIT): 16.2 i/s before(YJIT): 16.2 i/s - 1.00x slower after: 11.0 i/s - 1.48x slower before: 10.6 i/s - 1.52x slower sax after(YJIT): 44.4 i/s before(YJIT): 42.9 i/s - 1.03x slower after: 29.4 i/s - 1.51x slower before: 28.4 i/s - 1.56x slower pull after(YJIT): 51.3 i/s before(YJIT): 49.0 i/s - 1.05x slower after: 34.2 i/s - 1.50x slower before: 32.9 i/s - 1.56x slower stream after(YJIT): 44.7 i/s before(YJIT): 44.0 i/s - 1.02x slower after: 31.9 i/s - 1.40x slower before: 30.8 i/s - 1.45x slower ``` - YJIT=ON : 1.00x - 1.05x faster - YJIT=OFF : 1.03x - 1.04x faster --- lib/rexml/parsers/baseparser.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 65bad260..7126a12d 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -348,9 +348,13 @@ def 
pull_event @source.match(/\A\s*/um, true) end begin - @source.read if @source.buffer.size<2 - if @source.buffer[0] == ?< - if @source.buffer[1] == ?/ + next_data = @source.buffer + if next_data.size < 2 + @source.read + next_data = @source.buffer + end + if next_data[0] == ?< + if next_data[1] == ?/ @nsstack.shift last_tag = @tags.pop md = @source.match( CLOSE_MATCH, true ) @@ -364,7 +368,7 @@ def pull_event raise REXML::ParseException.new(message, @source) end return [ :end_element, last_tag ] - elsif @source.buffer[1] == ?! + elsif next_data[1] == ?! md = @source.match(/\A(\s*[^>]*>)/um) #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md @@ -383,7 +387,7 @@ def pull_event end raise REXML::ParseException.new( "Declarations can only occur "+ "in the doctype declaration.", @source) - elsif @source.buffer[1] == ?? + elsif next_data[1] == ?? return process_instruction else # Get the next tag