From 68939eab95441487bdaacca6bc7f57522434a12d Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 19 Aug 2024 10:48:54 +0900 Subject: [PATCH 1/5] Add a test case to unnormalize the predefined entities of "text" in the stream parser. See: https://github.com/ruby/rexml/pull/168 --- test/test_stream.rb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/test_stream.rb b/test/test_stream.rb index 615d497f..76ec13b1 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -87,6 +87,21 @@ def entity(content) assert_equal(["ISOLat2"], listener.entities) end + + def test_characters_predefined_entities + source = '<P> <I> <B> Text </B> </I>' + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Document.parse_stream(source, listener) + assert_equal("

Text ", listener.text_value) + end end class EntityExpansionLimitTest < Test::Unit::TestCase From 8b97bae94f9b2662da66fca8847bdb78e24e7aa8 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 19 Aug 2024 15:12:58 +0900 Subject: [PATCH 2/5] Fix a bug that Stream parser doesn't expand the user-defined entity references for "text" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Why? Stream parser expands character references and predefined entity references, but doesn't expand user-defined entity references. ## Change - text_stream_unnormalize.rb ``` $LOAD_PATH.unshift(File.expand_path("lib")) require 'rexml/document' require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' require 'rexml/parsers/streamparser' require 'rexml/streamlistener' xml = < ]>&la;&lala;<P> <I> <B> Text </B> </I>test™ EOS class StListener include REXML::StreamListener def text(text) puts text end end puts "REXML(DOM)" REXML::Document.new(xml).elements.each("/root/*") {|element| puts element.text} puts "" puts "REXML(Pull)" parser = REXML::Parsers::PullParser.new(xml) while parser.has_next? event = parser.pull case event.event_type when :text puts event[1] end end puts "" puts "REXML(Stream)" parser = REXML::Parsers::StreamParser.new(xml, StListener.new).parse puts "" puts "REXML(SAX)" sax = REXML::Parsers::SAX2Parser.new(xml) sax.listen(:characters) {|x| puts x } sax.parse ``` ## Before (master) ``` $ ruby text_stream_unnormalize.rb REXML(DOM) 1234 --1234--

Text test™ REXML(Pull) 1234 --1234--

Text test™ REXML(Stream) &la; #<= This &lala; #<= This

Text test™ REXML(SAX) 1234 --1234--

Text test™ ``` ## After (This PR) ``` $ ruby text_stream_unnormalize.rb REXML(DOM) 1234 --1234--

Text test™ REXML(Pull) 1234 --1234--

Text test™ REXML(Stream) 1234 --1234--

Text test™ REXML(SAX) 1234 --1234--

Text test™ ``` --- lib/rexml/parsers/streamparser.rb | 4 +++- test/test_stream.rb | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index e2da2a7d..f97c9124 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -7,6 +7,7 @@ class StreamParser def initialize source, listener @listener = listener @parser = BaseParser.new( source ) + @entities = {} end def add_listener( listener ) @@ -28,7 +29,7 @@ def parse when :end_element @listener.tag_end( event[1] ) when :text - unnormalized = @parser.unnormalize( event[1] ) + unnormalized = @parser.unnormalize( event[1], @entities ) @listener.text( unnormalized ) when :processing_instruction @listener.instruction( *event[1,2] ) @@ -40,6 +41,7 @@ def parse when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl @listener.send( event[0].to_s, *event[1..-1] ) when :entitydecl, :notationdecl + @entities[ event[1] ] = event[2] if event.size == 3 @listener.send( event[0].to_s, event[1..-1] ) when :externalentity entity_reference = event[1] diff --git a/test/test_stream.rb b/test/test_stream.rb index 76ec13b1..f08f18bf 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -88,6 +88,25 @@ def entity(content) assert_equal(["ISOLat2"], listener.entities) end + def test_entity_replacement + source = ' + + + ]>&la;&lala;' + + listener = MyListener.new + class << listener + attr_accessor :text_values + def text(text) + @text_values << text + end + end + listener.text_values = [] + REXML::Document.parse_stream(source, listener) + assert_equal(["1234", "--1234--"], listener.text_values) + end + def test_characters_predefined_entities source = '<P> <I> <B> Text </B> </I>' From dc48407c1cdf4ad14e4384ee137b7251ccad7bdc Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 19 Aug 2024 16:24:23 +0900 Subject: [PATCH 3/5] Add support for XML entity expansion limitation in Stream parser ## Why? 
See: - https://github.com/ruby/rexml/pull/187 - https://github.com/ruby/rexml/pull/195 ## Change - Supported `REXML::Security.entity_expansion_limit=` in Stream parser - Supported `REXML::Security.entity_expansion_text_limit=` in Stream parser --- lib/rexml/parsers/streamparser.rb | 4 ++ test/test_stream.rb | 101 ++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index f97c9124..7781fe44 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -14,6 +14,10 @@ def add_listener( listener ) @parser.add_listener( listener ) end + def entity_expansion_count + @parser.entity_expansion_count + end + def parse # entity string while true diff --git a/test/test_stream.rb b/test/test_stream.rb index f08f18bf..4e769266 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -134,6 +134,81 @@ def teardown REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end + def test_have_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + REXML::Document.parse_stream(source, MyListener.new) + end + end + + def test_empty_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + listener = MyListener.new + REXML::Security.entity_expansion_limit = 100000 + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse + assert_equal(11111, parser.entity_expansion_count) + + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + parser = REXML::Parsers::StreamParser.new( source, listener ) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + parser.parse + end + assert do + parser.entity_expansion_count > @default_entity_expansion_limit + end + end + + def test_with_default_entity + source = <<-XML + + + +]> + +&a; +&a2; +< + + XML + + listener = MyListener.new + 
REXML::Security.entity_expansion_limit = 4 + REXML::Document.parse_stream(source, listener) + + REXML::Security.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + REXML::Document.parse_stream(source, listener) + end + end + def test_with_only_default_entities member_value = "<p>#{'A' * @default_entity_expansion_text_limit}</p>" source = <<-XML @@ -159,6 +234,32 @@ def text(text) listener.text_value.bytesize > @default_entity_expansion_text_limit end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Security.entity_expansion_text_limit = 90 + REXML::Document.parse_stream(source, listener) + + assert_equal(90, listener.text_value.size) + end end # For test_listener From 7a8f3e3059f42c66d76837d54bbf4804b15778f0 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Mon, 19 Aug 2024 16:59:08 +0900 Subject: [PATCH 4/5] Update test_with_only_default_entities test case ## Why? Because `StreamParser#entity_expansion_count` was added. --- test/test_stream.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_stream.rb b/test/test_stream.rb index 4e769266..79e0fe9e 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -226,10 +226,12 @@ def text(text) end end listener.text_value = "" - REXML::Document.parse_stream(source, listener) + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse expected_value = "

#{'A' * @default_entity_expansion_text_limit}

" assert_equal(expected_value, listener.text_value.strip) + assert_equal(0, parser.entity_expansion_count) assert do listener.text_value.bytesize > @default_entity_expansion_text_limit end From c636358a04a27e19ee22bf7a2c9bf5e2afa3abb2 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Wed, 21 Aug 2024 09:49:09 +0900 Subject: [PATCH 5/5] Changed the XML description of the test code to `here document` format. --- test/test_stream.rb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_stream.rb b/test/test_stream.rb index 79e0fe9e..782066c2 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -89,11 +89,13 @@ def entity(content) end def test_entity_replacement - source = ' - - - ]>&la;&lala;' + source = <<-XML + + + +]>&la;&lala; + XML listener = MyListener.new class << listener