Skip to content

Commit

Permalink
Fix a bug that Stream parser doesn't expand the user-defined entity r…
Browse files Browse the repository at this point in the history
…eferences for "text" (#200)

## Why?
Pull parser expands character references and predefined entity
references, but doesn't expand user-defined entity references.

## Change
- text_stream_unnormalize.rb
```
$LOAD_PATH.unshift(File.expand_path("lib"))
require 'rexml/document'
require 'rexml/parsers/sax2parser'
require 'rexml/parsers/pullparser'
require 'rexml/parsers/streamparser'
require 'rexml/streamlistener'

xml = <<EOS
<!DOCTYPE foo [
  <!ENTITY la "1234">
  <!ENTITY lala "--&la;--">
  <!ENTITY lalal "&la;&la;">
]><root><la>&la;</la><lala>&lala;</lala><a>&lt;P&gt; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt; &lt;/I&gt;</a><b>test&#8482;</b></root>
EOS

class StListener
  include REXML::StreamListener

  def text(text)
    puts text
  end
end

puts "REXML(DOM)"
REXML::Document.new(xml).elements.each("/root/*") {|element| puts element.text}

puts ""
puts "REXML(Pull)"
parser = REXML::Parsers::PullParser.new(xml)
while parser.has_next?
  event = parser.pull
  case event.event_type
  when :text
    puts event[1]
  end
end

puts ""
puts "REXML(Stream)"
parser = REXML::Parsers::StreamParser.new(xml, StListener.new).parse

puts ""
puts "REXML(SAX)"
sax = REXML::Parsers::SAX2Parser.new(xml)
sax.listen(:characters) {|x| puts x }
sax.parse
```

## Before (master)
```
$ ruby text_stream_unnormalize.rb
REXML(DOM)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Pull)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Stream)
&la;      #<= This
&lala;    #<= This
<P> <I> <B> Text </B> </I>
test™

REXML(SAX)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™
```

## After(This PR)

```
$ ruby text_stream_unnormalize.rb
REXML(DOM)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Pull)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Stream)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(SAX)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™
```
  • Loading branch information
naitoh committed Aug 21, 2024
1 parent cb15858 commit 6109e01
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 2 deletions.
8 changes: 7 additions & 1 deletion lib/rexml/parsers/streamparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@ class StreamParser
def initialize source, listener
@listener = listener
@parser = BaseParser.new( source )
@entities = {}
end

def add_listener( listener )
@parser.add_listener( listener )
end

def entity_expansion_count
@parser.entity_expansion_count
end

def parse
# entity string
while true
Expand All @@ -28,7 +33,7 @@ def parse
when :end_element
@listener.tag_end( event[1] )
when :text
unnormalized = @parser.unnormalize( event[1] )
unnormalized = @parser.unnormalize( event[1], @entities )
@listener.text( unnormalized )
when :processing_instruction
@listener.instruction( *event[1,2] )
Expand All @@ -40,6 +45,7 @@ def parse
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
@listener.send( event[0].to_s, *event[1..-1] )
when :entitydecl, :notationdecl
@entities[ event[1] ] = event[2] if event.size == 3
@listener.send( event[0].to_s, event[1..-1] )
when :externalentity
entity_reference = event[1]
Expand Down
141 changes: 140 additions & 1 deletion test/test_stream.rb
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,42 @@ def entity(content)

assert_equal(["ISOLat2"], listener.entities)
end

def test_entity_replacement
source = <<-XML
<!DOCTYPE foo [
<!ENTITY la "1234">
<!ENTITY lala "--&la;--">
<!ENTITY lalal "&la;&la;">
]><a><la>&la;</la><lala>&lala;</lala></a>
XML

listener = MyListener.new
class << listener
attr_accessor :text_values
def text(text)
@text_values << text
end
end
listener.text_values = []
REXML::Document.parse_stream(source, listener)
assert_equal(["1234", "--1234--"], listener.text_values)
end

def test_characters_predefined_entities
source = '<root><a>&lt;P&gt; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt; &lt;/I&gt;</a></root>'

listener = MyListener.new
class << listener
attr_accessor :text_value
def text(text)
@text_value << text
end
end
listener.text_value = ""
REXML::Document.parse_stream(source, listener)
assert_equal("<P> <I> <B> Text </B> </I>", listener.text_value)
end
end

class EntityExpansionLimitTest < Test::Unit::TestCase
Expand All @@ -100,6 +136,81 @@ def teardown
REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit
end

def test_have_value
source = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
<!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
]>
<member>
&a;
</member>
XML

assert_raise(RuntimeError.new("entity expansion has grown too large")) do
REXML::Document.parse_stream(source, MyListener.new)
end
end

def test_empty_value
source = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
<!ENTITY e "">
]>
<member>
&a;
</member>
XML

listener = MyListener.new
REXML::Security.entity_expansion_limit = 100000
parser = REXML::Parsers::StreamParser.new( source, listener )
parser.parse
assert_equal(11111, parser.entity_expansion_count)

REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
parser = REXML::Parsers::StreamParser.new( source, listener )
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
parser.parse
end
assert do
parser.entity_expansion_count > @default_entity_expansion_limit
end
end

def test_with_default_entity
source = <<-XML
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE member [
<!ENTITY a "a">
<!ENTITY a2 "&a; &a;">
]>
<member>
&a;
&a2;
&lt;
</member>
XML

listener = MyListener.new
REXML::Security.entity_expansion_limit = 4
REXML::Document.parse_stream(source, listener)

REXML::Security.entity_expansion_limit = 3
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
REXML::Document.parse_stream(source, listener)
end
end

def test_with_only_default_entities
member_value = "&lt;p&gt;#{'A' * @default_entity_expansion_text_limit}&lt;/p&gt;"
source = <<-XML
Expand All @@ -117,14 +228,42 @@ def text(text)
end
end
listener.text_value = ""
REXML::Document.parse_stream(source, listener)
parser = REXML::Parsers::StreamParser.new( source, listener )
parser.parse

expected_value = "<p>#{'A' * @default_entity_expansion_text_limit}</p>"
assert_equal(expected_value, listener.text_value.strip)
assert_equal(0, parser.entity_expansion_count)
assert do
listener.text_value.bytesize > @default_entity_expansion_text_limit
end
end

def test_entity_expansion_text_limit
source = <<-XML
<!DOCTYPE member [
<!ENTITY a "&b;&b;&b;">
<!ENTITY b "&c;&d;&e;">
<!ENTITY c "xxxxxxxxxx">
<!ENTITY d "yyyyyyyyyy">
<!ENTITY e "zzzzzzzzzz">
]>
<member>&a;</member>
XML

listener = MyListener.new
class << listener
attr_accessor :text_value
def text(text)
@text_value << text
end
end
listener.text_value = ""
REXML::Security.entity_expansion_text_limit = 90
REXML::Document.parse_stream(source, listener)

assert_equal(90, listener.text_value.size)
end
end

# For test_listener
Expand Down

0 comments on commit 6109e01

Please sign in to comment.