Skip to content

Commit

Permalink
Improve BaseParser#unnormalize (#194)
Browse files Browse the repository at this point in the history
The current implementation of `#unnormalize` iterates over matched
entity references that already has been substituted. With these changes
we will reduce the number of redundant calls to `rv.gsub!`.

* Reject filtered matches earlier in the loop
* Improve `#unnormalize` by removing redundant calls to `rv.gsub!`
* Improve `entity_expansion_limit` tests

---

Example:

```ruby
require "rexml/parsers/baseparser"

entity_less_than = "<"
entitiy_length = 100

filler_text = "A"
filler_length = 100

feed = "#{entity_less_than * entitiy_length}#{filler_text * filler_length}"

base_parser = REXML::Parsers::BaseParser.new("")
base_parser.unnormalize(feed) # => "<" * 100 + "A" * 100
```

Before this PR, the example above would require 100 iterations. After
this PR, 1 iteration.

---------

Co-authored-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
vikiv480 and kou committed Aug 16, 2024
1 parent e14847c commit 2f019f9
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 25 deletions.
53 changes: 39 additions & 14 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@

module REXML
module Parsers
unless [].respond_to?(:tally)
module EnumerableTally
refine Enumerable do
def tally
counts = {}
each do |item|
counts[item] ||= 0
counts[item] += 1
end
counts
end
end
end
using EnumerableTally
end

if StringScanner::Version < "3.0.8"
module StringScannerCaptures
refine StringScanner do
Expand Down Expand Up @@ -547,20 +563,29 @@ def unnormalize( string, entities=nil, filter=nil )
[Integer(m)].pack('U*')
}
matches.collect!{|x|x[0]}.compact!
if filter
matches.reject! do |entity_reference|
filter.include?(entity_reference)
end
end
if matches.size > 0
matches.each do |entity_reference|
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
if rv.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
end
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
matches.tally.each do |entity_reference, n|
entity_expansion_count_before = @entity_expansion_count
entity_value = entity( entity_reference, entities )
if entity_value
if n > 1
entity_expansion_count_delta =
@entity_expansion_count - entity_expansion_count_before
record_entity_expansion(entity_expansion_count_delta * (n - 1))
end
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
if rv.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
end
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
end
end
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
Expand All @@ -570,8 +595,8 @@ def unnormalize( string, entities=nil, filter=nil )

private

def record_entity_expansion
@entity_expansion_count += 1
def record_entity_expansion(delta=1)
@entity_expansion_count += delta
if @entity_expansion_count > Security.entity_expansion_limit
raise "number of entity expansions exceeded, processing aborted."
end
Expand Down
14 changes: 8 additions & 6 deletions test/test_pullparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,21 +206,23 @@ def test_empty_value
</member>
XML

REXML::Security.entity_expansion_limit = 100000
parser = REXML::Parsers::PullParser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
while parser.has_next?
parser.pull
end
while parser.has_next?
parser.pull
end
assert_equal(11111, parser.entity_expansion_count)

REXML::Security.entity_expansion_limit = 100
REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
parser = REXML::Parsers::PullParser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
while parser.has_next?
parser.pull
end
end
assert_equal(101, parser.entity_expansion_count)
assert do
parser.entity_expansion_count > @default_entity_expansion_limit
end
end

def test_with_default_entity
Expand Down
12 changes: 7 additions & 5 deletions test/test_sax.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,17 +147,19 @@ def test_empty_value
</member>
XML

REXML::Security.entity_expansion_limit = 100000
sax = REXML::Parsers::SAX2Parser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
sax.parse
end
sax.parse
assert_equal(11111, sax.entity_expansion_count)

REXML::Security.entity_expansion_limit = 100
REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
sax = REXML::Parsers::SAX2Parser.new(source)
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
sax.parse
end
assert_equal(101, sax.entity_expansion_count)
assert do
sax.entity_expansion_count > @default_entity_expansion_limit
end
end

def test_with_default_entity
Expand Down

0 comments on commit 2f019f9

Please sign in to comment.