Skip to content

Commit

Permalink
Fix to not allow parameter entity references at internal subsets (#191)
Browse files Browse the repository at this point in the history
## Why?
In the internal subset of DTD, references to parameter entities are not
allowed within markup declarations.

See: https://www.w3.org/TR/xml/#wfc-PEinInternalSubset

> Well-formedness constraint: PEs in Internal Subset

> In the internal DTD subset, parameter-entity references MUST NOT occur
within markup declarations; they may occur where markup declarations can
occur. (This does not apply to references that occur in external
parameter entities or to the external subset.)
  • Loading branch information
naitoh committed Aug 17, 2024
1 parent 1c76dbb commit c8110b4
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 118 deletions.
52 changes: 5 additions & 47 deletions lib/rexml/entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ class Entity < Child
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
PEREFERENCE_RE = /#{PEREFERENCE}/um
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

attr_reader :name, :external, :ref, :ndata, :pubid
attr_reader :name, :external, :ref, :ndata, :pubid, :value

# Create a new entity. Simple entities can be constructed by passing a
# name, value to the constructor; this creates a generic, plain entity
Expand Down Expand Up @@ -68,14 +69,11 @@ def Entity::matches? string
end

# Evaluates to the unnormalized value of this entity; that is, replacing
# all entities -- both %ent; and &ent; entities. This differs from
# +value()+ in that +value+ only replaces %ent; entities.
# &ent; entities.
def unnormalized
document.record_entity_expansion unless document.nil?
v = value()
return nil if v.nil?
@unnormalized = Text::unnormalize(v, parent)
@unnormalized
return nil if @value.nil?
@unnormalized = Text::unnormalize(@value, parent)
end

#once :unnormalized
Expand Down Expand Up @@ -121,46 +119,6 @@ def to_s
write rv
rv
end

PEREFERENCE_RE = /#{PEREFERENCE}/um
# Returns the value of this entity. At the moment, only internal entities
# are processed. If the value contains internal references (IE,
# %blah;), those are replaced with their values. IE, if the doctype
# contains:
# <!ENTITY % foo "bar">
# <!ENTITY yada "nanoo %foo; nanoo>
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
@resolved_value ||= resolve_value
end

def parent=(other)
@resolved_value = nil
super
end

private
def resolve_value
return nil if @value.nil?
return @value unless @value.match?(PEREFERENCE_RE)

matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
end
rv
end
end

# This is a set of entity constants -- the ones defined in the XML
Expand Down
3 changes: 3 additions & 0 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ class BaseParser
}

module Private
PEREFERENCE_PATTERN = /#{PEREFERENCE}/um
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
Expand Down Expand Up @@ -350,6 +351,8 @@ def pull_event
match[4] = match[4][1..-2] # HREF
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
elsif Private::PEREFERENCE_PATTERN.match?(match[2])
raise REXML::ParseException.new("Parameter entity references forbidden in internal subset: #{match[2]}", @source)
else
match[2] = match[2][1..-2]
match.pop if match.size == 4
Expand Down
50 changes: 0 additions & 50 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,56 +147,6 @@ def test_entity_expansion_text_limit
assert_equal(90, doc.root.children.first.value.bytesize)
end
end

class ParameterEntityTest < self
def test_have_value
xml = <<EOF
<!DOCTYPE root [
<!ENTITY % a "BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.">
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
<!ENTITY test "test %g;">
]>
<cd></cd>
EOF

assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
REXML::Security.entity_expansion_limit = 100
assert_equal(100, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
end

def test_empty_value
xml = <<EOF
<!DOCTYPE root [
<!ENTITY % a "">
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
<!ENTITY test "test %g;">
]>
<cd></cd>
EOF

REXML::Document.new(xml)
REXML::Security.entity_expansion_limit = 90
assert_equal(90, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
end
end
end

def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
Expand Down
102 changes: 81 additions & 21 deletions test/test_entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ def test_parse_entity

def test_constructor
one = [ %q{<!ENTITY % YN '"Yes"'>},
%q{<!ENTITY % YN2 "Yes">},
%q{<!ENTITY WhatHeSaid "He said %YN;">},
%q{<!ENTITY WhatHeSaid 'He said "Yes"'>},
'<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">',
'<!ENTITY open-hatch2
Expand All @@ -71,8 +70,7 @@ def test_constructor
NDATA gif>' ]
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
<!ENTITY % YN2 "Yes">
<!ENTITY WhatHeSaid "He said %YN;">
<!ENTITY WhatHeSaid 'He said "Yes"'>
<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
<!ENTITY open-hatch2
Expand Down Expand Up @@ -104,6 +102,84 @@ def test_replace_entities
assert_equal source, out
end

def test_readers_with_reference
entity = REXML::Entity.new([:entitydecl, "a", "B", "%"])
assert_equal([
'<!ENTITY % a "B">',
"a",
"B",
"B",
"B",
],
[
entity.to_s,
entity.name,
entity.value,
entity.normalized,
entity.unnormalized,
])
end

def test_readers_without_reference
entity = REXML::Entity.new([:entitydecl, "a", "&b;"])
assert_equal([
'<!ENTITY a "&b;">',
"a",
"&b;",
"&b;",
"&b;",
],
[
entity.to_s,
entity.name,
entity.value,
entity.normalized,
entity.unnormalized,
])
end

def test_readers_with_nested_references
doctype = REXML::DocType.new('root')
doctype.add(REXML::Entity.new([:entitydecl, "a", "&b;"]))
doctype.add(REXML::Entity.new([:entitydecl, "b", "X"]))
assert_equal([
"a",
"&b;",
"&b;",
"X",
"b",
"X",
"X",
"X",
],
[
doctype.entities["a"].name,
doctype.entities["a"].value,
doctype.entities["a"].normalized,
doctype.entities["a"].unnormalized,
doctype.entities["b"].name,
doctype.entities["b"].value,
doctype.entities["b"].normalized,
doctype.entities["b"].unnormalized,
])
end

def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser
source = '<!DOCTYPE root [ <!ENTITY % a "B" > <!ENTITY c "%a;" > ]><root/>'
parser = REXML::Parsers::BaseParser.new(source)
exception = assert_raise(REXML::ParseException) do
while parser.has_next?
parser.pull
end
end
assert_equal(<<-DETAIL, exception.to_s)
Parameter entity references forbidden in internal subset: "%a;"
Line: 1
Position: 54
Last 80 unconsumed characters:
DETAIL
end

def test_entity_string_limit
template = '<!DOCTYPE bomb [ <!ENTITY a "^" > ]> <bomb>$</bomb>'
len = 5120 # 5k per entity
Expand All @@ -122,22 +198,6 @@ def test_entity_string_limit
end
end

def test_entity_string_limit_for_parameter_entity
template = '<!DOCTYPE bomb [ <!ENTITY % a "^" > <!ENTITY bomb "$" > ]><root/>'
len = 5120 # 5k per entity
template.sub!(/\^/, "B" * len)

# 10k is OK
entities = '%a;' * 2 # 5k entity * 2 = 10k
REXML::Document.new(template.sub(/\$/, entities))

# above 10k explodes
entities = '%a;' * 3 # 5k entity * 2 = 15k
assert_raise(REXML::ParseException) do
REXML::Document.new(template.sub(/\$/, entities))
end
end

def test_raw
source = '<!DOCTYPE foo [
<!ENTITY ent "replace">
Expand All @@ -161,7 +221,7 @@ def test_lazy_evaluation
def test_entity_replacement
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;">]>
<!ENTITY WhatHeSaid 'He said "Yes"'>]>
<a>&WhatHeSaid;</a>}

d = REXML::Document.new( source )
Expand Down

0 comments on commit c8110b4

Please sign in to comment.