From d1c62de9c5976bd0785abbb0a67f1b94e8f1f953 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 26 Jun 2023 17:25:01 -0400 Subject: [PATCH 1/7] ci: update suppression stack signature something changed recently, not sure if it's the compiler in CI, or the library, or a combination. (cherry picked from commit 871cd51c5ed0dac96e99ed202d386006a23d6aeb) --- suppressions/ruby.supp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/suppressions/ruby.supp b/suppressions/ruby.supp index 806656fee7..13a5e0a967 100644 --- a/suppressions/ruby.supp +++ b/suppressions/ruby.supp @@ -98,7 +98,7 @@ fun:evaluate } { - TODO + https://github.com/sparklemotion/nokogiri/actions/runs/5354163940/jobs/9710862134 # 240 (120 direct, 120 indirect) bytes in 1 blocks are definitely lost in loss record 28,980 of 37,883 # *xmlNewNodeEatName (tree.c:2299) # *xmlNewDocNodeEatName (tree.c:2374) @@ -117,7 +117,7 @@ fun:xmlNewNodeEatName fun:xmlNewDocNodeEatName fun:xmlSAX2StartElementNs - fun:xmlParseStartTag2 + fun:xmlParseStartTag* fun:xmlParseElementStart fun:xmlParseContentInternal fun:xmlParseElement From 8d8c728890dbbea0550aabd8081d4d9a129d5591 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 3 Aug 2023 17:52:36 -0400 Subject: [PATCH 2/7] style: prefer Minitest to MiniTest since Minitest 5.19 doesn't load it by default (cherry picked from commit a29042b0c0dbdc1b488a97dd6d6dd88d4b583c05) --- .github/ISSUE_TEMPLATE/1-bug-report.md | 2 +- CONTRIBUTING.md | 2 +- test/helper.rb | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.md b/.github/ISSUE_TEMPLATE/1-bug-report.md index 297cd4de42..79d4bf6ff1 100644 --- a/.github/ISSUE_TEMPLATE/1-bug-report.md +++ b/.github/ISSUE_TEMPLATE/1-bug-report.md @@ -32,7 +32,7 @@ Here's an example of how you might structure such a script: require 'nokogiri' require 'minitest/autorun' -class Test < MiniTest::Spec +class Test < Minitest::Spec describe "Node#css" do it "should find a div using chained classes" do html = <<~HEREDOC diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 336c9c5c14..a0f844d16e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -124,7 +124,7 @@ Note that `rake test` does not compile the native extension, and this is intenti bundle exec rake compile test ``` -To run a focused test, use MiniTest's `TESTOPTS`: +To run a focused test, use Minitest's `TESTOPTS`: ``` sh bundle exec rake compile test TESTOPTS="-n/test_last_element_child/" diff --git a/test/helper.rb b/test/helper.rb index ed3133c6e8..8d4569e87c 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -128,7 +128,7 @@ class TestBenchmark < Minitest::BenchSpec end # rubocop:disable Style/ClassVars - class TestCase < MiniTest::Spec + class TestCase < Minitest::Spec include TestBase COMPACT_EVERY = 20 @@ -272,7 +272,7 @@ def assert_not_send(send_ary, m = nil) def pending(msg) begin yield - rescue MiniTest::Assertion + rescue Minitest::Assertion skip("pending #{msg} [#{caller(2..2).first}]") end flunk("pending test unexpectedly passed: #{msg} [#{caller(1..1).first}]") From a6fc29bbcf0fe3f4ffa17c2fd82beba905eb3144 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 3 Aug 2023 18:00:07 -0400 Subject: [PATCH 3/7] ci: ruby-saml's downstream test suite needs minitest compat (cherry picked from commit 87d3c11f52a2e704fc81ec0393473447afde32c9) --- .github/workflows/downstream.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml index 4bc5c244c6..7faf9ae807 100644 --- a/.github/workflows/downstream.yml +++ b/.github/workflows/downstream.yml @@ -47,7 +47,7 @@ jobs: command: "bundle exec rake spec" - url: https://github.com/SAML-Toolkits/ruby-saml name: ruby-saml - command: "bundle exec rake test" + command: "bundle exec rake test MT_COMPAT=t" # - url: https://github.com/instructure/nokogiri-xmlsec-instructure # name: nokogiri-xmlsec-instructure # precommand: "apt install -y libxmlsec1-dev" From 1617d541353bcb5ee18afadf6311ed50233534e0 Mon Sep 17 00:00:00 2001 From: Anish Athalye Date: Thu, 13 Jul 2023 07:33:01 -0400 Subject: [PATCH 4/7] Fix typo (cherry picked from commit 5b65501ead7b198fc4a86cb90c532e1a6945e5a5) --- gumbo-parser/src/error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gumbo-parser/src/error.c b/gumbo-parser/src/error.c index c26803b763..8b45a42bf9 100644 --- a/gumbo-parser/src/error.c +++ b/gumbo-parser/src/error.c @@ -357,7 +357,7 @@ static void handle_parser_error ( print_tag_stack(error, output); return; case GUMBO_TOKEN_END_TAG: - print_message(output, "Eng tag '%s' isn't allowed here.", + print_message(output, "End tag '%s' isn't allowed here.", gumbo_normalized_tagname(error->input_tag)); print_tag_stack(error, output); return; From c39ec3033ed40a4df87173ab1e715440942cd204 Mon Sep 17 00:00:00 2001 From: Stephen Checkoway Date: Thu, 6 Jul 2023 14:00:12 -0400 Subject: [PATCH 5/7] fix memsize_node when called on xmlAttrs The `properties` field of an `xmlNode` element points to an `xmlAttr`. The first few fields of `xmlAttr` are in common with `xmlNode`, but not the `properties` field which doesn't exist in an `xmlAttr`. The `memsize_node` function was passing an `xmlAttr` to a recursive call and then trying to do the same with the properties of that. This led to type confusion and subsequent crashes. Fixes: #2923 (cherry picked from commit 81762fa0b306a7d25ef22a303ff9dd6e9bd94ffd) --- ext/nokogiri/xml_document.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 442884daae..c6856f979a 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -103,8 +103,11 @@ memsize_node(const xmlNodePtr node) size_t memsize = 0; memsize += xmlStrlen(node->name); - for (child = (xmlNodePtr)node->properties; child; child = child->next) { - memsize += sizeof(xmlAttr) + memsize_node(child); + + if (node->type == XML_ELEMENT_NODE) { + for (child = (xmlNodePtr)node->properties; child; child = child->next) { + memsize += sizeof(xmlAttr) + memsize_node(child); + } } if (node->type == XML_TEXT_NODE) { memsize += xmlStrlen(node->content); From 14d1f5a2a11e2cdad540ee33b05985e4301e7b4b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 8 Jul 2023 09:47:59 -0400 Subject: [PATCH 6/7] test: add coverage for the memsize_of bug which turns out to be because xmlDTD has "attributes" where xmlNode has "properties" (cherry picked from commit 19a64ba54f7673729ebae4d572711259223fbce8) --- ext/nokogiri/xml_document.c | 5 +++-- test/test_memory_leak.rb | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index c6856f979a..bfaaf94f54 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -100,13 +100,14 @@ memsize_node(const xmlNodePtr node) { /* note we don't count namespace definitions, just going for a good-enough number here */ xmlNodePtr child; + xmlAttrPtr property; size_t memsize = 0; memsize += xmlStrlen(node->name); if (node->type == XML_ELEMENT_NODE) { - for (child = (xmlNodePtr)node->properties; child; child = child->next) { - memsize += sizeof(xmlAttr) + memsize_node(child); + for (property = node->properties; property; property = property->next) { + memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property); } } if (node->type == XML_TEXT_NODE) { diff --git a/test/test_memory_leak.rb b/test/test_memory_leak.rb index 3b8dc32d1e..14d0889b25 100644 --- a/test/test_memory_leak.rb +++ b/test/test_memory_leak.rb @@ -313,6 +313,21 @@ def test_object_space_memsize_of assert(bigger_name_size > base_size, "longer tags should increase memsize") end + def test_object_space_memsize_with_dtd + # https://github.com/sparklemotion/nokogiri/issues/2923 + require "objspace" + skip("memsize_of not defined") unless ObjectSpace.respond_to?(:memsize_of) + + doc = Nokogiri::XML(<<~XML) + + + ]> + + XML + ObjectSpace.memsize_of(doc) # assert_does_not_crash + end + module MemInfo # from https://stackoverflow.com/questions/7220896/get-current-ruby-process-memory-usage # this is only going to work on linux From 8460bfed0bbded56e4fbf9bb7982740ff05a0f78 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 10 Aug 2023 15:26:06 -0400 Subject: [PATCH 7/7] dep: update libxml2 to v2.11.5 https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.11.5 (cherry picked from commit f7940b7a0bcf730f1ee11285c2ff165eca58a017) --- CHANGELOG.md | 13 +++++++++++++ dependencies.yml | 6 +++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aa1c3906e..a319ffe505 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## next / unreleased + +### Dependencies + +* [CRuby] Vendored libxml2 is updated to v2.11.5 from v2.11.4. For details please see https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.11.5 + + +### Fixed + +* Fixed a typo in a HTML5 parser error message. [[#2927](https://github.com/sparklemotion/nokogiri/issues/2927)] (Thanks, [@anishathalye](https://github.com/anishathalye)!) +* [CRuby] `ObjectSpace.memsize_of` is now safe to call on `Document`s with complex DTDs. In previous versions, this debugging method could result in a segfault. [[#2923](https://github.com/sparklemotion/nokogiri/issues/2923), [#2924](https://github.com/sparklemotion/nokogiri/issues/2924)] + + ## 1.15.3 / 2023-07-05 ### Fixed diff --git a/dependencies.yml b/dependencies.yml index cbe69b3d81..dd5597e051 100644 --- a/dependencies.yml +++ b/dependencies.yml @@ -1,7 +1,7 @@ libxml2: - version: "2.11.4" - sha256: "737e1d7f8ab3f139729ca13a2494fd17bf30ddb4b7a427cf336252cab57f57f7" - # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.4.sha256sum + version: "2.11.5" + sha256: "3727b078c360ec69fa869de14bd6f75d7ee8d36987b071e6928d4720a28df3a6" + # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.5.sha256sum libxslt: version: "1.1.38"