Skip to content

Commit

Permalink
Remove @buffer and process using only @scanner.
Browse files Browse the repository at this point in the history
Removed `attr_reader :buffer` and added similar `def buffer` and `def buffer=` interfaces.
  • Loading branch information
naitoh committed Jan 8, 2024
1 parent c8a1932 commit 8edd4ce
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 31 deletions.
2 changes: 1 addition & 1 deletion lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def pull_event
else
@document_status = :after_doctype
if @source.encoding == "UTF-8"
@source.buffer.force_encoding(::Encoding::UTF_8)
@source.buffer = @source.buffer.force_encoding(::Encoding::UTF_8)
end
end
end
Expand Down
73 changes: 43 additions & 30 deletions lib/rexml/source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ def SourceFactory::create_from(arg)
# objects and provides consumption of text
class Source
include Encoding
# The current buffer (what we're going to read next)
attr_reader :buffer
# The line number of the last consumed text
attr_reader :line
attr_reader :encoding
Expand All @@ -41,8 +39,8 @@ class Source
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection
def initialize(arg, encoding=nil)
@orig = @buffer = arg
@scanner = StringScanner.new(@buffer)
@orig = arg
@scanner = StringScanner.new(@orig)
if encoding
self.encoding = encoding
else
Expand All @@ -51,6 +49,14 @@ def initialize(arg, encoding=nil)
@line = 0
end

# The current buffer (what we're going to read next).
#
# Returns <tt>@scanner.rest</tt>, i.e. the part of the scanner's string
# that has not been consumed yet.
#
# NOTE(review): StringScanner#rest appears to hand back a separate String,
# so in-place changes to the returned value presumably do not reach the
# scanner; assign through #buffer= to replace the contents -- confirm
# against the StringScanner documentation.
def buffer
@scanner.rest
end

# Replaces the text held by the scanner with +str+.
#
# NOTE(review): per the StringScanner documentation, assigning +string=+
# also resets the scan position to the start of +str+ -- confirm before
# relying on it.
#
# @param str the new string for the scanner to read from
def buffer=(str)
@scanner.string = str
end

# Inherited from Encoding
# Overridden to support optimized en/decoding
Expand All @@ -63,54 +69,57 @@ def read
end

def match(pattern, cons=false)
@scanner.string = @buffer
@scanner.scan(pattern)
@buffer = @scanner.rest if cons and @scanner.matched?
if cons
@scanner.scan(pattern)
else
@scanner.check(pattern)
end
@scanner.matched? ? @scanner : nil
end

# @return true if the Source is exhausted
def empty?
@buffer == ""
@scanner.eos?
end

# @return the current line in the source
def current_line
lines = @orig.split
res = lines.grep @buffer[0..30]
res = lines.grep @scanner.rest[0..30]
res = res[-1] if res.kind_of? Array
lines.index( res ) if res
end

private

def detect_encoding
buffer_encoding = @buffer.encoding
scanner_encoding = @scanner.rest.encoding
detected_encoding = "UTF-8"
begin
@buffer.force_encoding("ASCII-8BIT")
if @buffer[0, 2] == "\xfe\xff"
@buffer[0, 2] = ""
@scanner.string = @scanner.rest.force_encoding("ASCII-8BIT")
if @scanner.rest[0, 2] == "\xfe\xff"
@scanner.string = @scanner.rest.delete_prefix("\xfe\xff")
detected_encoding = "UTF-16BE"
elsif @buffer[0, 2] == "\xff\xfe"
@buffer[0, 2] = ""
elsif @scanner.rest[0, 2] == "\xff\xfe"
@scanner.string = @scanner.rest.delete_prefix("\xff\xfe")
detected_encoding = "UTF-16LE"
elsif @buffer[0, 3] == "\xef\xbb\xbf"
@buffer[0, 3] = ""
elsif @scanner.rest[0, 3] == "\xef\xbb\xbf"
@scanner.string = @scanner.rest.delete_prefix("\xef\xbb\xbf")
detected_encoding = "UTF-8"
end
ensure
@buffer.force_encoding(buffer_encoding)
@scanner.string = @scanner.rest.force_encoding(scanner_encoding)
end
self.encoding = detected_encoding
end

def encoding_updated
if @encoding != 'UTF-8'
@buffer = decode(@buffer)
@scanner.string = decode(@scanner.rest)
@to_utf = true
else
@to_utf = false
@buffer.force_encoding ::Encoding::UTF_8
@scanner.string = @scanner.rest.force_encoding(::Encoding::UTF_8)
end
end
end
Expand All @@ -133,7 +142,7 @@ def initialize(arg, block_size=500, encoding=nil)
end

if !@to_utf and
@buffer.respond_to?(:force_encoding) and
@orig.respond_to?(:force_encoding) and
@source.respond_to?(:external_encoding) and
@source.external_encoding != ::Encoding::UTF_8
@force_utf8 = true
Expand All @@ -144,22 +153,26 @@ def initialize(arg, block_size=500, encoding=nil)

def read
begin
@buffer << readline
@scanner.string = @scanner.rest + readline
rescue Exception, NameError
@source = nil
end
end

def match( pattern, cons=false )
@scanner.string = @buffer
@scanner.scan(pattern)
@buffer = @scanner.rest if cons and @scanner.matched?
if cons
@scanner.scan(pattern)
else
@scanner.check(pattern)
end
while !@scanner.matched? and @source
begin
@buffer << readline
@scanner.string = @buffer
@scanner.scan(pattern)
@buffer = @scanner.rest if cons and @scanner.matched?
@scanner << readline
if cons
@scanner.scan(pattern)
else
@scanner.check(pattern)
end
rescue
@source = nil
end
Expand Down Expand Up @@ -222,7 +235,7 @@ def encoding_updated
@source.set_encoding(@encoding, @encoding)
end
@line_break = encode(">")
@pending_buffer, @buffer = @buffer, ""
@pending_buffer, @scanner.string = @scanner.rest, ""
@pending_buffer.force_encoding(@encoding)
super
end
Expand Down

0 comments on commit 8edd4ce

Please sign in to comment.