# Returns the next character of the input stream as a String, or :EOF once
# both the buffer and the raw stream are exhausted.
#
# Characters waiting in @queue are returned first. Otherwise one character is
# decoded from @buffer at byte offset @tell, topping the buffer up from
# @raw_stream whenever fewer than four bytes of look-ahead remain:
#
# * ASCII (0x01-0x7F) is returned unchanged, except that CR and CRLF are
#   folded into a single LF; @line, @col and @line_lengths are updated.
# * NUL pushes "null-character" onto @errors and yields U+FFFD.
# * When @win1252 is set, bytes 0x80-0x9F are looked up in
#   ENTITIES_WINDOWS1252 and bytes 0xA0-0xFF are taken as Latin-1 code points.
# * Otherwise a well-formed UTF-8 sequence is returned whole (UTF-8 encoded)
#   and any stray or malformed byte yields U+FFFD.
#
# All buffer access is byte-oriented (getbyte/byteslice/bytesize), so the
# result does not depend on the buffer's String encoding. Requires Ruby >= 2.0.
#
# NOTE(review): @col is only advanced for ASCII characters, exactly as before;
# confirm whether multi-byte characters are meant to count as a column.
def char
  return @queue.shift unless @queue.empty?

  # A UTF-8 sequence can be four bytes long, so bytes @tell..@tell+3 must be
  # present before decoding; otherwise a sequence split across two reads would
  # be seen truncated and rejected.
  if @tell + 4 > @buffer.bytesize && !@raw_stream.eof?
    leftover = @buffer.byteslice(@tell..-1) || ''
    incoming = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
    @buffer = leftover.b + incoming.b
    @tell = 0
  end

  byte = @buffer.getbyte(@tell)
  @tell += 1
  replacement = [0xFFFD].pack('U')

  case byte
  when nil
    :EOF
  when 0x00
    @errors.push("null-character")
    replacement
  when 0x01..0x7F
    if byte == 0x0D
      # Swallow the LF of a CRLF pair so the pair is reported as one newline.
      @tell += 1 if @buffer.getbyte(@tell) == 0x0A
      byte = 0x0A
    end
    if byte == 0x0A
      @line_lengths << @col
      @line += 1
      @col = 0
    else
      @col += 1
    end
    byte.chr
  else
    # byte is 0x80..0xFF here.
    if @win1252
      # 0x80-0x9F go through the Windows-1252 table; 0xA0-0xFF coincide with
      # Latin-1, whose code points equal the byte values.
      code = byte <= 0x9F ? ENTITIES_WINDOWS1252[byte - 0x80] : byte
      [code].pack('U')
    else
      # Validate from the current byte only (\A, not ^: a newline inside the
      # window must not let the match start further along). Lone continuation
      # bytes 0x80-0xBF cannot start any alternative and fall through to U+FFFD.
      window = @buffer.byteslice(@tell - 1, 4).b
      sequence = window[/\A(?:
          [\xC2-\xDF][\x80-\xBF]              # non-overlong 2-byte
        | \xE0[\xA0-\xBF][\x80-\xBF]          # excluding overlongs
        | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}   # straight 3-byte
        | \xED[\x80-\x9F][\x80-\xBF]          # excluding surrogates
        | \xF0[\x90-\xBF][\x80-\xBF]{2}       # planes 1-3
        | [\xF1-\xF3][\x80-\xBF]{3}           # planes 4-15
        | \xF4[\x80-\x8F][\x80-\xBF]{2}       # plane 16
      )/nx]
      if sequence
        # The lead byte is already consumed; skip the continuation bytes.
        @tell += sequence.bytesize - 1
        sequence.force_encoding(Encoding::UTF_8)
      else
        replacement
      end
    end
  end
end