# File lib/html5/inputstream.rb, line 147
#
# Sniffs a Unicode byte order mark (BOM) in the next bytes of @raw_stream.
#
# Reads up to four bytes from @raw_stream and compares their raw byte
# values against the known BOM signatures. Working on byte values (rather
# than using the String itself as a Hash key) keeps the match independent
# of the encoding tag attached to the String returned by the stream.
#
# Afterwards #seek (defined elsewhere in this class) is called with the
# bytes that were read and the length of the detected BOM, or 0 when no
# BOM matched -- presumably so that reading resumes just past the BOM or
# at the start of the data; confirm against #seek.
#
# Returns the encoding name as a String ('utf-8', 'utf-16le', 'utf-16be',
# 'utf-32le' or 'utf-32be'), or nil when no BOM is found or when the
# stream's read returns nil.
def detect_bom
  # Order matters: the UTF-32LE signature begins with the UTF-16LE one, so
  # the four-byte signatures must be tried before the two-byte ones.
  signatures = [
    [[0xEF, 0xBB, 0xBF],       'utf-8'],
    [[0xFF, 0xFE, 0x00, 0x00], 'utf-32le'],
    [[0x00, 0x00, 0xFE, 0xFF], 'utf-32be'],
    [[0xFF, 0xFE],             'utf-16le'],
    [[0xFE, 0xFF],             'utf-16be']
  ]

  head = @raw_stream.read(4)
  return nil unless head

  head_bytes = head.bytes.to_a

  # A signature only matches when the stream supplied at least as many
  # bytes as the signature is long; a short read can therefore never be
  # reported with an offset larger than the BOM that was actually seen.
  bom, encoding = signatures.find do |signature, _name|
    head_bytes.first(signature.size) == signature
  end

  seek(head, bom ? bom.size : 0)

  encoding
end