encoding.rb 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # coding: US-ASCII
  2. ##
  3. # This module is a wrapper around File IO and Encoding that helps RDoc load
  4. # files and convert them to the correct encoding.
  module RDoc::Encoding

    ##
    # Reads the contents of +filename+ and handles any encoding directives in
    # the file.
    #
    # The content will be converted to the +encoding+ (Encoding.default_external
    # is used when +encoding+ is nil).  If the file cannot be converted a
    # warning will be printed and nil will be returned.  nil is also returned,
    # without a warning, when +filename+ does not exist or is a directory.
    #
    # If +force_transcode+ is true the document will be transcoded and any
    # invalid byte sequence or unknown character in the target encoding will
    # be replaced with '?'
    #
    # An ArgumentError other than "unknown encoding name" is re-raised.
    def self.read_file filename, encoding, force_transcode = false
      # File.open, unlike Kernel#open, never interprets a leading "|" in
      # +filename+ as a command to run.
      content = File.open filename, "rb" do |f| f.read end
      content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

      # Strip a UTF-8 byte order mark.  The /n flag keeps the pattern binary
      # so it matches the binary content whatever the source file's encoding.
      # sub! returns nil when nothing was removed, so +utf8+ doubles as a flag.
      utf8 = content.sub!(/\A\xef\xbb\xbf/n, '')

      RDoc::Encoding.set_encoding content

      if Object.const_defined? :Encoding then
        encoding ||= Encoding.default_external
        orig_encoding = content.encoding

        if utf8 then
          # a BOM was present, so the bytes are UTF-8
          content.force_encoding Encoding::UTF_8
          content.encode! encoding
        else
          # assume the content is in our output encoding
          content.force_encoding encoding
        end

        unless content.valid_encoding? then
          # revert and try to transcode
          content.force_encoding orig_encoding
          content.encode! encoding
        end

        unless content.valid_encoding? then
          warn "unable to convert #{filename} to #{encoding}, skipping"
          content = nil
        end
      end

      content
    rescue ArgumentError => e
      # Encoding.find (via set_encoding) raises this for a bogus magic comment
      raise unless e.message =~ /unknown encoding name - (.*)/
      warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
      nil
    rescue Encoding::InvalidByteSequenceError,
           Encoding::UndefinedConversionError => e
      if force_transcode then
        # retry from the original encoding, substituting whatever cannot be
        # represented or decoded
        content.force_encoding orig_encoding
        content.encode!(encoding,
                        :invalid => :replace,
                        :undef   => :replace,
                        :replace => '?')
        content
      else
        warn "unable to convert #{e.message} for #{filename}, skipping"
        nil
      end
    rescue Errno::EISDIR, Errno::ENOENT
      nil
    end

    ##
    # Sets the encoding of +string+ based on the magic comment.
    #
    # Only the first line of +string+ is examined, or the first two lines when
    # the first is a <tt>#!</tt> line.  Either an XML declaration with an
    # +encoding+ attribute or a <tt>coding:</tt>/<tt>encoding=</tt> style
    # directive is recognized.  When one is found the examined line(s),
    # including any <tt>#!</tt> line, are removed from +string+ in place and
    # +string+ is tagged with the named encoding.
    #
    # Does nothing when no directive is found.  Raises ArgumentError (from
    # Encoding.find) when the named encoding is unknown; the line(s) have
    # already been removed at that point.
    def self.set_encoding string
      first_line = string[/\A(?:#!.*\n)?.*\n/]

      name = case first_line
             when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
             when /\b(?:en)?coding[=:]\s*([^\s;]+)/i   then $1
             else return
             end

      string.sub! first_line, ''

      return unless Object.const_defined? :Encoding

      enc = Encoding.find name
      string.force_encoding enc if enc
    end

  end