encodings.rb 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. # encoding: utf-8
  2. module Mail
  # Raised when attempting to decode an unknown encoding type.
  #
  # Carries no behaviour of its own beyond StandardError, so a bare +rescue+
  # will catch it.
  class UnknownEncodingType < StandardError #:nodoc:
  end
  6. module Encodings
  # Constants from Mail::Patterns become visible to the methods below
  # (presumably this is where TOKEN_UNSAFE, used by param_encode, comes from).
  include Mail::Patterns
  # Helpers from Mail::Utilities become callable at module level
  # (presumably unquote, map_lines and map_with_index -- confirm in that module).
  extend Mail::Utilities
  # Registry of transfer encodings: normalized name (see get_name) => class.
  @transfer_encodings = {}
  # Registers a transfer encoding class under its normalized name
  # (see get_name), replacing any class already stored under that name.
  #
  # Example:
  #
  #   Encodings.register "base64", Mail::Encodings::Base64
  #
  # Returns the class that was registered.
  def Encodings.register(name, cls)
    registry_key = get_name(name)
    @transfer_encodings[registry_key] = cls
  end
  # Tells whether an encoding has been registered under the given name.
  # The name is normalized with get_name first, so symbols, mixed case and
  # dashes are all accepted.
  #
  # Example:
  #
  #   Encodings.defined?(:base64) #=> true
  def Encodings.defined?( str )
    registry_key = get_name(str)
    @transfer_encodings.key?(registry_key)
  end
  # Looks up a registered encoding class by name; returns nil when nothing
  # is registered under that name.
  #
  # Encodings have to be registered first (see register), which is what
  # allows further encodings to be added later.
  #
  # Example:
  #
  #   Encodings.get_encoding(:base64) #=> Mail::Encodings::Base64
  def Encodings.get_encoding( str )
    registry_key = get_name(str)
    @transfer_encodings[registry_key]
  end
  # Returns an Array of every encoding class currently in the registry.
  def Encodings.get_all
    @transfer_encodings.values
  end
  # Normalizes an encoding name into the form used as a registry key:
  # converted to a String, dashes replaced by underscores, lower-cased.
  #
  # Example:
  #
  #   Encodings.get_name("Quoted-Printable") #=> "quoted_printable"
  def Encodings.get_name(enc)
    enc.to_s.gsub("-", "_").downcase
  end
  # Encodes a parameter value.
  #
  # * An ASCII-only value matching TOKEN_UNSAFE is returned wrapped in
  #   double quotes.
  # * Any other ASCII-only value is returned untouched.
  # * Everything else is delegated to RubyVer.param_encode, which performs
  #   the URI escaping.
  #
  # For the delegated case the language field ('en') can be set using
  # Mail::Configuration, like so:
  #
  #   Mail.defaults.do
  #     param_encode_language 'jp'
  #   end
  #
  # and the character set is either the value of $KCODE for Ruby < 1.9 or
  # the encoding on the string passed in.
  #
  # NOTE(review): the escaped output presumably has the
  # <charset>'<language>'<escaped value> shape, e.g.
  # "us-ascii'en'This%20is%20fun" -- confirm against RubyVer.param_encode.
  def Encodings.param_encode(str)
    return RubyVer.param_encode(str) unless str.ascii_only?

    str =~ TOKEN_UNSAFE ? %Q{"#{str}"} : str
  end
  # Decodes a parameter value using URI Escaping. The work is delegated
  # entirely to RubyVer.param_decode.
  #
  # Example:
  #
  #   Mail::Encodings.param_decode("This%20is%20fun", 'us-ascii') #=> "This is fun"
  #
  #   str = Mail::Encodings.param_decode("This%20is%20fun", 'iso-8859-1')
  #   str.encoding #=> 'ISO-8859-1' ## Only on Ruby 1.9
  #   str #=> "This is fun"
  def Encodings.param_decode(str, encoding)
    RubyVer.param_decode(str, encoding)
  end
  # Decodes or encodes a string as needed for either Base64 or QP encoding
  # types in the =?<encoding>?[QB]?<string>?= format.
  #
  # Pass :decode as +output_type+ to decode +str+; any other value encodes
  # it. The character set used for encoding will either be the value of
  # $KCODE for Ruby < 1.9 or the encoding on the string passed in.
  #
  # On encoding, ASCII-only strings are returned as they are and everything
  # else is sent out Base64 encoded.
  def Encodings.decode_encode(str, output_type)
    return Encodings.value_decode(str) if output_type == :decode
    return str if str.ascii_only?

    Encodings.b_value_encode(str, find_encoding(str))
  end
  # Decodes every encoded-word found in a string, using Base64 or Quoted
  # Printable depending on the marker each word carries. Text outside of
  # encoded-words is passed through unchanged.
  #
  # Encoded-words have the format =?<encoding>?[QB]?<string>?=
  def Encodings.value_decode(str)
    # Fast path: without an "=?" there cannot be any encoded-word
    return str unless str.index("=?")

    # Whitespace that sits between two encoded-words is dropped
    collapsed = str.gsub(/\?=(\s*)=\?/, '?==?')

    # Splitting with a capture group keeps the separating blanks as chunks of
    # their own, so they survive the final join
    pieces = collapsed.split(/([ \t])/).map do |chunk|
      next chunk if chunk.index('=?').nil?

      # Fuse adjacent QP encoded-words so that a character split across two
      # words is not decoded in halves
      chunk.gsub!(/\?\=\=\?.+?\?[Qq]\?/m, '') if chunk =~ /\?==\?/

      # Walk through the chunk, alternating between encoded-words and plain text
      chunk.scan(/(                      # Group around entire regex to include it in matches
        \=\?[^?]+\?([QB])\?[^?]+?\?\=    # Quoted String with subgroup for encoding method
        |                                # or
        .+?(?=\=\?|$)                    # Plain String
      )/xmi).map do |captures|
        segment, marker = *captures
        case marker
        when 'b', 'B' then b_value_decode(segment)
        when 'q', 'Q' then q_value_decode(segment)
        else segment
        end
      end
    end

    # Array#join flattens the nested per-chunk arrays while concatenating
    pieces.join("")
  end
  # Takes an encoded string of the format =?<encoding>?[QB]?<string>?=,
  # decodes it and tries to convert the decoded text to +to_encoding+.
  #
  # The decoded text is returned without conversion when
  # * the charset named in +str+ equals +to_encoding+ (compared
  #   case-insensitively and ignoring dashes),
  # * either charset is missing, or
  # * the conversion fails -- the declared charset does not match what the
  #   text actually is, or one of the charsets is unknown (like X-UNKNOWN).
  #   There is not much else that can be done in that case.
  #
  # Each Ruby branch has its own rescue clause. Iconv is only loaded on
  # Ruby < 1.9, so naming Iconv's error classes in a rescue shared with the
  # String#encode branch would itself raise NameError as soon as encode
  # failed, instead of falling back to the unconverted text.
  def Encodings.unquote_and_convert_to(str, to_encoding)
    original_encoding = split_encoding_from_string( str )
    output = value_decode( str ).to_s

    same_charset = original_encoding.to_s.downcase.gsub("-", "") == to_encoding.to_s.downcase.gsub("-", "")
    return output if same_charset
    return output unless original_encoding && to_encoding

    if RUBY_VERSION >= '1.9'
      begin
        output.encode(to_encoding)
      rescue EncodingError
        # Covers undefined conversions, invalid byte sequences and unknown
        # converters, all of which are EncodingError subclasses
        output
      end
    else
      require 'iconv'
      begin
        Iconv.iconv(to_encoding, original_encoding, output).first
      rescue Iconv::IllegalSequence, Iconv::InvalidEncoding, Errno::EINVAL
        output
      end
    end
  end
  # Encodes the non US-ASCII parts of an address for use in a header field.
  #
  # An Array of addresses is encoded element by element (recursively) and
  # returned as one String joined with ", "; anything else is passed
  # straight to encode_non_usascii.
  def Encodings.address_encode(address, charset = 'utf-8')
    return encode_non_usascii(address, charset) unless address.is_a?(Array)

    encoded_entries = address.map { |entry| Encodings.address_encode(entry, charset) }
    encoded_entries.join(", ")
  end
  # Base64-encodes (via b_value_encode) every part of +address+ that
  # contains non US-ASCII characters, leaving pure ASCII parts readable.
  #
  # +address+ is returned as is when it is ASCII-only or when +charset+ is
  # nil. The string passed in is never modified; a new String is returned.
  def Encodings.encode_non_usascii(address, charset)
    return address if address.ascii_only? || charset.nil?
    us_ascii = %Q{\x00-\x7f}
    # Encode any non usascii strings embedded inside of quotes.
    # Non-destructive gsub: the caller's string must not be changed under it.
    address = address.gsub(/(".*?[^#{us_ascii}].*?")/) { |s| Encodings.b_value_encode(unquote(s), charset) }
    # Then loop through all remaining items and encode as needed
    tokens = address.split(/\s/)
    map_with_index(tokens) do |word, i|
      if word.ascii_only?
        word
      else
        # Only look back when there is a previous token: at index 0,
        # tokens[i-1] would wrap around to the last token.
        previous_non_ascii = i > 0 && !tokens[i-1].ascii_only?
        # Whitespace between two adjacent encoded-words is dropped again on
        # decoding (see value_decode), so the separating space is carried
        # inside the second encoded-word.
        word = " #{word}" if previous_non_ascii
        Encodings.b_value_encode(word, charset)
      end
    end.join(' ')
  end
  # Base64-encodes a string and returns it ready to be inserted as a value
  # for a field, that is, in the =?<charset>?B?<string>?= format.
  #
  # ASCII-only input is returned unchanged. The encoding itself is done by
  # RubyVer.b_value_encode, which hands back the encoded text together with
  # the charset label to use; each line of that text becomes one
  # encoded-word and the words are joined with a single space.
  #
  # Example:
  #
  #   Encodings.b_value_encode('This is あ string', 'UTF-8')
  #   #=> "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?="
  def Encodings.b_value_encode(encoded_str, encoding = nil)
    return encoded_str if encoded_str.to_s.ascii_only?

    string, encoding = RubyVer.b_value_encode(encoded_str, encoding)
    encoded_words = map_lines(string) { |line| "=?#{encoding}?B?#{line.chomp}?=" }
    encoded_words.join(" ")
  end
  # Quoted-Printable-encodes a string and returns it ready to be inserted as
  # a value for a field, that is, in the =?<charset>?Q?<string>?= format.
  #
  # ASCII-only input is returned unchanged. The encoding itself is done by
  # RubyVer.q_value_encode, which hands back the encoded text together with
  # the charset label to use; each line of that text becomes one
  # encoded-word (spaces written as underscores) and the words are joined
  # with a single space.
  #
  # Example:
  #
  #   Encodings.q_value_encode('This is あ string', 'UTF-8')
  #   #=> "=?UTF-8?Q?This_is_=E3=81=82_string?="
  def Encodings.q_value_encode(encoded_str, encoding = nil)
    return encoded_str if encoded_str.to_s.ascii_only?

    string, encoding = RubyVer.q_value_encode(encoded_str, encoding)
    # Soft line breaks are stripped: the string is already limited to the
    # length we want
    string.gsub!("=\r\n", '')
    encoded_words = map_lines(string) do |line|
      payload = line.chomp.gsub(/ /, '_')
      "=?#{encoding}?Q?#{payload}?="
    end
    encoded_words.join(" ")
  end
  215. private
  # Decodes a Base64 string from the "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=" format.
  # The work is delegated entirely to RubyVer.b_value_decode.
  #
  # Example:
  #
  #   Encodings.b_value_decode("=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=")
  #   #=> 'This is あ string'
  def Encodings.b_value_decode(str)
    RubyVer.b_value_decode(str)
  end
  # Decodes a Quoted-Printable string from the "=?UTF-8?Q?This_is_=E3=81=82_string?=" format.
  # The work is delegated entirely to RubyVer.q_value_decode.
  #
  # Example:
  #
  #   Encodings.q_value_decode("=?UTF-8?Q?This_is_=E3=81=82_string?=")
  #   #=> 'This is あ string'
  def Encodings.q_value_decode(str)
    RubyVer.q_value_decode(str)
  end
  # Extracts the charset label from the first encoded-word
  # (=?<encoding>?[QB]?<string>?=) found in +str+.
  #
  # Returns nil when +str+ contains no encoded-word.
  def Encodings.split_encoding_from_string( str )
    encoded_word = str.match(/\=\?([^?]+)?\?[QB]\?(.+)?\?\=/mi)
    encoded_word && encoded_word[1]
  end
  # Returns the character set to use when encoding +str+: the string's own
  # encoding on Ruby 1.9 and later, the global $KCODE on older Rubies.
  def Encodings.find_encoding(str)
    if RUBY_VERSION >= '1.9'
      str.encoding
    else
      $KCODE
    end
  end
  245. end
  246. end