text.rb 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. # coding: utf-8
  2. ##
  3. # For RDoc::Text#to_html
  4. require 'strscan'
  5. ##
  6. # For RDoc::Text#snippet
  7. begin
  8. gem 'json'
  9. rescue Gem::LoadError
  10. end
  11. require 'json'
  12. ##
  13. # Methods for manipulating comment text
  14. module RDoc::Text
  ##
  # Lookup table from a markup format name to the class that parses that
  # format.  Names that are not listed fall back to the "rdoc" parser,
  # RDoc::Markup, through the Hash default.

  MARKUP_FORMAT = Hash.new(RDoc::Markup).update(
    'rdoc'   => RDoc::Markup,
    'rd'     => RDoc::RD,
    'tomdoc' => RDoc::TomDoc
  )
  ##
  # Per-encoding cache of the typographic characters used by to_html.
  #
  # Looking up an encoding that has not been seen yet builds (and stores) a
  # Hash whose values are the characters transcoded to that encoding, or
  # their plain fallbacks when the encoding cannot represent them.
  #
  # See also encode_fallback.

  TO_HTML_CHARACTERS = Hash.new do |cache, encoding|
    cache[encoding] = [
      # name,          character, fallback
      [:close_dquote, '”', '"'],
      [:close_squote, '’', '\''],
      [:copyright,    '©', '(c)'],
      [:ellipsis,     '…', '...'],
      [:em_dash,      '—', '---'],
      [:en_dash,      '–', '--'],
      [:open_dquote,  '“', '"'],
      [:open_squote,  '‘', '\''],
      [:trademark,    '®', '(r)'],
    ].each_with_object({}) do |(name, character, fallback), table|
      table[name] = encode_fallback character, encoding, fallback
    end
  end if Object.const_defined? :Encoding
  ##
  # Returns +character+ transcoded to +encoding+.  When +encoding+ has no
  # representation for +character+ the +fallback+ string is used in its
  # place.

  def self.encode_fallback character, encoding, fallback
    character.encode(
      encoding,
      :undef    => :replace,
      :replace  => fallback,
      :fallback => { character => fallback }
    )
  end
  ##
  # Expands tab characters in +text+ to spaces, padding every tab out to the
  # next eight-column tab stop.
  #
  # Columns are counted in characters from the start of the line, or from
  # the most recent carriage return when a line contains a bare "\r".  Text
  # without tabs is returned with the same content.
  #
  # Returns a new String; +text+ itself is not modified.

  def expand_tabs text
    expanded = []

    text.each_line do |line|
      # Each pass expands the first remaining tab after the line start and
      # after every carriage return.  gsub! returns nil as soon as a pass
      # substitutes nothing, so the loop always terminates, even for a tab
      # that follows a bare "\r".
      nil while line.gsub!(/(^|\r)((?:[^\r\n]{8})*?)([^\t\r\n]{0,7})\t/) do
        # $2 is the run of complete eight-column cells before the tab, $3 the
        # partial cell that has to be padded up to the tab stop
        r = "#{$1}#{$2}#{$3}#{' ' * (8 - $3.size)}"
        r.force_encoding text.encoding if Object.const_defined? :Encoding
        r
      end

      expanded << line
    end

    expanded.join
  end
  ##
  # Removes the common left margin from +text+.
  #
  # The margin is the smallest offset of the first non-whitespace character
  # over all lines; lines without any non-whitespace character are ignored
  # when computing it.  Up to that many leading spaces are then removed from
  # every line.

  def flush_left text
    margins = text.each_line.map { |line| line =~ /\S/ || 9999 }
    indent  = (margins << 9999).min

    blank = ''
    blank.force_encoding text.encoding if Object.const_defined? :Encoding

    text.gsub(/^ {0,#{indent}}/, blank)
  end
  ##
  # Parses +text+ (a string in markup format) and renders the resulting
  # document with the formatter of the including class.
  #
  # Requires the including class to implement #formatter

  def markup text
    document = parse text
    document.accept formatter
  end
  ##
  # Cleans up comment +text+: strips /* */ comment stars, strips leading
  # hashes, expands tabs, flushes the text to the left and finally strips
  # leading and trailing newlines, in that order.
  #
  # Empty +text+ is returned as-is.

  def normalize_comment text
    return text if text.empty?

    strip_newlines(
      flush_left(
        expand_tabs(
          strip_hashes(
            strip_stars(text)))))
  end
  ##
  # Builds a RDoc::Markup::Document from +text+.
  #
  # A RDoc::Markup::Document is returned unchanged and a RDoc::Comment is
  # asked to parse itself.  Anything else is normalized and handed to the
  # parser registered for +format+ in MARKUP_FORMAT; text that contains
  # nothing but newlines after normalization gives an empty document.

  def parse text, format = 'rdoc'
    case text
    when RDoc::Markup::Document then return text
    when RDoc::Comment          then return text.parse
    end

    normalized = normalize_comment text # TODO remove, should not be necessary

    if normalized =~ /\A\n*\z/
      RDoc::Markup::Document.new
    else
      MARKUP_FORMAT[format].parse normalized
    end
  end
  ##
  # Parses +text+ and converts the document with a
  # RDoc::Markup::ToHtmlSnippet created with +limit+ (100 by default).

  def snippet text, limit = 100
    parsed     = parse text
    to_snippet = RDoc::Markup::ToHtmlSnippet.new limit

    to_snippet.convert parsed
  end
  ##
  # Replaces the leading # characters of each line of +text+ with spaces and
  # empties lines that are left with only whitespace.
  #
  # +text+ is returned unchanged when some line starts with a character
  # other than # (after optional whitespace).

  def strip_hashes text
    return text if text =~ /^(?>\s*)[^\#]/

    blank = ''
    blank.force_encoding text.encoding if Object.const_defined? :Encoding

    # swap every run of hashes for a run of spaces of the same length
    unhashed = text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }

    unhashed.gsub(/^\s+$/, blank)
  end
  ##
  # Removes every \n character at the very start and at the very end of
  # +text+.  Newlines inside the text are kept.

  def strip_newlines text
    # a block (instead of a replacement string) preserves String encoding
    text.gsub(/\A\n*(.*?)\n*\z/m) { $1 }
  end
  ##
  # Blanks out the decoration of a /* */ style comment in +text+.
  #
  # Any "Document-method: name" directive is removed, the first comment
  # opener and the first comment closer are replaced by spaces, the leading
  # star of each line is replaced by spaces and lines left with only
  # whitespace are emptied.
  #
  # +text+ is returned unchanged when it contains no /* ... */ pair.

  def strip_stars text
    return text unless text =~ %r%/\*.*\*/%m

    encoding = text.encoding if Object.const_defined? :Encoding

    blank = ' '
    blank.force_encoding encoding if encoding

    nothing = ''
    nothing.force_encoding encoding if encoding

    stripped = text.gsub(%r%Document-method:\s+[\w:.#=!?]+%, '')

    # replacements keep the width of what they replace
    stripped = stripped.sub(%r%/\*+%) { blank * $&.length }
    stripped = stripped.sub(%r%\*+/%) { blank * $&.length }
    stripped = stripped.gsub(%r%^[ \t]*\*%m) { blank * $&.length }

    stripped.gsub(/^\s+$/, nothing)
  end
  ##
  # Converts dashes, ellipsis, quotes (written as ", &quot;, `` '' or '),
  # copyright <tt>(c)</tt> and registered trademark <tt>(r)</tt> markers in
  # +text+ to properly encoded typographic characters.
  #
  # The contents of <tt><tt></tt> and <tt><code></tt> elements and HTML tags
  # are copied without conversion, and a backslash in front of a
  # non-whitespace character is dropped.  Conversion covers the whole of
  # +text+, including the lines after a line break.
  #
  # Returns a new String; when Encoding is available it has the encoding of
  # +text+.

  def to_html text
    if Object.const_defined? :Encoding then
      html = ''.encode text.encoding

      encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]
    else
      html = ''
      encoded = {
        :close_dquote => '”',
        :close_squote => '’',
        :copyright    => '©',
        :ellipsis     => '…',
        :em_dash      => '—',
        :en_dash      => '–',
        :open_dquote  => '“',
        :open_squote  => '‘',
        :trademark    => '®',
      }
    end

    s = StringScanner.new text
    insquotes = false  # inside a pair of single quotes
    indquotes = false  # inside a pair of double quotes
    after_word = nil   # truthy when the text just copied ended in a word character

    # The order of the branches matters: longer markers are tried before the
    # shorter ones they contain (--- before --, '' before ').
    until s.eos? do
      case
      when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt
        html << s.matched.gsub('\\\\', '\\')
      when s.scan(/<(tt|code)>.*?/) then
        warn "mismatched <#{s[1]}> tag" # TODO signal file/line
        html << s.matched
      when s.scan(/<[^>]+>/) then # skip HTML tags
        html << s.matched
      when s.scan(/\\(\S)/) then # unhandled suppressed crossref
        html << s[1]
        after_word = nil
      when s.scan(/\.\.\.(\.?)/) then
        # a fourth dot is kept as a literal full stop before the ellipsis
        html << s[1] << encoded[:ellipsis]
        after_word = nil
      when s.scan(/\(c\)/) then
        html << encoded[:copyright]
        after_word = nil
      when s.scan(/\(r\)/) then
        html << encoded[:trademark]
        after_word = nil
      when s.scan(/---/) then
        html << encoded[:em_dash]
        after_word = nil
      when s.scan(/--/) then
        html << encoded[:en_dash]
        after_word = nil
      when s.scan(/&quot;|"/) then
        html << encoded[indquotes ? :close_dquote : :open_dquote]
        indquotes = !indquotes
        after_word = nil
      when s.scan(/``/) then # backtick double quote
        html << encoded[:open_dquote]
        after_word = nil
      when s.scan(/''/) then # tick double quote
        html << encoded[:close_dquote]
        after_word = nil
      when s.scan(/'/) then # single quote
        if insquotes
          html << encoded[:close_squote]
          insquotes = false
        elsif after_word
          # Mary's dog, my parents' house: do not start paired quotes
          html << encoded[:close_squote]
        else
          html << encoded[:open_squote]
          insquotes = true
        end

        after_word = nil
      else # advance to the next potentially significant character
        # /m lets the skipped run span line breaks, so conversion carries on
        # with the following lines instead of stopping at the first newline
        match = s.scan(/.+?(?=[<\\.("'`&-])/m) #"

        if match then
          html << match
          # \z rather than $: a run that ends in a newline does not end in a
          # word character
          after_word = match =~ /\w\z/
        else
          # nothing significant is left, copy the remainder verbatim
          html << s.rest
          break
        end
      end
    end

    html
  end
  222. ##
  223. # Wraps +txt+ to +line_len+
  224. def wrap(txt, line_len = 76)
  225. res = []
  226. sp = 0
  227. ep = txt.length
  228. while sp < ep
  229. # scan back for a space
  230. p = sp + line_len - 1
  231. if p >= ep
  232. p = ep
  233. else
  234. while p > sp and txt[p] != ?\s
  235. p -= 1
  236. end
  237. if p <= sp
  238. p = sp + line_len
  239. while p < ep and txt[p] != ?\s
  240. p += 1
  241. end
  242. end
  243. end
  244. res << txt[sp...p] << "\n"
  245. sp = p
  246. sp += 1 while sp < ep and txt[sp] == ?\s
  247. end
  248. res.join.strip
  249. end
  250. end