parser.rb 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
##
# A parser is simply a class that subclasses RDoc::Parser and implements #scan
# to fill in an RDoc::TopLevel with parsed data.
#
# The initialize method takes an RDoc::TopLevel to fill with parsed content,
# the name of the file to be parsed, the content of the file, an RDoc::Options
# object and an RDoc::Stats object to inform the user of parsed items. The
# scan method is then called to parse the file and must return the
# RDoc::TopLevel object. By calling super these items will be set for you.
#
# In order to be used by RDoc the parser needs to register the file extensions
# it can parse. Use ::parse_files_matching to register extensions.
#
#   require 'rdoc'
#
#   class RDoc::Parser::Xyz < RDoc::Parser
#     parse_files_matching /\.xyz$/
#
#     def initialize top_level, file_name, content, options, stats
#       super
#
#       # extra initialization if needed
#     end
#
#     def scan
#       # parse file and fill in @top_level
#     end
#   end
  29. class RDoc::Parser
  # Registry backing ::parsers.  Entries are [regexp, parser_class] pairs.
  @parsers = []

  ##
  # An Array of arrays that maps file extension (or name) regular
  # expressions to parser classes that will parse matching filenames.
  #
  # Use parse_files_matching to register a parser's file extensions.
  def self.parsers
    @parsers
  end
  ##
  # Alias an extension to another extension. After this call, files ending
  # in "new_ext" will be parsed using the same parser as "old_ext".
  #
  # Either extension may be given with or without its leading dot.
  # +new_ext+ is matched literally, so characters that are special in a
  # regular expression (as in "c++") carry no special meaning.
  #
  # Returns +true+ when the alias was registered and +false+ when no parser
  # is found for +old_ext+.
  def self.alias_extension(old_ext, new_ext)
    old_ext = old_ext.sub(/^\.(.*)/, '\1')
    new_ext = new_ext.sub(/^\.(.*)/, '\1')

    parser = can_parse "xxx.#{old_ext}"
    return false unless parser

    # Escape the extension so it cannot be interpreted as regexp syntax.
    RDoc::Parser.parsers.unshift [/\.#{Regexp.escape new_ext}$/, parser]

    true
  end
  ##
  # Determines if the file is a "binary" file which basically means it has
  # content that an RDoc parser shouldn't try to consume.
  #
  # Files ending in .rdoc or .txt are never considered binary and are not
  # read. Otherwise up to the first 1024 bytes of +file+ are sampled: a
  # sample that starts with the Marshal header or contains a NUL byte is
  # binary, and so is a sample that is not valid in the default external
  # encoding. An empty file is not binary.
  def self.binary?(file)
    return false if file =~ /\.(rdoc|txt)$/

    limit = 1024
    s = File.read(file, limit) or return false

    have_encoding = s.respond_to? :encoding

    if have_encoding then
      return false if s.encoding != Encoding::ASCII_8BIT and s.valid_encoding?
    end

    return true if s[0, 2] == Marshal.dump('')[0, 2] or s.index("\x00")

    if have_encoding then
      s.force_encoding Encoding.default_external
      return false if s.valid_encoding?

      # A full-size sample may end in the middle of a multibyte character,
      # which makes otherwise valid text look invalid. Ignore up to three
      # trailing bytes (the most a truncated character can leave behind)
      # before deciding the content is binary.
      if s.bytesize == limit then
        1.upto(3) do |cut|
          return false if s.byteslice(0, limit - cut).valid_encoding?
        end
      end

      true
    else
      # No encoding support: treat the sample as binary when more than 30%
      # of its bytes are outside printable ASCII, tab, CR and LF.
      if 0.respond_to? :fdiv then
        s.count("\x00-\x7F", "^ -~\t\r\n").fdiv(s.size) > 0.3
      else # HACK 1.8.6
        (s.count("\x00-\x7F", "^ -~\t\r\n").to_f / s.size) > 0.3
      end
    end
  end
  ##
  # Processes common directives for CodeObjects for the C and Ruby parsers.
  #
  # Applies +directive+'s +value+ to +code_object+, if appropriate.
  # Unrecognized directives are ignored. A +nil+ +value+ is treated like an
  # empty one.
  #
  # Deprecated: a warning is printed when warnings are enabled.
  def self.process_directive code_object, directive, value
    warn "RDoc::Parser::process_directive is deprecated and will be removed in RDoc 4. Use RDoc::Markup::PreProcess#handle_directive instead" if $-w

    case directive
    when 'nodoc' then
      code_object.document_self = nil # notify nodoc
      # only "all" (any case) also hides the children; to_s guards a bare
      # directive whose value is nil
      code_object.document_children = value.to_s.downcase != 'all'
    when 'doc' then
      code_object.document_self = true
      code_object.force_documentation = true
    when 'yield', 'yields' then
      # remove parameter &block
      code_object.params.sub!(/,?\s*&\w+/, '') if code_object.params
      code_object.block_params = value
    when 'arg', 'args' then
      code_object.params = value
    end
  end
  ##
  # Checks if +file+ is a zip file in disguise. Signatures from
  # http://www.garykessler.net/library/file_sigs.html
  #
  # Returns +false+ when +file+ cannot be read (for example when it does not
  # exist), since such a file cannot be identified as a zip archive.
  def self.zip? file
    zip_signature = File.read file, 4

    zip_signature == "PK\x03\x04" or
      zip_signature == "PK\x05\x06" or
      zip_signature == "PK\x07\x08"
  rescue SystemCallError, IOError
    false
  end
  ##
  # Return a parser that can handle a particular extension.
  #
  # The first registered regexp that matches +file_name+ selects the parser.
  # Returns +nil+ when nothing matches, or when the match is
  # RDoc::Parser::Simple and the file either looks like a zip archive or has
  # an extension that does not contain "txt" or "rdoc".
  def self.can_parse(file_name)
    _, parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }

    # nothing registered for this name
    return unless parser

    # HACK Selenium hides a jar file using a .txt extension
    return if parser == RDoc::Parser::Simple and zip? file_name

    # The default parser must not parse binary files
    ext_name = File.extname file_name
    return parser if ext_name.empty?
    return if parser == RDoc::Parser::Simple and ext_name !~ /txt|rdoc/

    parser
  end
  ##
  # Finds and instantiates the correct parser for the given +file_name+ and
  # +content+.
  #
  # A markup comment in +content+ takes precedence over the file name. A
  # file without an extension whose shebang line mentions ruby is looked up
  # as if it were named "dummy.rb".
  #
  # Returns +nil+ for binary files and when no parser is found.
  def self.for top_level, file_name, content, options, stats
    return if binary? file_name

    parser_class = use_markup content

    unless parser_class
      extensionless = file_name !~ /\.\w+$/

      # If no extension, look for shebang
      if extensionless && content =~ %r{\A#!(.+)}
        interpreter = $1
        runs_ruby = interpreter =~ %r{env\s+ruby} || interpreter =~ %r{/ruby}
        file_name = "dummy.rb" if runs_ruby
      end

      parser_class = can_parse file_name
    end

    return unless parser_class

    parser_class.new top_level, file_name, content, options, stats
  end
  ##
  # Record which file types this parser can understand.
  #
  # The registration is placed at the front of RDoc::Parser.parsers.
  #
  # It is ok to call this multiple times.
  def self.parse_files_matching(regexp)
    registration = [regexp, self]

    RDoc::Parser.parsers.unshift registration
  end
  ##
  # If there is a <tt>markup: parser_name</tt> comment at the front of the
  # file, use it to determine the parser. For example:
  #
  #   # markup: rdoc
  #   # Class comment can go here
  #
  #   class C
  #   end
  #
  # The comment should appear as the first line of the +content+.
  #
  # If the content contains a shebang or editor modeline the comment may
  # appear on the second or third line.
  #
  # Any comment style may be used to hide the markup comment.
  #
  # Returns +nil+ when there is no markup comment or when it names a parser
  # that is not registered.
  def self.use_markup content
    markup = content.lines.first(3).grep(/markup:\s+(\w+)/) { $1 }.first

    return unless markup

    return RDoc::Parser::Ruby if markup == 'tomdoc'

    markup = Regexp.escape markup

    # Compare against the last component of each parser's class name,
    # ignoring case. No match leaves +selected+ nil.
    _, selected = RDoc::Parser.parsers.find do |_, parser|
      /^#{markup}$/i =~ parser.name.sub(/.*:/, '')
    end

    selected
  end
  ##
  # Creates a new Parser storing +top_level+, +file_name+, +content+,
  # +options+ and +stats+ in instance variables. +top_level+ is told which
  # parser class is handling it. In +@preprocess+ an
  # RDoc::Markup::PreProcess object is created which allows processing of
  # directives.
  def initialize top_level, file_name, content, options, stats
    @top_level = top_level
    @top_level.parser = self.class

    @file_name, @content = file_name, content
    @options, @stats = options, stats

    @preprocess = RDoc::Markup::PreProcess.new(@file_name, @options.rdoc_include)
    @preprocess.options = @options
  end
  181. autoload :RubyTools, 'rdoc/parser/ruby_tools'
  182. autoload :Text, 'rdoc/parser/text'
  183. end
  184. # simple must come first in order to show up last in the parsers list
  185. require 'rdoc/parser/simple'
  186. require 'rdoc/parser/rd'
  187. require 'rdoc/parser/ruby'
  188. require 'rdoc/parser/c'