diff --git a/rdparse.rb b/rdparse.rb new file mode 100644 index 0000000000000000000000000000000000000000..1ed7de705e50a0bbd0c6c82a33d23ab62cbf1426 --- /dev/null +++ b/rdparse.rb @@ -0,0 +1,231 @@ +#!/usr/bin/env ruby + +# This file is called rdparse.rb because it implements a Recursive +# Descent Parser. Read more about the theory on e.g. +# http://en.wikipedia.org/wiki/Recursive_descent_parser + +# 2010-02-11 New version of this file for the 2010 instance of TDP007 +# which handles false return values during parsing, and has an easy way +# of turning on and off debug messages. +# 2014-02-16 New version that handles { false } blocks and :empty tokens. + +require 'logger' + +class Rule + + # A rule is created through the rule method of the Parser class, like this: + # rule :term do + # match(:term, '*', :dice) {|a, _, b| a * b } + # match(:term, '/', :dice) {|a, _, b| a / b } + # match(:dice) + # end + + Match = Struct.new :pattern, :block + + def initialize(name, parser) + @logger = parser.logger + # The name of the expressions this rule matches + @name = name + # We need the parser to recursively parse sub-expressions occurring + # within the pattern of the match objects associated with this rule + @parser = parser + @matches = [] + # Left-recursive matches + @lrmatches = [] + end + + # Add a matching expression to this rule, as in this example: + # match(:term, '*', :dice) {|a, _, b| a * b } + # The arguments to 'match' describe the constituents of this expression. + def match(*pattern, &block) + match = Match.new(pattern, block) + # If the pattern is left-recursive, then add it to the left-recursive set + if pattern[0] == @name + pattern.shift + @lrmatches << match + else + @matches << match + end + end + + def parse + # Try non-left-recursive matches first, to avoid infinite recursion + match_result = try_matches(@matches) + return nil if match_result.nil? + loop do + result = try_matches(@lrmatches, match_result) + return match_result if result.nil? + match_result = result + end + end + + private + + # Try out all matching patterns of this rule + def try_matches(matches, pre_result = nil) + match_result = nil + # Begin at the current position in the input string of the parser + start = @parser.pos + matches.each do |match| + # pre_result is a previously available result from evaluating expressions + result = pre_result.nil? ? [] : [pre_result] + + # We iterate through the parts of the pattern, which may be e.g. + # [:expr,'*',:term] + match.pattern.each_with_index do |token,index| + + # If this "token" is a compound term, add the result of + # parsing it to the "result" array + if @parser.rules[token] + result << @parser.rules[token].parse + if result.last.nil? + result = nil + break + end + @logger.debug("Matched '#{@name} = #{match.pattern[index..-1].inspect}'") + else + # Otherwise, we consume the token as part of applying this rule + nt = @parser.expect(token) + if nt + result << nt + if @lrmatches.include?(match.pattern) then + pattern = [@name]+match.pattern + else + pattern = match.pattern + end + @logger.debug("Matched token '#{nt}' as part of rule '#{@name} <= #{pattern.inspect}'") + else + result = nil + break + end + end # pattern.each + end # matches.each + if result + if match.block + match_result = match.block.call(*result) + else + match_result = result[0] + end + @logger.debug("'#{@parser.string[start..@parser.pos-1]}' matched '#{@name}' and generated '#{match_result.inspect}'") unless match_result.nil? + break + else + # If this rule did not match the current token list, move + # back to the scan position of the last match + @parser.pos = start + end + end + + return match_result + end +end + +class Parser + + attr_accessor :pos + attr_reader :rules, :string, :logger + + class ParseError < RuntimeError + end + + def initialize(language_name, &block) + @logger = Logger.new(STDOUT) + @lex_tokens = [] + @rules = {} + @start = nil + @language_name = language_name + instance_eval(&block) + end + + # Tokenize the string into small pieces + def tokenize(string) + @tokens = [] + @string = string.clone + until string.empty? + # Unless any of the valid tokens of our language are the prefix of + # 'string', we fail with an exception + raise ParseError, "unable to lex '#{string}" unless @lex_tokens.any? do |tok| + match = tok.pattern.match(string) + # The regular expression of a token has matched the beginning of 'string' + if match + @logger.debug("Token #{match[0]} consumed") + # Also, evaluate this expression by using the block + # associated with the token + @tokens << tok.block.call(match.to_s) if tok.block + # consume the match and proceed with the rest of the string + string = match.post_match + true + else + # this token pattern did not match, try the next + false + end # if + end # raise + end # until + end + + def parse(string) + # First, split the string according to the "token" instructions given. + # Afterwards @tokens contains all tokens that are to be parsed. + tokenize(string) + + # These variables are used to match if the total number of tokens + # are consumed by the parser + @pos = 0 + @max_pos = 0 + @expected = [] + # Parse (and evaluate) the tokens received + result = @start.parse + # If there are unparsed extra tokens, signal error + if @pos != @tokens.size + raise ParseError, "Parse error. expected: '#{@expected.join(', ')}', found '#{@tokens[@max_pos]}'" + end + return result + end + + def next_token + @pos += 1 + return @tokens[@pos - 1] + end + + # Return the next token in the queue + def expect(tok) + return tok if tok == :empty + t = next_token + if @pos - 1 > @max_pos + @max_pos = @pos - 1 + @expected = [] + end + return t if tok === t + @expected << tok if @max_pos == @pos - 1 && !@expected.include?(tok) + return nil + end + + def to_s + "Parser for #{@language_name}" + end + + private + + LexToken = Struct.new(:pattern, :block) + + def token(pattern, &block) + @lex_tokens << LexToken.new(Regexp.new('\\A' + pattern.source), block) + end + + def start(name, &block) + rule(name, &block) + @start = @rules[name] + end + + def rule(name,&block) + @current_rule = Rule.new(name, self) + @rules[name] = @current_rule + instance_eval &block # In practise, calls match 1..N times + @current_rule = nil + end + + def match(*pattern, &block) + # Basically calls memberfunction "match(*pattern, &block) + @current_rule.send(:match, *pattern, &block) + end + +end