Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
Parser Tree Visualizer
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Daniel Berg
Parser Tree Visualizer
Commits
f3226a16
Commit
f3226a16
authored
2 years ago
by
Daniel Berg
Browse files
Options
Downloads
Patches
Plain Diff
Add new file
parent
a7ed3d0e
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
rdparse.rb
+231
-0
231 additions, 0 deletions
rdparse.rb
with
231 additions
and
0 deletions
rdparse.rb
0 → 100644
+
231
−
0
View file @
f3226a16
#!/usr/bin/env ruby
# This file is called rdparse.rb because it implements a Recursive
# Descent Parser. Read more about the theory on e.g.
# http://en.wikipedia.org/wiki/Recursive_descent_parser
# 2010-02-11 New version of this file for the 2010 instance of TDP007
# which handles false return values during parsing, and has an easy way
# of turning on and off debug messages.
# 2014-02-16 New version that handles { false } blocks and :empty tokens.
require 'logger'
class Rule
  # A rule is created through the rule method of the Parser class, like this:
  #   rule :term do
  #     match(:term, '*', :dice) {|a, _, b| a * b }
  #     match(:term, '/', :dice) {|a, _, b| a / b }
  #     match(:dice)
  #   end
  #
  # Each alternative of a rule is stored as a Match: the sequence of pattern
  # elements (rule names, literal tokens, or anything compared via ===) plus
  # an optional semantic-action block.
  Match = Struct.new :pattern, :block

  # name   -- the symbol naming the expressions this rule matches
  # parser -- the owning Parser; we need it to recursively parse
  #           sub-expressions occurring within the patterns of the match
  #           objects associated with this rule
  def initialize(name, parser)
    @logger = parser.logger
    # The name of the expressions this rule matches
    @name = name
    @parser = parser
    @matches = []
    # Left-recursive matches (e.g. rule :expr matching [:expr, '+', :term])
    # are kept separately so they can be applied iteratively instead of
    # recursing forever.
    @lrmatches = []
  end

  # Add a matching expression to this rule, as in this example:
  #   match(:term, '*', :dice) {|a, _, b| a * b }
  # The arguments to 'match' describe the constituents of this expression.
  def match(*pattern, &block)
    match = Match.new(pattern, block)
    # If the pattern is left-recursive, then add it to the left-recursive set
    if pattern[0] == @name
      # Strip the leading self-reference; parse supplies it as pre_result.
      pattern.shift
      @lrmatches << match
    else
      @matches << match
    end
  end

  # Parse input at the parser's current position according to this rule.
  # Returns the semantic value of the first matching alternative, or nil
  # if nothing matched.
  def parse
    # Try non-left-recursive matches first, to avoid infinite recursion
    match_result = try_matches(@matches)
    return nil if match_result.nil?
    # Then repeatedly extend the result with left-recursive alternatives
    # until none of them applies any more.
    loop do
      result = try_matches(@lrmatches, match_result)
      return match_result if result.nil?
      match_result = result
    end
  end

  private

  # Try out all matching patterns of this rule. pre_result, when given, is a
  # previously computed value seeding a left-recursive pattern. Returns the
  # value of the first alternative that matches in full, or nil; on failure
  # the parser position is restored to where this attempt started.
  def try_matches(matches, pre_result = nil)
    match_result = nil
    # Begin at the current position in the input string of the parser
    start = @parser.pos
    matches.each do |match|
      # pre_result is a previously available result from evaluating expressions
      result = pre_result.nil? ? [] : [pre_result]
      # We iterate through the parts of the pattern, which may be e.g.
      # [:expr, '*', :term]
      match.pattern.each_with_index do |token, index|
        if @parser.rules[token]
          # If this "token" is a compound term, add the result of
          # parsing it to the "result" array
          result << @parser.rules[token].parse
          if result.last.nil?
            result = nil
            break
          end
          @logger.debug("Matched '#{@name} = #{match.pattern[index..-1].inspect}'")
        else
          # Otherwise, we consume the token as part of applying this rule
          nt = @parser.expect(token)
          if nt
            result << nt
            # Reconstruct the full pattern (including the stripped-off
            # left-recursive head) purely for the debug message.
            # NOTE: the original tested @lrmatches.include?(match.pattern),
            # which compares Match structs against an Array and is always
            # false; testing the Match itself restores the intended output.
            if @lrmatches.include?(match)
              pattern = [@name] + match.pattern
            else
              pattern = match.pattern
            end
            @logger.debug("Matched token '#{nt}' as part of rule '#{@name} <= #{pattern.inspect}'")
          else
            result = nil
            break
          end
        end
      end # pattern.each
      if result
        # The whole pattern matched: compute the semantic value, either via
        # the associated block or (by default) the first constituent.
        if match.block
          match_result = match.block.call(*result)
        else
          match_result = result[0]
        end
        @logger.debug("'#{@parser.string[start..@parser.pos - 1]}' matched '#{@name}' and generated '#{match_result.inspect}'") unless match_result.nil?
        break
      else
        # If this rule did not match the current token list, move
        # back to the scan position of the last match
        @parser.pos = start
      end
    end # matches.each
    return match_result
  end
end
class Parser
  attr_accessor :pos
  attr_reader :rules, :string, :logger

  # Raised when lexing or parsing fails.
  class ParseError < RuntimeError; end

  # language_name -- used only for diagnostics / to_s
  # block         -- the grammar definition; it is instance_eval'd so it can
  #                  call token/rule/start/match directly on this parser
  def initialize(language_name, &block)
    @logger = Logger.new(STDOUT)
    @lex_tokens = []
    @rules = {}
    @start = nil
    @language_name = language_name
    instance_eval(&block)
  end

  # Tokenize the string into small pieces
  def tokenize(string)
    @tokens = []
    @string = string.clone
    until string.empty?
      # Unless any of the valid tokens of our language are the prefix of
      # 'string', we fail with an exception
      # (fixed: the message previously had an unbalanced quote)
      raise ParseError, "unable to lex '#{string}'" unless @lex_tokens.any? do |tok|
        match = tok.pattern.match(string)
        # The regular expression of a token has matched the beginning of 'string'
        if match
          @logger.debug("Token #{match[0]} consumed")
          # Also, evaluate this expression by using the block
          # associated with the token
          @tokens << tok.block.call(match.to_s) if tok.block
          # consume the match and proceed with the rest of the string
          string = match.post_match
          true
        else
          # this token pattern did not match, try the next
          false
        end
      end
    end
  end

  # Lex and then parse 'string', returning the semantic value produced by
  # the start rule. Raises ParseError on lexing failure or when tokens are
  # left unconsumed after parsing.
  def parse(string)
    # First, split the string according to the "token" instructions given.
    # Afterwards @tokens contains all tokens that are to be parsed.
    tokenize(string)
    # These variables are used to match if the total number of tokens
    # are consumed by the parser
    @pos = 0
    @max_pos = 0
    @expected = []
    # Parse (and evaluate) the tokens received
    result = @start.parse
    # If there are unparsed extra tokens, signal error
    if @pos != @tokens.size
      raise ParseError, "Parse error. expected: '#{@expected.join(', ')}', found '#{@tokens[@max_pos]}'"
    end
    return result
  end

  # Return the next token in the queue and advance the position.
  def next_token
    @pos += 1
    return @tokens[@pos - 1]
  end

  # Consume the next token if it matches 'tok' (compared with ===, so tok
  # may be a literal, a class or a regexp); :empty matches without consuming
  # anything. Tracks the furthest failure position and the tokens expected
  # there, for the error message in parse. Returns nil on mismatch.
  def expect(tok)
    return tok if tok == :empty
    t = next_token
    if @pos - 1 > @max_pos
      @max_pos = @pos - 1
      @expected = []
    end
    return t if tok === t
    @expected << tok if @max_pos == @pos - 1 && !@expected.include?(tok)
    return nil
  end

  def to_s
    "Parser for #{@language_name}"
  end

  private

  LexToken = Struct.new(:pattern, :block)

  # Declare a lexer token: a regexp plus an optional block converting the
  # matched text into a token value. The pattern is anchored to the start
  # of the remaining input.
  def token(pattern, &block)
    @lex_tokens << LexToken.new(Regexp.new('\\A' + pattern.source), block)
  end

  # Declare the start rule of the grammar.
  def start(name, &block)
    rule(name, &block)
    @start = @rules[name]
  end

  # Declare a grammar rule; the block calls 'match' once per alternative.
  def rule(name, &block)
    @current_rule = Rule.new(name, self)
    @rules[name] = @current_rule
    instance_eval(&block) # In practise, calls match 1..N times
    @current_rule = nil
  end

  # Forward a match declaration to the rule currently being defined.
  def match(*pattern, &block)
    # Basically calls memberfunction "match(*pattern, &block)
    @current_rule.send(:match, *pattern, &block)
  end
end
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment