Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
Parser Tree Visualizer
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Daniel Berg
Parser Tree Visualizer
Commits
f3226a16
Commit
f3226a16
authored
2 years ago
by
Daniel Berg
Browse files
Options
Downloads
Patches
Plain Diff
Add new file
parent
a7ed3d0e
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
rdparse.rb
+231
-0
231 additions, 0 deletions
rdparse.rb
with
231 additions
and
0 deletions
rdparse.rb
0 → 100644
+
231
−
0
View file @
f3226a16
#!/usr/bin/env ruby
# This file is called rdparse.rb because it implements a Recursive
# Descent Parser. Read more about the theory on e.g.
# http://en.wikipedia.org/wiki/Recursive_descent_parser
# 2010-02-11 New version of this file for the 2010 instance of TDP007
# which handles false return values during parsing, and has an easy way
# of turning on and off debug messages.
# 2014-02-16 New version that handles { false } blocks and :empty tokens.
require 'logger'
class Rule
  # A rule is created through the rule method of the Parser class, like this:
  #   rule :term do
  #     match(:term, '*', :dice) {|a, _, b| a * b }
  #     match(:term, '/', :dice) {|a, _, b| a / b }
  #     match(:dice)
  #   end
  #
  # Each alternative of a rule is stored as a Match: the sequence of pattern
  # elements (rule names, literal tokens, or anything compared via ===) plus
  # an optional semantic-action block.
  Match = Struct.new :pattern, :block

  # name   -- the symbol naming the expressions this rule matches
  # parser -- the owning Parser; we need it to recursively parse
  #           sub-expressions occurring within the patterns of the match
  #           objects associated with this rule
  def initialize(name, parser)
    @logger = parser.logger
    # The name of the expressions this rule matches
    @name = name
    @parser = parser
    @matches = []
    # Left-recursive matches (e.g. rule :expr matching [:expr, '+', :term])
    # are kept separately so they can be applied iteratively instead of
    # recursing forever.
    @lrmatches = []
  end

  # Add a matching expression to this rule, as in this example:
  #   match(:term, '*', :dice) {|a, _, b| a * b }
  # The arguments to 'match' describe the constituents of this expression.
  def match(*pattern, &block)
    match = Match.new(pattern, block)
    # If the pattern is left-recursive, then add it to the left-recursive set
    if pattern[0] == @name
      # Strip the leading self-reference; parse supplies it as pre_result.
      pattern.shift
      @lrmatches << match
    else
      @matches << match
    end
  end

  # Parse input at the parser's current position according to this rule.
  # Returns the semantic value of the first matching alternative, or nil
  # if nothing matched.
  def parse
    # Try non-left-recursive matches first, to avoid infinite recursion
    match_result = try_matches(@matches)
    return nil if match_result.nil?
    # Then repeatedly extend the result with left-recursive alternatives
    # until none of them applies any more.
    loop do
      result = try_matches(@lrmatches, match_result)
      return match_result if result.nil?
      match_result = result
    end
  end

  private

  # Try out all matching patterns of this rule. pre_result, when given, is a
  # previously computed value seeding a left-recursive pattern. Returns the
  # value of the first alternative that matches in full, or nil; on failure
  # the parser position is restored to where this attempt started.
  def try_matches(matches, pre_result = nil)
    match_result = nil
    # Begin at the current position in the input string of the parser
    start = @parser.pos
    matches.each do |match|
      # pre_result is a previously available result from evaluating expressions
      result = pre_result.nil? ? [] : [pre_result]
      # We iterate through the parts of the pattern, which may be e.g.
      # [:expr, '*', :term]
      match.pattern.each_with_index do |token, index|
        if @parser.rules[token]
          # If this "token" is a compound term, add the result of
          # parsing it to the "result" array
          result << @parser.rules[token].parse
          if result.last.nil?
            result = nil
            break
          end
          @logger.debug("Matched '#{@name} = #{match.pattern[index..-1].inspect}'")
        else
          # Otherwise, we consume the token as part of applying this rule
          nt = @parser.expect(token)
          if nt
            result << nt
            # Reconstruct the full pattern (including the stripped-off
            # left-recursive head) purely for the debug message.
            # NOTE: the original tested @lrmatches.include?(match.pattern),
            # which compares Match structs against an Array and is always
            # false; testing the Match itself restores the intended output.
            if @lrmatches.include?(match)
              pattern = [@name] + match.pattern
            else
              pattern = match.pattern
            end
            @logger.debug("Matched token '#{nt}' as part of rule '#{@name} <= #{pattern.inspect}'")
          else
            result = nil
            break
          end
        end
      end # pattern.each
      if result
        # The whole pattern matched: compute the semantic value, either via
        # the associated block or (by default) the first constituent.
        if match.block
          match_result = match.block.call(*result)
        else
          match_result = result[0]
        end
        @logger.debug("'#{@parser.string[start..@parser.pos - 1]}' matched '#{@name}' and generated '#{match_result.inspect}'") unless match_result.nil?
        break
      else
        # If this rule did not match the current token list, move
        # back to the scan position of the last match
        @parser.pos = start
      end
    end # matches.each
    return match_result
  end
end
class Parser
  attr_accessor :pos
  attr_reader :rules, :string, :logger

  # Raised when lexing or parsing fails.
  class ParseError < RuntimeError; end

  # language_name -- used only for diagnostics / to_s
  # block         -- the grammar definition; it is instance_eval'd so it can
  #                  call token/rule/start/match directly on this parser
  def initialize(language_name, &block)
    @logger = Logger.new(STDOUT)
    @lex_tokens = []
    @rules = {}
    @start = nil
    @language_name = language_name
    instance_eval(&block)
  end

  # Tokenize the string into small pieces
  def tokenize(string)
    @tokens = []
    @string = string.clone
    until string.empty?
      # Unless any of the valid tokens of our language are the prefix of
      # 'string', we fail with an exception
      # (fixed: the message previously had an unbalanced quote)
      raise ParseError, "unable to lex '#{string}'" unless @lex_tokens.any? do |tok|
        match = tok.pattern.match(string)
        # The regular expression of a token has matched the beginning of 'string'
        if match
          @logger.debug("Token #{match[0]} consumed")
          # Also, evaluate this expression by using the block
          # associated with the token
          @tokens << tok.block.call(match.to_s) if tok.block
          # consume the match and proceed with the rest of the string
          string = match.post_match
          true
        else
          # this token pattern did not match, try the next
          false
        end
      end
    end
  end

  # Lex and then parse 'string', returning the semantic value produced by
  # the start rule. Raises ParseError on lexing failure or when tokens are
  # left unconsumed after parsing.
  def parse(string)
    # First, split the string according to the "token" instructions given.
    # Afterwards @tokens contains all tokens that are to be parsed.
    tokenize(string)
    # These variables are used to match if the total number of tokens
    # are consumed by the parser
    @pos = 0
    @max_pos = 0
    @expected = []
    # Parse (and evaluate) the tokens received
    result = @start.parse
    # If there are unparsed extra tokens, signal error
    if @pos != @tokens.size
      raise ParseError, "Parse error. expected: '#{@expected.join(', ')}', found '#{@tokens[@max_pos]}'"
    end
    return result
  end

  # Return the next token in the queue and advance the position.
  def next_token
    @pos += 1
    return @tokens[@pos - 1]
  end

  # Consume the next token if it matches 'tok' (compared with ===, so tok
  # may be a literal, a class or a regexp); :empty matches without consuming
  # anything. Tracks the furthest failure position and the tokens expected
  # there, for the error message in parse. Returns nil on mismatch.
  def expect(tok)
    return tok if tok == :empty
    t = next_token
    if @pos - 1 > @max_pos
      @max_pos = @pos - 1
      @expected = []
    end
    return t if tok === t
    @expected << tok if @max_pos == @pos - 1 && !@expected.include?(tok)
    return nil
  end

  def to_s
    "Parser for #{@language_name}"
  end

  private

  LexToken = Struct.new(:pattern, :block)

  # Declare a lexer token: a regexp plus an optional block converting the
  # matched text into a token value. The pattern is anchored to the start
  # of the remaining input.
  def token(pattern, &block)
    @lex_tokens << LexToken.new(Regexp.new('\\A' + pattern.source), block)
  end

  # Declare the start rule of the grammar.
  def start(name, &block)
    rule(name, &block)
    @start = @rules[name]
  end

  # Declare a grammar rule; the block calls 'match' once per alternative.
  def rule(name, &block)
    @current_rule = Rule.new(name, self)
    @rules[name] = @current_rule
    instance_eval(&block) # In practise, calls match 1..N times
    @current_rule = nil
  end

  # Forward a match declaration to the rule currently being defined.
  def match(*pattern, &block)
    # Basically calls memberfunction "match(*pattern, &block)
    @current_rule.send(:match, *pattern, &block)
  end
end
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment