HuginMunin / tdde09_project · Commit 65a87756

Authored 11 months ago by Shaoxuan Yin
Parent: 50bf14de

    72%

Showing 1 changed file: Aug12.py, with 69 additions and 31 deletions.
--- a/Aug12.py
+++ b/Aug12.py
@@ -561,48 +561,78 @@ class FixedWindowParser(ArcStandardParser):
         x[5] = tags[stack[-2]] if len(stack) >= 2 else PAD_IDX
         return x
 
+    # def predict(self, words, tags):
+    #     words = [self.w2i.get(w, UNK_IDX) for w in words]
+    #     tags = [self.t2i.get(t, UNK_IDX) for t in tags]
+    #     initial_config = self.initial_config(len(words))
+    #     beam = [BeamState(initial_config, 0, [])]
+    #     final_states = []
+    #     while beam and len(final_states) < self.beam_size:
+    #         new_beam = []
+    #         for state in beam:
+    #             if self.is_final_config(state.config):
+    #                 final_states.append(state)
+    #                 continue
+    #             features = self.featurize(words, tags, state.config)
+    #             with torch.no_grad():
+    #                 logits = self.model(features.unsqueeze(0))
+    #             log_probs = F.log_softmax(logits, dim=1).squeeze(0)
+    #             for move in self.valid_moves(state.config):
+    #                 new_config = self.next_config(state.config, move)
+    #                 new_score = state.score + log_probs[move].item()
+    #                 new_actions = state.actions + [move]
+    #                 new_beam.append(BeamState(new_config, new_score, new_actions))
+    #         # Normalize scores by sequence length
+    #         for state in new_beam:
+    #             state.score /= len(state.actions)
+    #         # Keep top-k states
+    #         beam = sorted(new_beam, key=lambda x: x.score, reverse=True)[:self.beam_size]
+    #     if final_states:
+    #         best_state = max(final_states, key=lambda x: x.score)
+    #         _, _, heads = best_state.config
+    #         return heads
+    #     else:
+    #         # If no final state is found, return the best partial parse
+    #         best_state = max(beam, key=lambda x: x.score)
+    #         _, _, heads = best_state.config
+    #         return heads
 
     def predict(self, words, tags):
         words = [self.w2i.get(w, UNK_IDX) for w in words]
         tags = [self.t2i.get(t, UNK_IDX) for t in tags]
         initial_config = self.initial_config(len(words))
-        beam = [BeamState(initial_config, 0, [])]
-        final_states = []
-        while beam and len(final_states) < self.beam_size:
+        beam = [(0, initial_config, [])]  # (score, config, actions)
+        while beam:
             new_beam = []
-            for state in beam:
-                if self.is_final_config(state.config):
-                    final_states.append(state)
-                    continue
-                features = self.featurize(words, tags, state.config)
+            for score, config, actions in beam:
+                if self.is_final_config(config):
+                    return config[2]  # Return heads for the first completed parse
+                features = self.featurize(words, tags, config)
                 with torch.no_grad():
                     logits = self.model(features.unsqueeze(0))
                 log_probs = F.log_softmax(logits, dim=1).squeeze(0)
-                for move in self.valid_moves(state.config):
-                    new_config = self.next_config(state.config, move)
-                    new_score = state.score + log_probs[move].item()
-                    new_actions = state.actions + [move]
-                    new_beam.append(BeamState(new_config, new_score, new_actions))
+                for move in self.valid_moves(config):
+                    new_config = self.next_config(config, move)
+                    new_score = score + log_probs[move].item()
+                    new_actions = actions + [move]
+                    new_beam.append((new_score, new_config, new_actions))
-            # Normalize scores by sequence length
-            for state in new_beam:
-                state.score /= len(state.actions)
-            # Keep top-k states
-            beam = sorted(new_beam, key=lambda x: x.score, reverse=True)[:self.beam_size]
+            # Sort by score and keep top-k
+            beam = sorted(new_beam, key=lambda x: x[0], reverse=True)[:self.beam_size]
-        if final_states:
-            best_state = max(final_states, key=lambda x: x.score)
-            _, _, heads = best_state.config
-            return heads
-        else:
-            # If no final state is found, return the best partial parse
-            best_state = max(beam, key=lambda x: x.score)
-            _, _, heads = best_state.config
-            return heads
+        # If no final config is found, return the heads from the highest scoring partial parse
+        return beam[0][1][2]
 
     # def predict(self, words, tags):
     #     words = [self.w2i.get(w, UNK_IDX) for w in words]
     #     tags = [self.t2i.get(t, UNK_IDX) for t in tags]
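The rewritten predict() keeps one plain (score, config, actions) tuple per hypothesis and returns as soon as the first finished parse shows up in the beam; the length normalization of the commented-out BeamState version is dropped. The toy program below is a minimal, self-contained sketch of that tuple-based beam-search pattern. All names in it (toy_moves, toy_next, toy_score, beam_search) are hypothetical stand-ins and not part of the committed code.

def toy_moves(state):
    # Hypothetical transition system: a state is a string of move ids,
    # and no moves are valid once it reaches length 3 (i.e., it is final).
    return [] if len(state) >= 3 else [0, 1]

def toy_next(state, move):
    return state + str(move)

def toy_score(state, move):
    # Made-up log-probabilities: move 1 is always slightly preferred.
    return -0.5 if move == 1 else -1.0

def beam_search(beam_size):
    beam = [(0.0, "", [])]  # (cumulative log-prob, state, actions), as in predict()
    while beam:
        new_beam = []
        for score, state, actions in beam:
            if not toy_moves(state):
                # Final state: return the first finished hypothesis,
                # mirroring the early return in the committed predict().
                return state, actions
            for move in toy_moves(state):
                new_beam.append((score + toy_score(state, move),
                                 toy_next(state, move),
                                 actions + [move]))
        # Keep the top-k hypotheses by cumulative score.
        beam = sorted(new_beam, key=lambda x: x[0], reverse=True)[:beam_size]

print(beam_search(2))  # -> ('111', [1, 1, 1])

With beam_size=1 the same pattern degenerates into a greedy search, so the beam-size sweep at the end of the file effectively includes the greedy parser as its baseline.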
@@ -812,8 +842,8 @@ def uas(parser, gold_sentences):
 
 # In[ ]:
 
-# PARSER =train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=BEAM_SIZE, hidden_dim=300)
-# print('{:.4f}'.format(uas(PARSER, EN_DEV_DATA)))
+PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=BEAM_SIZE, hidden_dim=300)
+print('{:.4f}'.format(uas(PARSER, EN_DEV_DATA)))
 
 # The unlabelled attachment score on the development data (with gold-standard tags) should be around 70%.
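The hunk header above references uas(parser, gold_sentences), whose body is not part of this diff. For orientation, the sketch below shows what an unlabelled attachment score along these lines typically computes: the fraction of tokens whose predicted head matches the gold head. The (words, tags, heads) iteration format and the root-token convention are assumptions, not taken from the commit.

def uas_sketch(parser, gold_sentences):
    correct = total = 0
    for words, tags, gold_heads in gold_sentences:
        pred_heads = parser.predict(words, tags)
        # Position 0 is assumed to hold the artificial root token; skip it.
        for i in range(1, len(words)):
            total += 1
            correct += int(pred_heads[i] == gold_heads[i])
    return correct / total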
@@ -851,4 +881,12 @@ def evaluate(tagger, parser, gold_sentences):
 
 # The tagging accuracy and unlabelled attachment score on the development data should be around 88% and 65%, respectively.
 
-# %%
\ No newline at end of file
+# %%
+for beam_size in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
+    parser = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=beam_size, hidden_dim=300)
+    score = uas(parser, EN_DEV_DATA)
+    print(f"Beam size {beam_size}: UAS = {score:.4f}")
+
+# Train the final model with the best beam size
+# PARSER = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4, beam_size=best_beam_size, hidden_dim=300)
+# print(f"Final UAS score: {uas(PARSER, EN_DEV_DATA):.4f}")
\ No newline at end of file
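The commented-out "final model" lines refer to best_beam_size, which nothing in the file defines. One possible way to track it during the sweep is sketched below; this is hypothetical code, assuming the same train_parser and uas helpers as above, and is not part of the commit.

results = {}
for beam_size in range(1, 11):
    parser = train_parser(EN_TRAIN_DATA, n_epochs=2, batch_size=64, lr=5e-4,
                          beam_size=beam_size, hidden_dim=300)
    results[beam_size] = uas(parser, EN_DEV_DATA)

best_beam_size = max(results, key=results.get)  # beam size with the highest dev UAS
print(f"Best beam size: {best_beam_size} (UAS = {results[best_beam_size]:.4f})")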