Commit 53fdd7b0 authored by Love Arreborn

pushing latest changes before temporary deploy

parent 9677e955
@@ -10,9 +10,14 @@ def run_connectives(doc: Doc) -> dict:
     """
     connectives = doc._.connectives
+    print(connectives)
     n_words = doc._.scream_metrics["surface_metrics"]["n_words"]
+    total = 0
+    for key in connectives:
+        total += connectives[key]
+    connectives["CNCAll"] = total
     for key in connectives:
         connectives[key] = connectives[key] / n_words * 1000
...
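
The added block sums every connective category into a combined CNCAll count before the existing per-1000-words normalization. A minimal sketch of that computation, with invented category names, counts, and word total standing in for the real doc._.connectives and doc._.scream_metrics values:

    # Toy stand-in for doc._.connectives: category -> raw count (values invented).
    connectives = {"CNCCaus": 4, "CNCTemp": 7, "CNCAdd": 12}
    n_words = 850  # from doc._.scream_metrics["surface_metrics"]["n_words"]

    # sum all categories into a combined count, then normalize everything
    connectives["CNCAll"] = sum(connectives.values())
    incidence = {key: count / n_words * 1000 for key, count in connectives.items()}
    print(incidence["CNCAll"])  # 23 / 850 * 1000 ≈ 27.06 per 1000 words
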
@@ -136,16 +136,9 @@ def run_pipeline(args: str, test: bool) -> list:
     # ================== PARSING ==================
     start_time = time.time()
     doc, node_tree = process_file(filename)
-    # doc = process_file(filename)
     if test:
         time_checker(start_time, "spaCy", timestamps)
-    # pprint.pp(doc._.scream_metrics)
-    # pprint.pp(doc._.coh_metrix)
-    # pprint.pp(doc._.synonyms)
-    # return doc
     # ================== STILETT ==================
     start_time = time.time()
     simplified_text, sentence_transformations = ud_text_simplifier.simplify_text(
@@ -156,7 +149,7 @@ def run_pipeline(args: str, test: bool) -> list:
     # ================== SYNONYMS ==================
     # start_time = time.time()
-    # synonym_dict = synonyms.run_synonyms(processed['parsed'])
+    # synonym_dict = synonyms.run_synonyms(processed["parsed"])
     # time_checker(start_time, "Synonyms", timestamps)
     result.append(
...
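
Each pipeline stage follows the same timing pattern: capture time.time() before the stage, then hand the start time to time_checker afterwards. A hypothetical sketch of such a helper, since its real body is not shown in this diff; the signature is assumed from the calls above:

    import time

    def time_checker(start_time: float, label: str, timestamps: dict) -> None:
        # record the elapsed wall-clock time of one stage under its label
        timestamps[label] = time.time() - start_time

    timestamps: dict = {}
    start_time = time.time()
    # ... run one stage here, e.g. doc, node_tree = process_file(filename) ...
    time_checker(start_time, "spaCy", timestamps)
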
@@ -272,14 +272,6 @@ def scream_metrics(doc: Doc) -> Doc:
         for category in config.CONNECTIVES.keys():
             for connective in config.CONNECTIVES[category]:
                 if connective in sent.text:
-                    print(
-                        "Found",
-                        category,
-                        "connective",
-                        connective,
-                        "in sentence",
-                        sent.text,
-                    )
                     connectives[category] += 1
         # unfortunate double looping, unavoidable if we need to skip punctuation
...
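
The surviving loop counts a category hit whenever a connective string occurs anywhere in the sentence text. A self-contained sketch of that logic with invented data, assuming config.CONNECTIVES maps categories to lists of connective strings and plain strings stand in for spaCy sentences:

    # Invented categories and sentences for illustration.
    CONNECTIVES = {"CNCCaus": ["because", "therefore"], "CNCTemp": ["before", "after"]}
    sentences = ["We left because it rained.", "Call me after lunch."]

    connectives = {category: 0 for category in CONNECTIVES}
    for sent in sentences:
        for category, words in CONNECTIVES.items():
            for connective in words:
                if connective in sent:  # plain substring match, as in the diff
                    connectives[category] += 1

    print(connectives)  # {'CNCCaus': 1, 'CNCTemp': 1}

Note that substring matching can also fire inside longer words ("after" matches "afternoon"), a known trade-off of this approach alongside the double looping the comment mentions.
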
@@ -150,20 +150,20 @@ def contains_name_predicate(sentence: Tree) -> bool:
     return False

-def read_collocation_list():
-    """
-    read the list of collocations
-    """
-    collocation_list = []
-    with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f:
-        for row in f:
-            # split first on tab and then on space
-            collocations = row.split("\t")[0].lower().split(" ")
-            collocation_list.append(collocations)
-    return collocation_list
-
-collocation_list = read_collocation_list()
+# def read_collocation_list():
+#     """
+#     read the list of collocations
+#     """
+#     collocation_list = []
+#     with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f:
+#         for row in f:
+#             # split first on tab and then on space
+#             collocations = row.split("\t")[0].lower().split(" ")
+#             collocation_list.append(collocations)
+#     return collocation_list
+
+# collocation_list = read_collocation_list()

 def check_bigram(curr_lemma, next_lemma):
...
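
For reference, the now-disabled reader's per-row parsing worked as follows; the sample row is hypothetical and assumes each line of collocations_lmi.txt holds a space-separated collocation in its first tab-delimited field, which is inferred from the split logic rather than confirmed:

    # Hypothetical row; the real file format is assumed from the code above.
    row = "ta hand\t1234.5\n"
    collocations = row.split("\t")[0].lower().split(" ")
    print(collocations)  # ['ta', 'hand']
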