From 53fdd7b03110910357be5700cb564f0bdd1894e5 Mon Sep 17 00:00:00 2001 From: Love Arreborn <love.arreborn@liu.se> Date: Wed, 2 Oct 2024 10:08:08 +0200 Subject: [PATCH] pushing latest changes before temporary deploy --- .DS_Store | Bin 8196 -> 8196 bytes coh_metrix/connectives.py | 7 ++++++- pipeline.py | 9 +-------- scream2/scream2.py | 8 -------- stilett/helpers.py | 28 ++++++++++++++-------------- 5 files changed, 21 insertions(+), 31 deletions(-) diff --git a/.DS_Store b/.DS_Store index 7dee366b0623845d59fa3942e69f1c878cff591d..e19996e5be4293cac296003c9cbf80077ac30693 100644 GIT binary patch delta 54 zcmZp1XmOa}&nU7nU^hRb$Yve^PR7Y4LK&M`gfkd7&k$*1oE#yRGWm*(*2YR9=FRL9 L-&r<RvM~by(k>Al delta 428 zcmZp1XmOa}&nUGqU^hRb)Mg$5PR4pchGd3(h75*yhFpeJh7yJ%hD?SEhIoc-hBAf{ zhD?SWh8)kF{N$vZ{3Hej1_1^JrmGAL40`{;fB^)EFd?bDxBzOx&ZL5z%;FLQgKLaT z%q*;vbA*$HB&(|p4Gnb^OwDR_6siqP%`GNB5Ef<Zp8QQ%-nfT>fq@7s%7TmXa`N-i z85kHCCkqK?OR_LzF(fjSF(jfopLyfvJi&5CMK*?FhGd2!hE#?`hFnx7teGGmZoVbd z%c#%KP|Q%tkjIeEkjGHTkjqfa5YJG;kjhZOP{NSNkb<g_DSdK;Nb$xJZ^q5+65m-i Mdkf2uX@UnM0B#>?_y7O^ diff --git a/coh_metrix/connectives.py b/coh_metrix/connectives.py index ce1be144..1d54f039 100644 --- a/coh_metrix/connectives.py +++ b/coh_metrix/connectives.py @@ -10,9 +10,14 @@ def run_connectives(doc: Doc) -> dict: """ connectives = doc._.connectives - print(connectives) n_words = doc._.scream_metrics["surface_metrics"]["n_words"] + total = 0 + for key in connectives: + total = connectives[key] + + connectives["CNCAll"] = total + for key in connectives: connectives[key] = connectives[key] / n_words * 1000 diff --git a/pipeline.py b/pipeline.py index 86141ce9..738a6c6e 100755 --- a/pipeline.py +++ b/pipeline.py @@ -136,16 +136,9 @@ def run_pipeline(args: str, test: bool) -> list: # ================== PARSING ================== start_time = time.time() doc, node_tree = process_file(filename) - # doc = process_file(filename) if test: time_checker(start_time, "spaCy", timestamps) - # pprint.pp(doc._.scream_metrics) - # pprint.pp(doc._.coh_metrix) - # pprint.pp(doc._.synonyms) - - # return doc - # ================== STILETT ================== start_time = time.time() simplified_text, sentence_transformations = ud_text_simplifier.simplify_text( @@ -156,7 +149,7 @@ def run_pipeline(args: str, test: bool) -> list: # ================== SYNONYMS ================== # start_time = time.time() - # synonym_dict = synonyms.run_synonyms(processed['parsed']) + # synonym_dict = synonyms.run_synonyms(processed["parsed"]) # time_checker(start_time, "Synonyms", timestamps) result.append( diff --git a/scream2/scream2.py b/scream2/scream2.py index da1bd3cb..5ca41cc3 100644 --- a/scream2/scream2.py +++ b/scream2/scream2.py @@ -272,14 +272,6 @@ def scream_metrics(doc: Doc) -> Doc: for category in config.CONNECTIVES.keys(): for connective in config.CONNECTIVES[category]: if connective in sent.text: - print( - "Found", - category, - "connective", - connective, - "in sentence", - sent.text, - ) connectives[category] += 1 # unfortunate double looping, unavoidable if we need to skip punctuation diff --git a/stilett/helpers.py b/stilett/helpers.py index 975b3461..6cb27eb2 100644 --- a/stilett/helpers.py +++ b/stilett/helpers.py @@ -150,20 +150,20 @@ def contains_name_predicate(sentence: Tree) -> bool: return False -def read_collocation_list(): - """ - read the list of collocations - """ - collocation_list = [] - with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f: - for row in f: - # splitta först på tab och sen på space - collocations = row.split("\t")[0].lower().split(" ") - collocation_list.append(collocations) - return collocation_list - - -collocation_list = read_collocation_list() +# def read_collocation_list(): +# """ +# read the list of collocations +# """ +# collocation_list = [] +# with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f: +# for row in f: +# # splitta först på tab och sen på space +# collocations = row.split("\t")[0].lower().split(" ") +# collocation_list.append(collocations) +# return collocation_list + + +# collocation_list = read_collocation_list() def check_bigram(curr_lemma, next_lemma): -- GitLab