diff --git a/coh_metrix/connectives.py b/coh_metrix/connectives.py
index ce1be1442e6d644a6d16cd9b6f242dd434568d42..1d54f039d2c43f01712ad4c5fba0f36d95564e36 100644
--- a/coh_metrix/connectives.py
+++ b/coh_metrix/connectives.py
@@ -10,9 +10,14 @@ def run_connectives(doc: Doc) -> dict:
     """
     connectives = doc._.connectives
 
-    print(connectives)
     n_words = doc._.scream_metrics["surface_metrics"]["n_words"]
 
+    total = 0
+    for key in connectives:
+        total += connectives[key]
+
+    connectives["CNCAll"] = total
+
     for key in connectives:
         connectives[key] = connectives[key] / n_words * 1000
 
diff --git a/pipeline.py b/pipeline.py
index 86141ce99d64ce511679b314e3cde94019fe2723..738a6c6e70ff42b29a703ed2782399d0668fd88b 100755
--- a/pipeline.py
+++ b/pipeline.py
@@ -136,16 +136,9 @@ def run_pipeline(args: str, test: bool) -> list:
     # ================== PARSING ==================
     start_time = time.time()
     doc, node_tree = process_file(filename)
-    # doc = process_file(filename)
     if test:
         time_checker(start_time, "spaCy", timestamps)
 
-    # pprint.pp(doc._.scream_metrics)
-    # pprint.pp(doc._.coh_metrix)
-    # pprint.pp(doc._.synonyms)
-
-    # return doc
-
     # ================== STILETT ==================
     start_time = time.time()
     simplified_text, sentence_transformations = ud_text_simplifier.simplify_text(
@@ -156,7 +149,7 @@ def run_pipeline(args: str, test: bool) -> list:
 
     # ================== SYNONYMS ==================
     # start_time = time.time()
-    # synonym_dict = synonyms.run_synonyms(processed['parsed'])
+    # synonym_dict = synonyms.run_synonyms(processed["parsed"])
     # time_checker(start_time, "Synonyms", timestamps)
 
     result.append(
diff --git a/scream2/scream2.py b/scream2/scream2.py
index da1bd3cb4e78d61965cbcb96d99d6ac2984c13e9..5ca41cc33eba47dfa0e0fa9ed660a246665a83f5 100644
--- a/scream2/scream2.py
+++ b/scream2/scream2.py
@@ -272,14 +272,6 @@ def scream_metrics(doc: Doc) -> Doc:
     for category in config.CONNECTIVES.keys():
         for connective in config.CONNECTIVES[category]:
             if connective in sent.text:
-                print(
-                    "Found",
-                    category,
-                    "connective",
-                    connective,
-                    "in sentence",
-                    sent.text,
-                )
                 connectives[category] += 1
 
     # unfortunate double looping, unavoidable if we need to skip punctuation
diff --git a/stilett/helpers.py b/stilett/helpers.py
index 975b34610d121ed13d639a92e018602969f8ea97..6cb27eb238782bb20fa3a4a162fcea28f6bd7ee9 100644
--- a/stilett/helpers.py
+++ b/stilett/helpers.py
@@ -150,20 +150,20 @@ def contains_name_predicate(sentence: Tree) -> bool:
     return False
 
 
-def read_collocation_list():
-    """
-    read the list of collocations
-    """
-    collocation_list = []
-    with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f:
-        for row in f:
-            # split first on tab, then on space
-            collocations = row.split("\t")[0].lower().split(" ")
-            collocation_list.append(collocations)
-    return collocation_list
-
-
-collocation_list = read_collocation_list()
+# def read_collocation_list():
+#     """
+#     read the list of collocations
+#     """
+#     collocation_list = []
+#     with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f:
+#         for row in f:
+#             # split first on tab, then on space
+#             collocations = row.split("\t")[0].lower().split(" ")
+#             collocation_list.append(collocations)
+#     return collocation_list
+
+
+# collocation_list = read_collocation_list()
 
 
 def check_bigram(curr_lemma, next_lemma):
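
For reference, a minimal standalone sketch of the logic the connectives.py hunk adds: sum every category count into an overall `CNCAll` entry, then scale all counts to incidence scores per 1,000 words. Only `CNCAll` and the per-1,000-words scaling come from the diff above; the function name `connective_incidences` and the example counts are hypothetical.

```python
def connective_incidences(connectives: dict, n_words: int) -> dict:
    # Accumulate all category counts into an overall total.
    # Note the `+=`: a plain `=` would keep only the last category's count.
    connectives["CNCAll"] = sum(connectives.values())

    # Scale raw counts to incidence scores per 1,000 words.
    return {key: count / n_words * 1000 for key, count in connectives.items()}


if __name__ == "__main__":
    counts = {"CNCCaus": 4, "CNCTemp": 7, "CNCAdd": 12}  # hypothetical counts
    print(connective_incidences(counts, n_words=850))
```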