From 53fdd7b03110910357be5700cb564f0bdd1894e5 Mon Sep 17 00:00:00 2001
From: Love Arreborn <love.arreborn@liu.se>
Date: Wed, 2 Oct 2024 10:08:08 +0200
Subject: [PATCH] Add CNCAll connectives total, remove debug prints, disable collocation loading

---
 .DS_Store                 | Bin 8196 -> 8196 bytes
 coh_metrix/connectives.py |   7 ++++++-
 pipeline.py               |   9 +--------
 scream2/scream2.py        |   8 --------
 stilett/helpers.py        |  28 ++++++++++++++--------------
 5 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/.DS_Store b/.DS_Store
index 7dee366b0623845d59fa3942e69f1c878cff591d..e19996e5be4293cac296003c9cbf80077ac30693 100644
GIT binary patch
delta 54
zcmZp1XmOa}&nU7nU^hRb$Yve^PR7Y4LK&M`gfkd7&k$*1oE#yRGWm*(*2YR9=FRL9
L-&r<RvM~by(k>Al

delta 428
zcmZp1XmOa}&nUGqU^hRb)Mg$5PR4pchGd3(h75*yhFpeJh7yJ%hD?SEhIoc-hBAf{
zhD?SWh8)kF{N$vZ{3Hej1_1^JrmGAL40`{;fB^)EFd?bDxBzOx&ZL5z%;FLQgKLaT
z%q*;vbA*$HB&(|p4Gnb^OwDR_6siqP%`GNB5Ef<Zp8QQ%-nfT>fq@7s%7TmXa`N-i
z85kHCCkqK?OR_LzF(fjSF(jfopLyfvJi&5CMK*?FhGd2!hE#?`hFnx7teGGmZoVbd
z%c#%KP|Q%tkjIeEkjGHTkjqfa5YJG;kjhZOP{NSNkb<g_DSdK;Nb$xJZ^q5+65m-i
Mdkf2uX@UnM0B#>?_y7O^

diff --git a/coh_metrix/connectives.py b/coh_metrix/connectives.py
index ce1be144..1d54f039 100644
--- a/coh_metrix/connectives.py
+++ b/coh_metrix/connectives.py
@@ -10,9 +10,14 @@ def run_connectives(doc: Doc) -> dict:
     """
 
     connectives = doc._.connectives
-    print(connectives)
     n_words = doc._.scream_metrics["surface_metrics"]["n_words"]
 
+    total = 0
+    for key in connectives:
+        total += connectives[key]
+
+    connectives["CNCAll"] = total
+
     for key in connectives:
         connectives[key] = connectives[key] / n_words * 1000
 
diff --git a/pipeline.py b/pipeline.py
index 86141ce9..738a6c6e 100755
--- a/pipeline.py
+++ b/pipeline.py
@@ -136,16 +136,9 @@ def run_pipeline(args: str, test: bool) -> list:
         # ================== PARSING ==================
         start_time = time.time()
         doc, node_tree = process_file(filename)
-        # doc = process_file(filename)
         if test:
             time_checker(start_time, "spaCy", timestamps)
 
-        # pprint.pp(doc._.scream_metrics)
-        # pprint.pp(doc._.coh_metrix)
-        # pprint.pp(doc._.synonyms)
-
-        # return doc
-
         # ================== STILETT ==================
         start_time = time.time()
         simplified_text, sentence_transformations = ud_text_simplifier.simplify_text(
@@ -156,7 +149,7 @@ def run_pipeline(args: str, test: bool) -> list:
 
         # ================== SYNONYMS ==================
         # start_time = time.time()
-        # synonym_dict = synonyms.run_synonyms(processed['parsed'])
+        # synonym_dict = synonyms.run_synonyms(processed["parsed"])
         # time_checker(start_time, "Synonyms", timestamps)
 
         result.append(
diff --git a/scream2/scream2.py b/scream2/scream2.py
index da1bd3cb..5ca41cc3 100644
--- a/scream2/scream2.py
+++ b/scream2/scream2.py
@@ -272,14 +272,6 @@ def scream_metrics(doc: Doc) -> Doc:
         for category in config.CONNECTIVES.keys():
             for connective in config.CONNECTIVES[category]:
                 if connective in sent.text:
-                    print(
-                        "Found",
-                        category,
-                        "connective",
-                        connective,
-                        "in sentence",
-                        sent.text,
-                    )
                     connectives[category] += 1
 
         # unfortunate double looping, unavoidable if we need to skip punctuation
diff --git a/stilett/helpers.py b/stilett/helpers.py
index 975b3461..6cb27eb2 100644
--- a/stilett/helpers.py
+++ b/stilett/helpers.py
@@ -150,20 +150,20 @@ def contains_name_predicate(sentence: Tree) -> bool:
     return False
 
 
-def read_collocation_list():
-    """
-    read the list of collocations
-    """
-    collocation_list = []
-    with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f:
-        for row in f:
-            # splitta först på tab och sen på space
-            collocations = row.split("\t")[0].lower().split(" ")
-            collocation_list.append(collocations)
-    return collocation_list
-
-
-collocation_list = read_collocation_list()
+# def read_collocation_list():
+#     """
+#     read the list of collocations
+#     """
+#     collocation_list = []
+#     with open("../synonyms_textad/resources/collocations_lmi.txt", "r") as f:
+#         for row in f:
+#             # split first on tab, then on space
+#             collocations = row.split("\t")[0].lower().split(" ")
+#             collocation_list.append(collocations)
+#     return collocation_list
+
+
+# collocation_list = read_collocation_list()
 
 
 def check_bigram(curr_lemma, next_lemma):
-- 
GitLab