From 4fe0d8e61d692bf6e0c0822436c46b5f9427be74 Mon Sep 17 00:00:00 2001 From: thecookingsenpai Date: Fri, 12 Jan 2024 20:54:35 +0100 Subject: [PATCH] formatted and better coded --- .trunk/.gitignore | 9 ++++ .trunk/configs/.isort.cfg | 2 + .trunk/configs/.markdownlint.yaml | 10 ++++ .trunk/configs/.shellcheckrc | 7 +++ .trunk/configs/ruff.toml | 5 ++ .trunk/trunk.yaml | 31 ++++++++++++ README.md | 4 +- install.sh | 3 +- main.py | 82 ++++++++++++++++++------------- summarizer.py | 35 +++++++------ 10 files changed, 136 insertions(+), 52 deletions(-) create mode 100644 .trunk/.gitignore create mode 100644 .trunk/configs/.isort.cfg create mode 100644 .trunk/configs/.markdownlint.yaml create mode 100644 .trunk/configs/.shellcheckrc create mode 100644 .trunk/configs/ruff.toml create mode 100644 .trunk/trunk.yaml diff --git a/.trunk/.gitignore b/.trunk/.gitignore new file mode 100644 index 0000000..15966d0 --- /dev/null +++ b/.trunk/.gitignore @@ -0,0 +1,9 @@ +*out +*logs +*actions +*notifications +*tools +plugins +user_trunk.yaml +user.yaml +tmp diff --git a/.trunk/configs/.isort.cfg b/.trunk/configs/.isort.cfg new file mode 100644 index 0000000..b9fb3f3 --- /dev/null +++ b/.trunk/configs/.isort.cfg @@ -0,0 +1,2 @@ +[settings] +profile=black diff --git a/.trunk/configs/.markdownlint.yaml b/.trunk/configs/.markdownlint.yaml new file mode 100644 index 0000000..fb94039 --- /dev/null +++ b/.trunk/configs/.markdownlint.yaml @@ -0,0 +1,10 @@ +# Autoformatter friendly markdownlint config (all formatting rules disabled) +default: true +blank_lines: false +bullet: false +html: false +indentation: false +line_length: false +spaces: false +url: false +whitespace: false diff --git a/.trunk/configs/.shellcheckrc b/.trunk/configs/.shellcheckrc new file mode 100644 index 0000000..8c7b1ad --- /dev/null +++ b/.trunk/configs/.shellcheckrc @@ -0,0 +1,7 @@ +enable=all +source-path=SCRIPTDIR +disable=SC2154 + +# If you're having issues with shellcheck following source, disable the errors via: +# disable=SC1090 +# disable=SC1091 diff --git a/.trunk/configs/ruff.toml b/.trunk/configs/ruff.toml new file mode 100644 index 0000000..f5a235c --- /dev/null +++ b/.trunk/configs/ruff.toml @@ -0,0 +1,5 @@ +# Generic, formatter-friendly config. +select = ["B", "D3", "E", "F"] + +# Never enforce `E501` (line length violations). This should be handled by formatters. +ignore = ["E501"] diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml new file mode 100644 index 0000000..5f2415c --- /dev/null +++ b/.trunk/trunk.yaml @@ -0,0 +1,31 @@ +# This file controls the behavior of Trunk: https://docs.trunk.io/cli +# To learn more about the format of this file, see https://docs.trunk.io/reference/trunk-yaml +version: 0.1 +cli: + version: 1.19.0 +# Trunk provides extensibility via plugins. (https://docs.trunk.io/plugins) +plugins: + sources: + - id: trunk + ref: v1.4.2 + uri: https://github.com/trunk-io/plugins +# Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes) +runtimes: + enabled: + - go@1.21.0 + - node@18.12.1 + - python@3.10.8 +# This is the section where you manage your linters. 
+# (https://docs.trunk.io/check/configuration)
+lint:
+  enabled:
+    - bandit@1.7.6
+    - black@23.12.1
+    - git-diff-check
+    - isort@5.13.2
+    - markdownlint@0.38.0
+    - osv-scanner@1.5.0
+    - prettier@3.1.1
+    - ruff@0.1.12
+    - shellcheck@0.9.0
+    - shfmt@3.6.0
+    - trufflehog@3.63.8
diff --git a/README.md b/README.md
index cd57fce..d007dd2 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 Your trustworthy unbiased news scraper.
 
 ## Disclaimer
 
-This tool is made for personal use, and should be used carefully. Being a scraper for (AllSides)[https://allsides.com], all the material downloaded, used and reworked by this software is property of AllSides.
+This tool is made for personal use, and should be used carefully. Being a scraper for [AllSides](https://allsides.com), all the material downloaded, used and reworked by this software is property of AllSides.
 
 This tool is intended to be used to quickly grasp an overview of the daily news. Please check AllSides ToS for more information.
 
@@ -25,4 +25,4 @@ Personally, I find their API pricing way better than OpenAI ones. If you are a p
 
 ## Run
 
-    python main.py
\ No newline at end of file
+    python main.py
diff --git a/install.sh b/install.sh
index e180c9f..bfa0473 100644
--- a/install.sh
+++ b/install.sh
@@ -1,8 +1,9 @@
 #!/bin/bash
+
 pip install -r requirements.txt
 mkdir news
 cp .env.example .env
 echo "You should now open your .env file and insert your Perplexity API Key."
 echo "You can get one at: https://www.perplexity.ai/settings/api"
 echo "Then, launch main.py and wait for it to finish."
-echo "allsides.html contains an overview of all the news."
\ No newline at end of file
+echo "allsides.html contains an overview of all the news."
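
Note: install.sh runs "pip install -r requirements.txt", but requirements.txt is not part of this patch. Judging from the imports in main.py and summarizer.py, a minimal version would need at least:

    requests
    beautifulsoup4
    python-dotenv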
diff --git a/main.py b/main.py
index 3afb53d..fe8268d 100644
--- a/main.py
+++ b/main.py
@@ -1,11 +1,13 @@
-import requests
-from bs4 import BeautifulSoup
 import os
 
+import requests
+from bs4 import BeautifulSoup
+
+
 def extract_data(url):
-    response = requests.get(url)
-    soup = BeautifulSoup(response.text, 'html.parser')
-    news_items = soup.find_all('div', class_='news-item')
+    response = requests.get(url, timeout=5)
+    soup = BeautifulSoup(response.text, "html.parser")
+    news_items = soup.find_all("div", class_="news-item")
     datas = []
     tot_articles = len(news_items)
     print("[+] Total news: " + str(tot_articles))
@@ -13,53 +15,62 @@ def extract_data(url):
     counter = 0
     for news_item in news_items:
         # Extract the article link and title
-        article_link = news_item.find_all('a')[0].get('href')
-        if not "allsides.com" in article_link:
+        article_link = news_item.find_all("a")[0].get("href")
+        if "allsides.com" not in article_link:
             tot_articles -= 1
             continue
-        counter += 1
-        print("[+] Processing news: " + str(counter) + "/" + str(tot_articles) )
-        article_title = news_item.find('div', class_="news-title").text.strip()
+        counter += 1
+        print("[+] Processing news: " + str(counter) + "/" + str(tot_articles))
+        article_title = news_item.find("div", class_="news-title").text.strip()
         print("[*] Summarizing: " + article_link)
         # Summarize the article
         with open("link", "w+") as f:
             f.write(article_link)
+        # trunk-ignore(bandit/B605)
+        # trunk-ignore(bandit/B607)
         os.system("python summarizer.py")
         print("[OK] Done. Proceeding...")
         with open("response", "r") as f:
             article_summary = f.read().strip()
-        #with open(article_title, "w+") as f:
-            #f.write(article_summary)
+        # with open(article_title, "w+") as f:
+        # f.write(article_summary)
         # Extract the source and media bias rating
         try:
-            source_name = news_item.find('span').text
-        except:
+            source_name = news_item.find("span").text
+        except Exception:
             source_name = "Unknown"
-        try:
-            media_bias_rating = news_item.find('img').get('alt').replace("AllSides Media Bias Rating: ", "").lower()
-        except:
+        try:
+            media_bias_rating = (
+                news_item.find("img")
+                .get("alt")
+                .replace("AllSides Media Bias Rating: ", "")
+                .lower()
+            )
+        except Exception:
             media_bias_rating = "Unknown"
         # Build the JSON
         data = {
-            'article_link': article_link,
-            'article_title': article_title,
-            'article_summary': article_summary,
-            'source_name': source_name,
-            'media_bias_rating': media_bias_rating
+            "article_link": article_link,
+            "article_title": article_title,
+            "article_summary": article_summary,
+            "source_name": source_name,
+            "media_bias_rating": media_bias_rating,
         }
         datas.append(data)
     return datas
 
+
 def handle_pagination(soup):
-    next_page = soup.find('a', {'rel': 'next'})
+    next_page = soup.find("a", {"rel": "next"})
     if next_page:
-        return next_page['href']
+        return next_page["href"]
     return None
 
+
 def main():
     url = "https://www.allsides.com/unbiased-balanced-news"
     all_data = []
@@ -67,7 +78,9 @@ def main():
     while url:
         data = extract_data(url)
         all_data.extend(data)
-        url = handle_pagination(BeautifulSoup(requests.get(url).text, 'html.parser'))
+        url = handle_pagination(
+            BeautifulSoup(requests.get(url, timeout=5).text, "html.parser")
+        )
 
     # Prepare a nice CSS for the viewing page (nice and clean)
     css = """
@@ -93,11 +106,11 @@ def main():
     html += "<html>"
     html += "<body>"
     for item in all_data:
-        html += "<h2>" + item['article_title'] + "</h2>"
-        html += "<h3>" + item['source_name'] + "</h3>"
-        html += "<h4>" + item['media_bias_rating'] + "</h4>"
-        html += "<p>" + item['article_summary'] + "</p>"
-        html += "<a href='" + item['article_link'] + "'>Read the full article</a>"
+        html += "<h2>" + item["article_title"] + "</h2>"
+        html += "<h3>" + item["source_name"] + "</h3>"
+        html += "<h4>" + item["media_bias_rating"] + "</h4>"
+        html += "<p>" + item["article_summary"] + "</p>"
+        html += "<a href='" + item["article_link"] + "'>Read the full article</a>"
     html += "</body>"
     html += "</html>"
     with open("allsides.html", "w+") as f:
@@ -107,15 +120,16 @@ def main():
     # Do some math to find the number of articles per bias rating
     bias_ratings = {}
    for item in all_data:
-        if item['media_bias_rating'] in bias_ratings:
-            bias_ratings[item['media_bias_rating']] += 1
+        if item["media_bias_rating"] in bias_ratings:
+            bias_ratings[item["media_bias_rating"]] += 1
         else:
-            bias_ratings[item['media_bias_rating']] = 1
+            bias_ratings[item["media_bias_rating"]] = 1
     # Assign percentages
     for key in bias_ratings:
         bias_ratings[key] = round(bias_ratings[key] / len(all_data) * 100, 2)
-    
+
     print(bias_ratings)
+
 if __name__ == "__main__":
     main()
diff --git a/summarizer.py b/summarizer.py
index d6c32f0..1b3c549 100644
--- a/summarizer.py
+++ b/summarizer.py
@@ -1,6 +1,7 @@
+import os
+
 import requests
 from dotenv import load_dotenv
-import os
 
 load_dotenv()
 
@@ -10,32 +11,36 @@ model = os.getenv("MODEL")
 with open("link", "r") as f:
     article_link = f.read().strip()
-
 headers = {
-    'accept': 'application/json',
-    'authorization': 'Bearer ' + pplx_api_key,
-    'content-type': 'application/json',
+    "accept": "application/json",
+    "authorization": "Bearer " + pplx_api_key,
+    "content-type": "application/json",
 }
 
 json_data = {
-    'model': model,
-    'messages': [
+    "model": model,
+    "messages": [
         {
-            'role': 'system',
-            'content': 'Be precise, concise and clear',
+            "role": "system",
+            "content": "Be precise, concise and clear",
         },
         {
-            'role': 'user',
-            'content': 'Search and summarize: ' + article_link,
+            "role": "user",
+            "content": "Search and summarize: " + article_link,
         },
     ],
 }
 
-response = requests.post('https://api.perplexity.ai/chat/completions', headers=headers, json=json_data)
+response = requests.post(
+    "https://api.perplexity.ai/chat/completions",
+    headers=headers,
+    json=json_data,
+    timeout=5,
+)
 
 response = response.json()
-#print(response)
+# print(response)
 
-#print(response["choices"][0]["message"]["content"])
+# print(response["choices"][0]["message"]["content"])
 
 with open("response", "w+") as response_file:
-    response_file.write(response["choices"][0]["message"]["content"])
\ No newline at end of file
+    response_file.write(response["choices"][0]["message"]["content"])