Mirror of https://github.com/tcsenpai/mysides.git, synced 2025-06-02 17:20:05 +00:00
added euobserver and remade apnews
This commit is contained in:
parent c683a0ac6e · commit 35969b087d
README.md
@@ -8,7 +8,12 @@ I maintain a daily updated (if I use this software) archive section. This is mad

## Disclaimer

MySides is a personal tool designed to scrape news from APNews. Please note that all material downloaded, used, and reworked by this software is the property of APNews. This tool aims to provide a quick overview of daily news. For more information, please refer to the APNews Terms of Service.

MySides is a personal tool designed to scrape news from various sources. Please note that all material downloaded, used, and reworked by this software remains the property of its respective sources. This tool aims to provide a quick overview of daily news. For more information, please refer to each source's Terms of Service.

## Built-in sites

- [x] APNews (world news)
- [x] EuObserver (RSS feed)

## Work In Progress
@@ -17,7 +22,7 @@ Stay tuned.

## TLDR

MySides scrapes the latest news from APNews and uses Perplexity AI APIs to summarize them into a concise, single-page format.

MySides scrapes the latest news from various sources and uses Perplexity AI APIs to summarize them into a concise, single-page format.
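The TLDR above mentions the Perplexity AI APIs; the actual call lives in the repo's summarizer module, which this diff does not show. Purely as an illustrative sketch (the function name `summarize`, the prompt wording, and the `article_text` parameter are assumptions, not the repository's implementation), a chat-completions request against Perplexity's API could look like this, reusing the PPLX_API_KEY and MODEL variables that main.py loads:

```python
import os
import requests


def summarize(article_text):
    """Illustrative sketch only; the repo's real summarizer module may differ."""
    response = requests.post(
        "https://api.perplexity.ai/chat/completions",
        headers={"Authorization": "Bearer " + os.environ["PPLX_API_KEY"]},
        json={
            "model": os.environ.get("MODEL", "sonar"),  # model name is an assumption
            "messages": [
                {"role": "system", "content": "Summarize the article in a few sentences."},
                {"role": "user", "content": article_text},
            ],
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
```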
## Perplexity AI?
Binary file not shown.
apnews.py
@@ -1,6 +1,13 @@
from bs4 import BeautifulSoup
import requests


def fetchAndDigest(soup):
def getSoup():
    response = requests.get("https://apnews.com/world-news", timeout=5)
    soup = BeautifulSoup(response.text, "html.parser")
    return soup


def fetchAndDigest():
    soup = getSoup()
    news_items = soup.find_all("div", class_="PagePromo")
    print("[+] Filtering out invalid articles...")
    links = []
euobserver.py (Normal file, 13 lines)
@@ -0,0 +1,13 @@
import feedparser


def fetchAndDigest():
    links = []
    feed = feedparser.parse("https://xml.euobserver.com/rss.xml")
    for entry in feed.entries:
        article_title = entry.title
        article_link = entry.link
        links.append([article_title, article_link])

    print("[+] Total news: " + str(len(links)))
    return links
main.py (59 changed lines)
@@ -1,48 +1,17 @@
import os
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Our modules
import apnews
import euobserver
import summarizer

load_dotenv()

# Loading environment variables
news_type = os.getenv("NEWS")
pplx_api_key = os.getenv("PPLX_API_KEY")
model = os.getenv("MODEL")
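These settings come from a .env file read by python-dotenv. The values below are placeholders for illustration only, not the repository's actual configuration; the quoting of POSSIBLE_NEWS_VALUES matches how menu() splits the string on commas and strips double quotes:

```
# Example .env (illustrative values only)
NEWS=world-news
POSSIBLE_NEWS_VALUES='"world-news","europe"'
PPLX_API_KEY=your-perplexity-api-key
MODEL=sonar
```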
# Main menu
def menu():
    global news_type
    available_news = os.getenv("POSSIBLE_NEWS_VALUES")
    available_news = available_news.split(",")
    print("[ Welcome to MySides ]")
    print("[+] Available news: ")
    counter = 0
    for avail in available_news:
        counter += 1
        print(str(counter) + ") " + avail.strip().replace('"', ""))

    print("[+] Current news: " + news_type)
    print("[+] Press enter to continue or type a number to change the news type.")
    news_type_n = input().strip()
    if news_type_n == "":
        return
    try:
        news_type_n = int(news_type_n)
    except Exception:
        print("[!] Invalid news type.")
        menu()
        return
    news_type_n -= 1
    try:
        news_type = available_news[news_type_n]
    except Exception:
        print("[!] Invalid news type.")
        menu()
        return
# Fetch and summarize the article
def transform_links(links):
    datas = []
@@ -70,10 +39,14 @@ def transform_links(links):
    return datas
# Downloads the site and extracts the data using the appropriate module
def extract_data(url):
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.text, "html.parser")
    links = apnews.fetchAndDigest(soup)
def extract_data():
    links = []

    # Plug in your module here (links.extend(your_module.fetchAndDigest()))
    links.extend(apnews.fetchAndDigest())
    links.extend(euobserver.fetchAndDigest())

    print("[+] Total news: " + str(len(links)))
    datas = transform_links(links)
    return datas
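As the comment above says, each source is a self-contained module exposing fetchAndDigest() that returns a list of [title, link] pairs, and extract_data() simply concatenates them. A hypothetical additional source (the module name newsite and the feed URL are made up for illustration) would follow the same shape as euobserver.py:

```python
# newsite.py -- hypothetical example module, not part of this repository
import feedparser


def fetchAndDigest():
    links = []
    # Placeholder feed URL; swap in the real RSS endpoint of the new source
    feed = feedparser.parse("https://example.com/rss.xml")
    for entry in feed.entries:
        links.append([entry.title, entry.link])
    print("[+] Total news: " + str(len(links)))
    return links
```

Wiring it in would then be one import next to the other modules and one `links.extend(newsite.fetchAndDigest())` inside extract_data().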
@@ -85,16 +58,10 @@ def handle_pagination(soup):


def main():
    global news_type
    url = "https://apnews.com/" + news_type
    all_data = []

    while url:
        datas = extract_data(url)
        all_data.extend(datas)
        url = handle_pagination(
            BeautifulSoup(requests.get(url, timeout=5).text, "html.parser")
        )
    datas = extract_data()
    all_data.extend(datas)
    # Prepare a nice CSS for the viewing page (nice and clean)
    css = """
@@ -116,7 +83,7 @@ def main():
    """

    # Create a nice HTML view of all the articles, each one on its own page
    html = "<html><head><title>APNews Unbiased News</title>"
    html = "<html><head><title>Unbiased News</title>"
    html += "<style>" + css + "</style>"
    html += "</head><body>"
    for item in all_data:
@@ -135,6 +102,4 @@ def main():


if __name__ == "__main__":
    menu()
    print("[+] News type: " + news_type)
    main()