From 35969b087d05cf2a6b199a145e3a06bbaa05326b Mon Sep 17 00:00:00 2001 From: thecookingsenpai Date: Sat, 13 Jan 2024 00:50:55 +0100 Subject: [PATCH] added euobserver and remade apnews --- README.md | 9 ++++- __pycache__/apnews.cpython-311.pyc | Bin 1698 -> 2162 bytes apnews.py | 9 ++++- euobserver.py | 13 +++++++ main.py | 59 ++++++----------------------- 5 files changed, 40 insertions(+), 50 deletions(-) create mode 100644 euobserver.py diff --git a/README.md b/README.md index 6627cfb..3ad9bfc 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,12 @@ I maintain a daily updated (if I use this software) archive section. This is mad ## Disclaimer -MySides is a personal tool designed to scrape news from APNews. Please note that all material downloaded, used, and reworked by this software is the property of APNews. This tool aims to provide a quick overview of daily news. For more information, please refer to the APNews Terms of Service. +MySides is a personal tool designed to scrape news from various sources. Please note that all material downloaded, used, and reworked by this software is the property of various sources. This tool aims to provide a quick overview of daily news. For more information, please refer to the various sources Terms of Service. + +## Built-in sites + +[x] APNews (world news) +[x] EuObserver (rss feed) ## Work In Progress @@ -17,7 +22,7 @@ Stay tuned. ## TLDR -MySides scrapes the latest news from APNews and uses Perplexity AI APIs to summarize them into a concise, single-page format. +MySides scrapes the latest news from various sources and uses Perplexity AI APIs to summarize them into a concise, single-page format. ## Perplexity AI? diff --git a/__pycache__/apnews.cpython-311.pyc b/__pycache__/apnews.cpython-311.pyc index 8c37f1ae00e04d5c7a00e8b4a6f5a77c73f328a3..83c1c6ac10ed050dff09a4aa69d2025500b7c45f 100644 GIT binary patch delta 928 zcmZuwPe>GD6n}62wCm2UqnWj|Eh$E}n68J=tSGc4tw0JdAwg`;tgg)NPTzcsA}cQD zB@ja9V22LYp+6K|x^yfRx&$dm$bhE~;U$(l)v5PQYFWLR@6GSM?|c8hc^vsxGxanQ z2?0^@<0CVy0DKn04oTZ!>o|dVFu*_yayW=6uw+Stk|EE)plm3_6+;?;gt{q=66i&G z%yf}wGev8lP_+Apwte{@3IsB(t*M*D=YgZI_SxtzHqek>%1yf9IDb0dAiq>_Dq0|in>{Z zG@@Lml~@@z^h2|yl_O{0USH9BSM}aCy|>)Ap+z4=AI6_WR<({bt)rY;*Yxt$Ei!;T zZ3~jZZ`&t|Bf93k!sxz9D8T~G-Dp8|Zsr2w+;YNwkM-^w)ZI=z=x$&Qs_s{8j@@qp z9u{7I(J^4t2vty$7to!+|68@x?X_q`nt*#!ij&VPQYQx7X(<`|V3eg??}Hc2Wb?+Yv}Jj!&9Zs!$#%i^6rp9JIQNTuRArh#VdYYiiO^nk#^ggI-FYLG z9i}m`CQ??>a_-Vip^j?%?{&>~QPW;8DC0PFnzr7l-Fi>Gv{)ZXHL8mZ-TK9LWhsdy zEVZ9V#RAJwiSY7a6gVae+#q%nswdxePWdxt0dnN0?MHkLtXe2UNCN$(_uClS`Pz zCU0OCV(nLEDB+qc$Sew0XT^+NotQA#p&a&@Gk&5+9fu85uvYh%gF%U_d6gA^aj~pc(*IEL?{G diff --git a/apnews.py b/apnews.py index b79d89c..276bee9 100644 --- a/apnews.py +++ b/apnews.py @@ -1,6 +1,13 @@ +from bs4 import BeautifulSoup +import requests -def fetchAndDigest(soup): +def getSoup(): + response = requests.get("https://apnews.com/world-news", timeout=5) + soup = BeautifulSoup(response.text, "html.parser") + return soup +def fetchAndDigest(): + soup = getSoup() news_items = soup.find_all("div", class_="PagePromo") print("[+] Filtering out invalid articles...") links = [] diff --git a/euobserver.py b/euobserver.py new file mode 100644 index 0000000..1776476 --- /dev/null +++ b/euobserver.py @@ -0,0 +1,13 @@ +import feedparser + +def fetchAndDigest(): + links = [] + feed = feedparser.parse("https://xml.euobserver.com/rss.xml") + for entry in feed.entries: + + article_title = entry.title + article_link = entry.link + links.append([article_title, article_link]) + + print("[+] Total news: " + str(len(links))) + return links \ No newline at end of file diff --git a/main.py b/main.py index 9de5d15..ac4c7bb 100644 --- a/main.py +++ b/main.py @@ -1,48 +1,17 @@ import os -import requests -from bs4 import BeautifulSoup from dotenv import load_dotenv # Our modules import apnews +import euobserver import summarizer load_dotenv() # Loading environment variables -news_type = os.getenv("NEWS") pplx_api_key = os.getenv("PPLX_API_KEY") model = os.getenv("MODEL") -# Main menu -def menu(): - global news_type - available_news = os.getenv("POSSIBLE_NEWS_VALUES") - available_news = available_news.split(",") - print("[ Welcome to MySides ]") - print("[+] Available news: ") - counter = 0 - for avail in available_news: - counter += 1 - print(str(counter) + ") " + avail.strip().replace('"', "")) - - print("[+] Current news: " + news_type) - print("[+] Press enter to continue or type a number to change the news type.") - news_type_n = input().strip() - if news_type_n == "": - return - try: - news_type_n = int(news_type_n) - except Exception: - menu() - print("[!] Invalid news type.") - news_type_n -= 1 - try: - news_type = available_news[news_type_n] - except Exception: - menu() - print("[!] Invalid news type.") - # Fetch and summarize the article def transform_links(links): datas = [] @@ -70,10 +39,14 @@ def transform_links(links): return datas # Downloads the site and extracting the data using the appropriate module -def extract_data(url): - response = requests.get(url, timeout=5) - soup = BeautifulSoup(response.text, "html.parser") - links = apnews.fetchAndDigest(soup) +def extract_data(): + links = [] + + # Plug in your module here (links.extend(your_module.fetchAndDigest()) + links.extend(apnews.fetchAndDigest()) + links.extend(euobserver.fetchAndDigest()) + + print("[+] Total news: " + str(len(links))) datas = transform_links(links) return datas @@ -85,16 +58,10 @@ def handle_pagination(soup): def main(): - global news_type - url = "https://apnews.com/" + news_type all_data = [] - while url: - datas = extract_data(url) - all_data.extend(datas) - url = handle_pagination( - BeautifulSoup(requests.get(url, timeout=5).text, "html.parser") - ) + datas = extract_data() + all_data.extend(datas) # Prepare a nice CSS for the viewing page (nice and clean) css = """ @@ -116,7 +83,7 @@ def main(): """ # Create a nice HTML view of all the articles each one in its own page - html = "APNews Unbiased News" + html = "Unbiased News" html += "" html += "" for item in all_data: @@ -135,6 +102,4 @@ def main(): if __name__ == "__main__": - menu() - print("[+] News type: " + news_type) main()