from bs4 import BeautifulSoup
import requests


def getSoup():
    # Download the AP News world-news page and parse it with BeautifulSoup
    response = requests.get("https://apnews.com/world-news", timeout=5)
    soup = BeautifulSoup(response.text, "html.parser")
    return soup


def fetchAndDigest():
    soup = getSoup()
    # Each article teaser on the page is wrapped in a div.PagePromo element
    news_items = soup.find_all("div", class_="PagePromo")
    print("[+] Filtering out invalid articles...")
    links = []
    for news_item in news_items:
        # The data-gtm-region attribute carries the article title; fall back to
        # an empty string instead of raising KeyError when it is missing
        article_title = news_item.get("data-gtm-region", "")
        # Extract the article link: prefer the media block, fall back to the headline
        try:
            article_link = (
                news_item.find_all("div", class_="PagePromo-media")
                .pop()
                .find("a")
                .get("href")
            )
        except Exception:
            try:
                article_link = (
                    news_item.find_all("h3", class_="PagePromo-title")
                    .pop()
                    .find("a")
                    .get("href")
                )
            except Exception:
                print("[!] Invalid article. Skipping...")
                print(news_item)
                continue
        links.append([article_title, article_link])

    print("[+] Total news: " + str(len(links)))
    return links
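

# Minimal usage sketch, assuming the module is run directly as a script:
# fetch the AP world-news page and print each collected [title, link] pair.
if __name__ == "__main__":
    for title, link in fetchAndDigest():
        print(title + ": " + link)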