mysides/apnews.py

from bs4 import BeautifulSoup
import requests


def getSoup():
    """Fetch the AP News world-news listing and return it as parsed HTML."""
    response = requests.get("https://apnews.com/world-news", timeout=5)
    soup = BeautifulSoup(response.text, "html.parser")
    return soup


def fetchAndDigest():
    """Scrape the listing page and return a list of [title, link] pairs."""
    soup = getSoup()
    news_items = soup.find_all("div", class_="PagePromo")
    print("[+] Filtering out invalid articles...")
    links = []
    for news_item in news_items:
        # The PagePromo div carries the article title in its data-gtm-region
        # attribute; .get() avoids a KeyError when the attribute is absent.
        article_title = news_item.get("data-gtm-region")
        # Extract the article link: try the media block first, then fall back
        # to the title heading. An empty find_all() makes .pop() raise, which
        # drops us into the next handler.
        try:
            article_link = news_item.find_all("div", class_="PagePromo-media").pop().find("a").get("href")
        except Exception:
            try:
                article_link = news_item.find_all("h3", class_="PagePromo-title").pop().find("a").get("href")
            except Exception:
                print("[!] Invalid article. Skipping...")
                print(news_item)
                continue
        links.append([article_title, article_link])
    print("[+] Total news: " + str(len(links)))
    return links
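

# A minimal usage sketch, not part of the original script: running the module
# directly would print each scraped [title, link] pair returned by
# fetchAndDigest().
if __name__ == "__main__":
    for title, link in fetchAndDigest():
        print(f"{title}: {link}")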