mysides/apnews.py

from bs4 import BeautifulSoup
import requests


def getSoup():
    """Fetch the AP News world-news listing and return it as parsed HTML."""
    response = requests.get("https://apnews.com/world-news", timeout=5)
    soup = BeautifulSoup(response.text, "html.parser")
    return soup


def fetchAndDigest():
    """Scrape the listing page and return a list of [title, link] pairs."""
    soup = getSoup()
    news_items = soup.find_all("div", class_="PagePromo")
    print("[+] Filtering out invalid articles...")
    links = []
    for news_item in news_items:
        # The PagePromo div carries the article title in its data-gtm-region
        # attribute; .get() avoids a KeyError when the attribute is absent.
        article_title = news_item.get("data-gtm-region")
        # Extract the article link: try the media block first, then fall back
        # to the title heading. An empty find_all() makes .pop() raise, which
        # drops us into the next handler.
        try:
            article_link = news_item.find_all("div", class_="PagePromo-media").pop().find("a").get("href")
        except Exception:
            try:
                article_link = news_item.find_all("h3", class_="PagePromo-title").pop().find("a").get("href")
            except Exception:
                print("[!] Invalid article. Skipping...")
                print(news_item)
                continue
        links.append([article_title, article_link])
    print("[+] Total news: " + str(len(links)))
    return links
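

# A minimal usage sketch, not part of the original script: running the module
# directly would print each scraped [title, link] pair returned by
# fetchAndDigest().
if __name__ == "__main__":
    for title, link in fetchAndDigest():
        print(f"{title}: {link}")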