import os

from dotenv import load_dotenv

# Our modules
import apnews
import rss
import summarizer

# Load environment variables
load_dotenv()
pplx_api_key = os.getenv("PPLX_API_KEY")
model = os.getenv("MODEL")


# Fetch and summarize each article
def transform_links(links):
    datas = []
    counter = 0
    print("[+] Extracting data from articles...")
    for link in links:
        counter += 1
        print("[+] Article " + str(counter) + " of " + str(len(links)))
        article_title = link[0]
        article_link = link[1]
        print("[ " + article_title + " ]")
        print("[+] Extracting data from: " + article_link)
        try:
            article_summary = summarizer.summarize(article_link, pplx_api_key, model)
        except Exception as e:
            print(e)
            print("[!] Invalid article. Skipping...")
            continue
        datas.append(
            {
                "article_title": article_title,
                "article_link": article_link,
                "article_summary": article_summary,
            }
        )
    return datas


# Download the sites and extract the data using the appropriate module
def extract_data():
    links = []
    # Add your RSS links to the .env file
    rss_links = os.getenv("ENABLED_RSS").split(",")
    links.extend(rss.fetchAndDigest(rss_links))
    # Plug in your module here: links.extend(your_module.fetchAndDigest())
    # TODO: Programmatically scan and import modules
    links.extend(apnews.fetchAndDigest())
    print("[+] Total news: " + str(len(links)))
    datas = transform_links(links)
    return datas


# Return the URL of the next page from a BeautifulSoup document, if any
def handle_pagination(soup):
    next_page = soup.find("a", {"rel": "next"})
    if next_page:
        return next_page["href"]
    return None


def main():
    all_data = []
    datas = extract_data()
    all_data.extend(datas)

    # Prepare a nice CSS for the viewing page (nice and clean)
    css = """
    body { font-family: Helvetica, Arial, sans-serif; }
    h1 { font-size: 2em; }
    h2 { font-size: 1.5em; }
    h3 { font-size: 1.2em; }
    p { font-size: 1em; }
    """

    # Create a nice HTML view of all the articles, each one on its own page
    html = "<html><head><style>" + css + "</style></head><body>"
" + item["article_summary"] + "
" html += "Read the full article" html += "