"""Utility functions for scraping."""
from pathlib import Path
from typing import List, Optional

import requests
from bs4 import BeautifulSoup

from .linkutils import Link

# Default number of seconds to wait for a server before giving up. Without a
# timeout, ``requests`` waits indefinitely on an unresponsive host.
DEFAULT_TIMEOUT = 30.0


def download_link_texts(
    links: List[Link],
    class_: str,
    directory: str,
    encoding: Optional[str] = None,
    timeout: float = DEFAULT_TIMEOUT,
):
    """Download the text of each link's matching HTML element into a directory.

    For every link, the page at ``link.url`` is fetched and the text of the
    first element carrying ``class_`` is written to
    ``<directory>/<link.date>_<index>.txt``, where ``index`` is the link's
    zero-based position in ``links``.

    Args:
        links: Links to download; each must expose ``url`` and ``date``.
        class_: CSS class name of the element whose text is extracted.
        directory: Target directory. It is not created here, so it must
            already exist.
        encoding: Optional encoding used to decode each HTTP response. When
            omitted, the encoding detected by ``requests`` is used.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: If a request fails, times out, or returns
            an HTTP error status.
        ValueError: If a page contains no element with ``class_``.
    """
    total = len(links)
    target_dir = Path(directory)
    for i, link in enumerate(links):
        print(f"Downloading {link.url} ({i+1}/{total})")
        text = get_link_text(link.url, class_, encoding, timeout)
        # Write as UTF-8 explicitly: scraped text routinely contains
        # characters that the platform default encoding cannot represent.
        out_path = target_dir / f"{link.date}_{i}.txt"
        with open(out_path, "w", encoding="utf-8") as f:
            f.write(text)


def get_link_text(
    link: str,
    class_: str,
    encoding: Optional[str] = None,
    timeout: float = DEFAULT_TIMEOUT,
) -> str:
    """Return the stripped text of the first element with a given class.

    Args:
        link: URL of the page to fetch.
        class_: CSS class name of the element whose text is extracted.
        encoding: Optional encoding used to decode the response body. When
            omitted, the encoding detected by ``requests`` is used.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The element's text with leading and trailing whitespace removed.

    Raises:
        requests.RequestException: If the request fails, times out, or
            returns an HTTP error status.
        ValueError: If the page contains no element with ``class_``.
    """
    response = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    response.raise_for_status()
    if encoding:
        response.encoding = encoding

    soup = BeautifulSoup(response.text, "html.parser")
    element = soup.find(class_=class_)
    if element is None:
        # ``find`` returns None when nothing matches; report that clearly
        # rather than failing with an AttributeError on NoneType.
        raise ValueError(f"No element with class {class_!r} found at {link}")
    return element.get_text().strip()