Override reported text encoding for qinghai

This commit is contained in:
Tristan Daniël Maat 2022-04-09 18:58:58 +01:00
parent e61a31154f
commit 3858d2a556
Signed by: tlater
GPG key ID: 49670FD774E43268

View file

@@ -61,7 +61,9 @@ def get_article_links(page: int) -> List[Tuple[str, str]]:
 def download_article_text(link: str) -> str:
     """Get the text of an article from its link."""
-    soup = BeautifulSoup(requests.get(link).text, "html.parser")
+    request = requests.get(link)
+    request.encoding = "gbk"  # The website responds with the wrong encoding
+    soup = BeautifulSoup(request.text, "html.parser")
     return soup.find(class_="page_text").get_text().strip()