def download_article_text(link: str, timeout: float = 30.0) -> str:
    """Get the text of an article from its link.

    Args:
        link: URL of the article page to fetch.
        timeout: Seconds to wait for the server to connect or to send data
            before giving up, so a stalled server cannot hang the scrape.

    Returns:
        The text of the first element with class ``page_text``, with
        leading and trailing whitespace stripped.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
        AttributeError: If the page has no element with class ``page_text``.
    """
    response = requests.get(link, timeout=timeout)
    # The website responds with the wrong encoding, so force GBK before
    # ``response.text`` decodes the body.
    response.encoding = "gbk"
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.find(class_="page_text").get_text().strip()