Override reported text encoding for qinghai
This commit is contained in:
parent
e61a31154f
commit
3858d2a556
|
@@ -61,7 +61,9 @@ def get_article_links(page: int) -> List[Tuple[str, str]]:
|
||||||
|
|
||||||
def download_article_text(link: str) -> str:
|
def download_article_text(link: str) -> str:
|
||||||
"""Get the text of an article from its link."""
|
"""Get the text of an article from its link."""
|
||||||
soup = BeautifulSoup(requests.get(link).text, "html.parser")
|
request = requests.get(link)
|
||||||
|
request.encoding = "gbk" # The website responds with the wrong encoding
|
||||||
|
soup = BeautifulSoup(request.text, "html.parser")
|
||||||
return soup.find(class_="page_text").get_text().strip()
|
return soup.find(class_="page_text").get_text().strip()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue