Compare commits

...

2 Commits

1 changed file with 4 additions and 2 deletions

View File

@@ -38,7 +38,7 @@ def main():
print(f"Downloading {link[0]} ({i}/{len(links)})")
text = download_article_text(link[0])
-with open(f"articles-qinghai/{link[1]}_{i}.txt") as f:
+with open(f"articles-qinghai/{link[1]}_{i}.txt", "w+") as f:
f.write(text)
@@ -61,7 +61,9 @@ def get_article_links(page: int) -> List[Tuple[str, str]]:
def download_article_text(link: str) -> str:
"""Get the text of an article from its link."""
-soup = BeautifulSoup(requests.get(link).text, "html.parser")
+request = requests.get(link)
+request.encoding = "gbk" # The website responds with the wrong encoding
+soup = BeautifulSoup(request.text, "html.parser")
return soup.find(class_="page_text").get_text().strip()