From e61a31154f650e73f1fa8e7a23ede718f9489980 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tristan=20Dani=C3=ABl=20Maat?= <tm@tlater.net>
Date: Sat, 9 Apr 2022 18:58:40 +0100
Subject: [PATCH 1/2] Fix missing write mode when opening output file

---
 qinghai/scrape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qinghai/scrape.py b/qinghai/scrape.py
index db65ae4..b465a53 100644
--- a/qinghai/scrape.py
+++ b/qinghai/scrape.py
@@ -38,7 +38,7 @@ def main():
         print(f"Downloading {link[0]} ({i}/{len(links)})")
 
         text = download_article_text(link[0])
-        with open(f"articles-qinghai/{link[1]}_{i}.txt") as f:
+        with open(f"articles-qinghai/{link[1]}_{i}.txt", "w+") as f:
             f.write(text)
 
 

From 3858d2a5561e23d2f5e85b4887db03b996fe35a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tristan=20Dani=C3=ABl=20Maat?= <tm@tlater.net>
Date: Sat, 9 Apr 2022 18:58:58 +0100
Subject: [PATCH 2/2] Override reported text encoding for qinghai

---
 qinghai/scrape.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/qinghai/scrape.py b/qinghai/scrape.py
index b465a53..561d92d 100644
--- a/qinghai/scrape.py
+++ b/qinghai/scrape.py
@@ -61,7 +61,9 @@ def get_article_links(page: int) -> List[Tuple[str, str]]:
 
 def download_article_text(link: str) -> str:
     """Get the text of an article from its link."""
-    soup = BeautifulSoup(requests.get(link).text, "html.parser")
+    request = requests.get(link)
+    request.encoding = "gbk"  # The website responds with the wrong encoding
+    soup = BeautifulSoup(request.text, "html.parser")
     return soup.find(class_="page_text").get_text().strip()