From dcb665cde4c1a8b69df75568590d24bdcd010587 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tristan=20Dani=C3=ABl=20Maat?= <tm@tlater.net>
Date: Sat, 9 Apr 2022 16:50:15 +0100
Subject: [PATCH 1/3] Structure the project a bit better

---
 extract-urls.js => guangdong/extract-urls.js | 0
 {links => guangdong/links}/links.txt         | 0
 scrape.py => guangdong/scrape.py             | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename extract-urls.js => guangdong/extract-urls.js (100%)
 rename {links => guangdong/links}/links.txt (100%)
 rename scrape.py => guangdong/scrape.py (100%)

diff --git a/extract-urls.js b/guangdong/extract-urls.js
similarity index 100%
rename from extract-urls.js
rename to guangdong/extract-urls.js
diff --git a/links/links.txt b/guangdong/links/links.txt
similarity index 100%
rename from links/links.txt
rename to guangdong/links/links.txt
diff --git a/scrape.py b/guangdong/scrape.py
similarity index 100%
rename from scrape.py
rename to guangdong/scrape.py

From 60d7eec53f9c2cf0a5ccf4c895a9b6a960c2ebcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tristan=20Dani=C3=ABl=20Maat?= <tm@tlater.net>
Date: Sat, 9 Apr 2022 17:43:37 +0100
Subject: [PATCH 2/3] Add typescript-language-server, reindent extract-urls.js

---
 flake.nix                 |  2 ++
 guangdong/extract-urls.js | 20 ++++++++++----------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/flake.nix b/flake.nix
index d801027..5a4daa6 100644
--- a/flake.nix
+++ b/flake.nix
@@ -16,6 +16,8 @@
       in {
         devShell = pkgs.mkShell {
           nativeBuildInputs = with pkgs; [
+            nodePackages.typescript-language-server
+
             (python39.withPackages (pypkgs:
               with pypkgs; [
                 beautifulsoup4
diff --git a/guangdong/extract-urls.js b/guangdong/extract-urls.js
index 7081ecb..94e2824 100644
--- a/guangdong/extract-urls.js
+++ b/guangdong/extract-urls.js
@@ -15,17 +15,17 @@
  * @param {string} fileName - The filename to give the file
  */
 function downloadString(text, fileType, fileName) {
-  var blob = new Blob([text], { type: fileType });
+    var blob = new Blob([text], { type: fileType });
 
-  var a = document.createElement('a');
-  a.download = fileName;
-  a.href = URL.createObjectURL(blob);
-  a.dataset.downloadurl = [fileType, a.download, a.href].join(':');
-  a.style.display = "none";
-  document.body.appendChild(a);
-  a.click();
-  document.body.removeChild(a);
-  setTimeout(function() { URL.revokeObjectURL(a.href); }, 1500);
+    var a = document.createElement('a');
+    a.download = fileName;
+    a.href = URL.createObjectURL(blob);
+    a.dataset.downloadurl = [fileType, a.download, a.href].join(':');
+    a.style.display = "none";
+    document.body.appendChild(a);
+    a.click();
+    document.body.removeChild(a);
+    setTimeout(function() { URL.revokeObjectURL(a.href); }, 1500);
 }
 
 /**

From 9030da9a0c34f4547070b8dae7160966fea341a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tristan=20Dani=C3=ABl=20Maat?= <tm@tlater.net>
Date: Sat, 9 Apr 2022 17:43:47 +0100
Subject: [PATCH 3/3] Add Readme

---
 Readme.md | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Readme.md

diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..51ef5c8
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,23 @@
+# Province article scraping
+
+A couple of scripts to scrape article text from various provinces for
+a text analysis university course.
+
+We need the following page ranges:
+
+Qinghai
+: pages 14-75
+
+Ningxia
+: pages 11-42
+
+Shanxi
+: pages 2-18
+
+Xinjiang
+: pages 10-20
+
+The websites all have subtle differences, so each one simply gets its own
+folder of scripts (the scripts are simple enough that there's no need
+for deduplication or anything complex). Written in Python/JavaScript where
+necessary for educational purposes.