diff --git a/Readme.md b/Readme.md deleted file mode 100644 index 51ef5c8..0000000 --- a/Readme.md +++ /dev/null @@ -1,23 +0,0 @@ -# Province article scraping - -A couple of scripts to scrape article text from various provinces for -a text analysis university course. - -We need: - -Qinghai -: page 14-75 - -Ningxia -: page 11-42 - -Shanxi -: page 2-18 - -Xinjiang -: page 10-20 - -The websites all have subtle differences, so there's simply a folder + -scripts for each (the scripts are simple enough that there's no need -for deduplication or anything complex). Written in python/js where -necessary for educational purposes. diff --git a/guangdong/extract-urls.js b/extract-urls.js similarity index 73% rename from guangdong/extract-urls.js rename to extract-urls.js index 94e2824..7081ecb 100644 --- a/guangdong/extract-urls.js +++ b/extract-urls.js @@ -15,17 +15,17 @@ * @param {string} fileName - The filename to give the file */ function downloadString(text, fileType, fileName) { - var blob = new Blob([text], { type: fileType }); + var blob = new Blob([text], { type: fileType }); - var a = document.createElement('a'); - a.download = fileName; - a.href = URL.createObjectURL(blob); - a.dataset.downloadurl = [fileType, a.download, a.href].join(':'); - a.style.display = "none"; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - setTimeout(function() { URL.revokeObjectURL(a.href); }, 1500); + var a = document.createElement('a'); + a.download = fileName; + a.href = URL.createObjectURL(blob); + a.dataset.downloadurl = [fileType, a.download, a.href].join(':'); + a.style.display = "none"; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + setTimeout(function() { URL.revokeObjectURL(a.href); }, 1500); } /** diff --git a/flake.nix b/flake.nix index 5a4daa6..d801027 100644 --- a/flake.nix +++ b/flake.nix @@ -16,8 +16,6 @@ in { devShell = pkgs.mkShell { nativeBuildInputs = with pkgs; [ - nodePackages.typescript-language-server - (python39.withPackages (pypkgs: with pypkgs; [ beautifulsoup4 diff --git a/guangdong/links/links.txt b/links/links.txt similarity index 100% rename from guangdong/links/links.txt rename to links/links.txt diff --git a/guangdong/scrape.py b/scrape.py similarity index 100% rename from guangdong/scrape.py rename to scrape.py