diff --git a/qinghai/2020-02-08_275.html b/qinghai/2020-02-08_275.html
deleted file mode 100644
index f5ee8b5..0000000
--- a/qinghai/2020-02-08_275.html
+++ /dev/null
@@ -1,125 +0,0 @@
-国家卫生健康委关于新型冠状病毒肺炎暂命名事宜的通知
-
-发布时间:2020-02-08
-来源:医政医管局
-
-国卫医函〔2020〕42号
-
-各省、自治区、直辖市人民政府,新疆生产建设兵团,国务院应对新型冠状病毒肺炎疫情联防联控机制成员:
-现决定将“新型冠状病毒感染的肺炎”暂命名为“新型冠状病毒肺炎”,简称“新冠肺炎”;英文名称为“Novel Coronavirus Pneumonia”,简称“NCP”。
-
-国家卫生健康委
-2020年2月7日
-
-(信息公开形式:主动公开)
-
-地址:北京市西城区西直门外南路1号 邮编:100044 电话:010-68792114 ICP备案编号:京ICP备18052910号 京公网安备 11010202000005号
-中华人民共和国国家卫生健康委员会 版权所有 技术支持:国家卫生健康委员会统计信息中心 网站标识码:bm24000006
-normal
\ No newline at end of file
diff --git a/qinghai/README.md b/qinghai/README.md
deleted file mode 100644
index 18cea8b..0000000
--- a/qinghai/README.md
+++ /dev/null
@@ -1,53 +0,0 @@
-## Qinghai scraping
-
-A few links no longer exist. They have the indexes 210, 453, 681,
-703, 791, 871, 913, 914, 915 in `links.csv`.
-
-There are again a few small files, mostly PDF links (a sketch for
-filtering these out follows the listing):
-
-```console
-.rw-r--r-- 101 tlater 9 Apr 19:26 ./2016-09-28_923.txt
-.rw-r--r-- 133 tlater 9 Apr 19:26 ./2016-09-28_924.txt
-.rw-r--r-- 116 tlater 9 Apr 19:26 ./2016-09-28_925.txt
-.rw-r--r-- 147 tlater 9 Apr 19:26 ./2016-09-28_926.txt
-.rw-r--r-- 111 tlater 9 Apr 19:23 ./2017-03-16_838.txt
-.rw-r--r-- 36 tlater 9 Apr 19:20 ./2017-07-07_745.txt
-.rw-r--r-- 82 tlater 9 Apr 19:17 ./2017-08-14_723.txt
-.rw-r--r-- 211 tlater 9 Apr 19:17 ./2017-09-12_704.txt
-.rw-r--r-- 97 tlater 9 Apr 19:14 ./2017-11-15_587.txt
-.rw-r--r-- 156 tlater 9 Apr 19:13 ./2017-11-20_580.txt
-.rw-r--r-- 283 tlater 9 Apr 19:13 ./2017-11-23_575.txt
-.rw-r--r-- 39 tlater 9 Apr 19:13 ./2017-12-29_566.txt
-.rw-r--r-- 39 tlater 9 Apr 19:13 ./2018-01-12_561.txt
-.rw-r--r-- 165 tlater 9 Apr 19:12 ./2018-05-30_505.txt
-.rw-r--r-- 145 tlater 9 Apr 19:12 ./2018-05-30_507.txt
-.rw-r--r-- 391 tlater 9 Apr 19:11 ./2018-07-25_475.txt
-.rw-r--r-- 158 tlater 9 Apr 19:11 ./2018-09-13_467.txt
-.rw-r--r-- 204 tlater 9 Apr 19:04 ./2020-03-09_254.txt
-.rw-r--r-- 124 tlater 9 Apr 19:04 ./2020-03-18_248.txt
-.rw-r--r-- 228 tlater 9 Apr 19:04 ./2020-03-20_245.txt
-.rw-r--r-- 186 tlater 9 Apr 19:03 ./2020-04-01_221.txt
-.rw-r--r-- 67 tlater 9 Apr 19:02 ./2020-04-21_208.txt
-.rw-r--r-- 174 tlater 9 Apr 19:01 ./2020-04-30_194.txt
-.rw-r--r-- 147 tlater 9 Apr 19:01 ./2020-05-08_186.txt
-.rw-r--r-- 189 tlater 9 Apr 19:01 ./2020-05-12_182.txt
-.rw-r--r-- 82 tlater 9 Apr 19:01 ./2020-05-15_180.txt
-.rw-r--r-- 119 tlater 9 Apr 19:00 ./2020-06-04_139.txt
-.rw-r--r-- 201 tlater 9 Apr 19:00 ./2020-07-01_114.txt
-.rw-r--r-- 113 tlater 9 Apr 18:59 ./2020-07-20_90.txt
-.rw-r--r-- 115 tlater 9 Apr 18:59 ./2020-07-21_86.txt
-.rw-r--r-- 99 tlater 9 Apr 18:58 ./2020-08-27_36.txt
-.rw-r--r-- 99 tlater 9 Apr 18:58 ./2020-08-27_37.txt
-.rw-r--r-- 130 tlater 9 Apr 18:58 ./2020-08-27_38.txt
-.rw-r--r-- 130 tlater 9 Apr 18:58 ./2020-08-27_39.txt
-.rw-r--r-- 190 tlater 9 Apr 18:58 ./2020-08-27_40.txt
-.rw-r--r-- 190 tlater 9 Apr 18:58 ./2020-08-27_41.txt
-.rw-r--r-- 184 tlater 9 Apr 18:58 ./2020-08-27_42.txt
-.rw-r--r-- 184 tlater 9 Apr 18:58 ./2020-08-27_43.txt
-.rw-r--r-- 127 tlater 9 Apr 18:58 ./2020-08-27_44.txt
-.rw-r--r-- 127 tlater 9 Apr 18:58 ./2020-08-27_45.txt
-.rw-r--r-- 94 tlater 9 Apr 18:58 ./2020-08-27_46.txt
-.rw-r--r-- 94 tlater 9 Apr 18:58 ./2020-08-27_47.txt
-.rw-r--r-- 88 tlater 9 Apr 18:58 ./2020-09-12_20.txt
-.rw-r--r-- 200 tlater 9 Apr 18:57 ./2020-09-21_11.txt
-```
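-
-These stub files are too small to hold article text. A minimal sketch of
-how they could be filtered out in post-processing; the 500-byte threshold
-is an assumption, not something the scraper enforces:
-
-```python
-from pathlib import Path
-
-# Keep only scraped articles that are large enough to plausibly contain
-# body text; the smaller files listed above are mostly just PDF links.
-MIN_SIZE = 500  # bytes, assumed cutoff
-
-articles = [p for p in Path(".").glob("*.txt") if p.stat().st_size >= MIN_SIZE]
-```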
diff --git a/qinghai/scrape-iframe.py b/qinghai/scrape-iframe.py
deleted file mode 100644
index a9a6f6d..0000000
--- a/qinghai/scrape-iframe.py
+++ /dev/null
@@ -1,24 +0,0 @@
-"""Script to scrape contents from a specific article.
-
-This is for
-http://www.nhc.gov.cn/xcs/zhengcwj/202002/18c1bb43965a4492907957875de02ae7.shtml.
-
-For whatever reason, this article is implemented as an iframe, so
-requires downloading with a full-featured browser. It's just one
-though, so let's parse it.
-"""
-
-from bs4 import BeautifulSoup
-
-
-def main():
- """Scrape html site."""
-    with open("2020-02-08_275.html", "r") as f:
-        soup = BeautifulSoup(f.read(), "html.parser")
-    text = soup.find(class_="w1024").get_text().strip()
-    with open("articles-qinghai/2020-02-08_275.txt", "w+") as f:
-        f.write(text)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/qinghai/scrape.py b/qinghai/scrape.py
index f6b8025..812973a 100644
--- a/qinghai/scrape.py
+++ b/qinghai/scrape.py
@@ -35,11 +35,8 @@ def main():
         writer.writerow([i, link[0], link[1]])
     for i, link in enumerate(links):
-        # Broken links
-        #
-        # 275 was available as an iframe, and is parsed separately in
-        # scrape-iframe.py
-        if i in (210, 275, 453, 681, 703, 791, 871, 913, 914, 915):
+        # Broken link
+        if i == 210:
             continue
         print(f"Downloading {link[0]} ({i}/{len(links)})")