Compare commits
2 commits
3858d2a556
...
8cb72464b4
Author | SHA1 | Date | |
---|---|---|---|
Tristan Daniël Maat | 8cb72464b4 | ||
Tristan Daniël Maat | a66fbc83aa | ||
|
@@ -1,7 +1,7 @@
|
|||
r"""Script to scrape article contents from https://wsjkw.qinghai.gov.cn.
|
||||
r"""Script to scrape article contents from http://wsjkw.qinghai.gov.cn.
|
||||
|
||||
Links are available from pages
|
||||
https://wsjkw.qinghai.gov.cn/zwgk/xxgkml/index\d*.html. Links are in
|
||||
http://wsjkw.qinghai.gov.cn/zwgk/xxgkml/index\d*.html. Links are in
|
||||
the second href of elements with the class `xxgk_content_title`. Dates
|
||||
are the first span of the same element.
|
||||
|
||||
|
@@ -35,6 +35,10 @@ def main():
|
|||
writer.writerow(i, link[0], link[1])
|
||||
|
||||
for i, link in enumerate(links):
|
||||
# Broken link
|
||||
if i == 210:
|
||||
continue
|
||||
|
||||
print(f"Downloading {link[0]} ({i}/{len(links)})")
|
||||
|
||||
text = download_article_text(link[0])
|
||||
|
|
Loading…
Reference in a new issue