Compare commits

..

2 commits

View file

@ -1,7 +1,7 @@
r"""Script to scrape article contents from https://wsjkw.qinghai.gov.cn. r"""Script to scrape article contents from http://wsjkw.qinghai.gov.cn.
Links are available from pages Links are available from pages
https://wsjkw.qinghai.gov.cn/zwgk/xxgkml/index\d*.html. Links are in http://wsjkw.qinghai.gov.cn/zwgk/xxgkml/index\d*.html. Links are in
the second href of elements with the class `xxgk_content_title`. Dates the second href of elements with the class `xxgk_content_title`. Dates
are the first span of the same element. are the first span of the same element.
@ -35,6 +35,10 @@ def main():
writer.writerow(i, link[0], link[1]) writer.writerow(i, link[0], link[1])
for i, link in enumerate(links): for i, link in enumerate(links):
# Broken link
if i == 210:
continue
print(f"Downloading {link[0]} ({i}/{len(links)})") print(f"Downloading {link[0]} ({i}/{len(links)})")
text = download_article_text(link[0]) text = download_article_text(link[0])