Compare commits
No commits in common. "8cb72464b45c8b059a52d629e5037405aff70e08" and "3858d2a5561e23d2f5e85b4887db03b996fe35a7" have entirely different histories.
8cb72464b4
...
3858d2a556
|
@ -1,7 +1,7 @@
|
||||||
r"""Script to scrape article contents from http://wsjkw.qinghai.gov.cn.
|
r"""Script to scrape article contents from https://wsjkw.qinghai.gov.cn.
|
||||||
|
|
||||||
Links are available from pages
|
Links are available from pages
|
||||||
http://wsjkw.qinghai.gov.cn/zwgk/xxgkml/index\d*.html. Links are in
|
https://wsjkw.qinghai.gov.cn/zwgk/xxgkml/index\d*.html. Links are in
|
||||||
the second href of elements with the class `xxgk_content_title`. Dates
|
the second href of elements with the class `xxgk_content_title`. Dates
|
||||||
are the first span of the same element.
|
are the first span of the same element.
|
||||||
|
|
||||||
|
@ -35,10 +35,6 @@ def main():
|
||||||
writer.writerow(i, link[0], link[1])
|
writer.writerow(i, link[0], link[1])
|
||||||
|
|
||||||
for i, link in enumerate(links):
|
for i, link in enumerate(links):
|
||||||
# Broken link
|
|
||||||
if i == 210:
|
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"Downloading {link[0]} ({i}/{len(links)})")
|
print(f"Downloading {link[0]} ({i}/{len(links)})")
|
||||||
|
|
||||||
text = download_article_text(link[0])
|
text = download_article_text(link[0])
|
||||||
|
|
Loading…
Reference in a new issue