20 lines
469 B
Python
20 lines
469 B
Python
"""Script to scrape article text from http://sxwjw.shaanxi.gov.cn.

Article contents are in a div with the class `message-box`.
"""
||
|
|
||
|
from utils.linkutils import read_links
|
||
|
from utils.scrapeutils import download_link_texts
|
||
|
|
||
|
|
||
|
def main():
    """Load the saved article links and pass them to the downloader.

    The links are read from ``articles-shanxi/links.csv`` with
    ``read_links`` and then given to ``download_link_texts`` together with
    the ``message-box`` class name (the div that holds the article
    contents) and the ``articles-shanxi`` name.
    """
    links_path = "articles-shanxi/links.csv"
    with open(links_path, "r") as links_file:
        article_links = read_links(links_file)

    # NOTE(review): the third argument is presumably the output directory
    # for the downloaded texts -- confirm against utils.scrapeutils.
    download_link_texts(article_links, "message-box", "articles-shanxi")
|
||
|
|
||
|
|
||
|
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|