scrape-yuanyuan/utils/linkutils.py

33 lines
843 B
Python

"""Utility functions for handling links."""
import csv
from typing import List, NamedTuple, TextIO
class Link(NamedTuple):
    """A scraped link, stored as a ``(url, date)`` pair of strings."""

    # Address of the link.
    url: str
    # Date associated with the link, kept as text.
    date: str
def dump_links(links: List[Link], f: TextIO):
    """Write ``links`` to ``f`` as csv rows.

    A header row ``index,link,date`` is written first, followed by one row
    per entry holding its position in ``links`` and its first two fields.
    """
    out = csv.writer(f)
    header = ["index", "link", "date"]
    out.writerow(header)
    # Positional access means any (url, date) sequence works, not only Link.
    out.writerows(
        [position, entry[0], entry[1]] for position, entry in enumerate(links)
    )
def read_links(f: TextIO) -> List[Link]:
    """Read links from a csv format.

    The first row is treated as a header and discarded. Every following
    non-empty row yields ``Link(row[1], row[2])``; column 0 (the index) is
    ignored.

    Returns an empty list when ``f`` contains no rows at all or only a
    header.
    """
    reader = csv.reader(f)
    # Supplying a default keeps an empty input from leaking StopIteration.
    next(reader, None)
    # csv.reader yields [] for blank lines; skip them instead of indexing.
    return [Link(row[1], row[2]) for row in reader if row]
def absolutize_link(link: str, page_base: str) -> str:
    """Resolve a ``./``-relative link against ``page_base``.

    A link starting with ``./`` has that prefix replaced by ``page_base``;
    any other link is returned unchanged.
    """
    relative_prefix = "./"
    if not link.startswith(relative_prefix):
        return link
    remainder = link[len(relative_prefix):]
    return page_base + remainder