2022-04-09 22:33:43 +01:00
|
|
|
"""Utility functions for handling links."""
|
|
|
|
import csv
|
|
|
|
from typing import List, NamedTuple, TextIO
|
|
|
|
|
|
|
|
|
|
|
|
class Link(NamedTuple):
    """An immutable (url, date) record describing a single link."""

    # Address of the link.
    url: str
    # Date associated with the link, kept as a plain string.
    date: str
|
|
|
|
|
|
|
|
|
|
|
|
def dump_links(links: List[Link], f: TextIO):
    """Write ``links`` to ``f`` as csv.

    The output starts with an ``index,link,date`` header row, followed by
    one row per link holding its position in ``links``, its url and its
    date.
    """
    out = csv.writer(f)
    out.writerow(["index", "link", "date"])
    # Positional access keeps this working for any 2-item sequence.
    out.writerows(
        [position, entry[0], entry[1]]
        for position, entry in enumerate(links)
    )
|
|
|
|
|
|
|
|
|
|
|
|
def read_links(f: TextIO) -> List[Link]:
    """Read links from a csv format.

    Expects the layout produced by ``dump_links``: one header row, then
    rows whose second and third columns hold the url and the date.

    Args:
        f: Text stream containing the csv data.

    Returns:
        The links in file order. An empty list is returned when the stream
        is empty or contains only the header.
    """
    reader = csv.reader(f)
    # Skip the header; the default avoids StopIteration on an empty stream.
    next(reader, None)
    # csv.reader yields [] for blank lines; skip those instead of failing
    # with IndexError when indexing the row.
    return [Link(row[1], row[2]) for row in reader if row]
|
2022-04-09 23:06:16 +01:00
|
|
|
|
|
|
|
|
|
|
|
def absolutize_link(link: str, page_base: str) -> str:
    """Return ``link`` with a leading ``./`` replaced by ``page_base``.

    Links that do not start with ``./`` are returned unchanged.
    """
    relative_prefix = "./"
    if not link.startswith(relative_prefix):
        return link
    return page_base + link[len(relative_prefix):]
|