import re
import subprocess
import sys
from argparse import ArgumentParser
from collections import Counter
from collections.abc import Iterator
from datetime import date
from http import HTTPStatus
from pathlib import Path

import minify_html
import rcssmin
import ssg
from marko import Markdown
from markupsafe import Markup
from pygments.formatters import HtmlFormatter
from ssg import Generator


def split_spans(s: str) -> Iterator[tuple[int, int]]:
    """Yield (start, end) spans for each word in s, splitting on whitespace
    and hyphens so every part of a hyphenated word is checked separately."""
    for match in re.finditer(r"[^\s-]+", s):
        yield match.start(), match.end()


def find_title_case_problems(s: str) -> list[int]:
    """Return the start index of every word in s that violates title case."""
    # fmt: off
    minor_words = {
        "a", "an", "and", "as", "at", "but", "by", "for", "if", "in",
        "nor", "of", "off", "on", "or", "per", "so", "the", "to", "up",
        "via", "yet",
    }
    # fmt: on
    problems = []
    next_cap = True  # The first word must always be capitalized.
    for start, end in split_spans(s):
        is_cap = s[start].isupper()
        if is_cap and not next_cap:
            # Minor words should stay lowercase unless they start a clause.
            word = s[start:end]
            if word.lower() in minor_words:
                problems.append(start)
        elif not is_cap:
            # A lowercase word is only acceptable mid-title and if it is
            # short enough to plausibly be a minor word.
            if next_cap or end - start > 3:
                problems.append(start)
        # A colon ends a clause, so the next word must be capitalized.
        next_cap = s[end - 1] == ":"
    return problems


def lint(m) -> list[str]:
    """Check page metadata for common problems and return a list of messages."""
    problems = []
    suppressed = set(m.get("lint", set()))
    if "no-title-case" not in suppressed and "title" in m:
        tc_problems = set(find_title_case_problems(m["title"]))
        if tc_problems:
            # Point a caret at each offending word.
            markers = "".join(
                "^" if i in tc_problems else " " for i in range(len(m["title"]))
            )
            problems.append(f"title-case:\n{m['title']}\n{markers}")
    if "no-missing-date" not in suppressed and "date" not in m:
        problems.append("missing-date")
    return problems


def min_html(s: str | bytes) -> str:
    if isinstance(s, bytes):
        s = s.decode()
    return minify_html.minify(s, do_not_minify_doctype=True)


def min_css(s: str | bytes) -> str | bytes:
    return rcssmin.cssmin(s, keep_bang_comments=True)


def main():
    markdown = Markdown(extensions=["toc", "codehilite", "footnote"])

    def markdown_to_html(md: str) -> str:
        # Render the document first, then substitute the generated table of
        # contents for the [TOC] placeholder.
        text = markdown.convert(md)
        toc = markdown.renderer.render_toc()
        return Markup(text.replace("[TOC]", toc))

    ap = ArgumentParser()
    ap.add_argument(
        "--include-hidden",
        help="Include hidden pages",
        action="store_true",
    )
    args = ssg.parse_args(ap)

    gen = Generator(args)
    gen.minifiers[".html"] = min_html
    gen.minifiers[".css"] = min_css
    gen.env.filters["format_date"] = lambda d, f="%Y-%m-%d": d.strftime(f)
    gen.env.filters["markdown_to_html"] = markdown_to_html
    gen.env.globals["website_url"] = "https://kramkow.ski"
    gen.env.globals["today"] = date.today()

    # Start from a clean output directory.
    subprocess.run(["rm", "-rf", str(gen.output)], check=True)

    # Static assets copied through unchanged (modulo minification).
    gen.copy("css/normalize.css")
    gen.copy("css/style.css")
    gen.copy("robots.txt")
    gen.copy("images/icon.svg")
    gen.copy("pubkey.asc")
    # PGP key published for Web Key Directory (WKD) lookup.
    gen.copy(
        ".well-known/openpgpkey/hu/5i647wb3g5ocghxnaqigkxgisgme1wob",
        "pubkey.bin",
    )
    gen.write(".well-known/openpgpkey/policy", b"")

    # Raster favicons at the sizes browsers and Apple devices expect,
    # exported from the SVG source with Inkscape.
    icons = [
        ("icon", 16),
        ("icon", 32),
        ("icon", 48),
        ("icon", 192),
        ("apple-touch-icon", 167),
        ("apple-touch-icon", 180),
    ]
    for _, size in icons:
        proc = subprocess.run(
            [
                "inkscape",
                "--export-type=png",
                "--export-filename=-",
                f"--export-width={size}",
                f"--export-height={size}",
                "images/icon.svg",
            ],
            capture_output=True,
            check=True,  # Fail loudly rather than writing an empty PNG.
        )
        gen.write(f"images/icon{size}.png", proc.stdout)
    gen.env.globals["icons"] = icons

    # Error pages the web server is configured to serve.
    errors = {400, 403, 404, 500, 503}
    for error in errors:
        gen.generate(
            f"error/{error}.html",
            template="error.html",
            env={"error": HTTPStatus(error)},
        )

    def parse(path):
        """Parse an article, lint its metadata, and derive its output path."""
        try:
            m = ssg.parse(path)
            lint_messages = lint(m)
            if lint_messages:
                messages = "\n".join(lint_messages)
                print(f"Linter messages for {path}:\n{messages}\n", file=sys.stderr)
            m.setdefault("date", date.today())
            year = f"{m['date'].year:04}"
            month = f"{m['date'].month:02}"
            day = f"{m['date'].day:02}"
            filename = path.stem + ".html"
            m["path"] = "/".join(("article", year, month, day, filename))
        except Exception as e:
            raise ValueError(f"Failed to parse {path}") from e
        return m

    def find_files(directory: Path) -> Iterator[Path]:
        # Hidden pages are prefixed with an underscore and skipped unless
        # --include-hidden was given.
        if args.include_hidden:
            return directory.glob("*.md")
        else:
            return directory.glob("[!_]*.md")

    articles = [parse(article) for article in find_files(Path("articles"))]

    # Count each tag once per listed article.
    tag_counts = Counter()
    for a in articles:
        if a.get("unlisted", False):
            continue
        tag_counts.update(set(a["tags"]))

    # Only generate tag pages for tags shared by at least two articles.
    tags = []
    for tag, count in tag_counts.items():
        if count <= 1:
            continue
        tags.append(tag)
        gen.generate(
            f"article/tag/{tag}.html",
            template="tag.html",
            env={"articles": articles, "tag": tag},
        )
    tags.sort()

    for article in articles:
        gen.generate(
            article["path"],
            template="article.html",
            env={"article": article, "all_tags": tags},
        )

    articles.sort(key=lambda e: e["date"], reverse=True)
    gen.generate("archive.html", env={"articles": articles, "tags": tags})

    def gen_collection(entity_name: str):
        """Generate a detail page per entity plus an index page listing
        everything not marked unlisted."""
        entities = []
        for entity in find_files(Path(f"{entity_name}s")):
            parsed = ssg.parse(entity)
            parsed["path"] = f"{entity_name}/{entity.stem}.html"
            gen.generate(
                parsed["path"],
                template=f"{entity_name}.html",
                env={entity_name: parsed},
            )
            if not parsed.get("unlisted", False):
                entities.append(parsed)
        entities.sort(key=lambda e: e["name"].lower())
        gen.generate(f"{entity_name}s.html", env={f"{entity_name}s": entities})
        return entities

    projects = gen_collection("project")
    recipes = gen_collection("recipe")

    gen.generate("index.html", env={"articles": articles})
    gen.generate("atom.xml", env={"articles": articles})

    def gen_sitemap(dest: str):
        gen.generate(
            dest,
            env={"articles": articles, "projects": projects, "recipes": recipes},
        )

    gen_sitemap("sitemap.txt")
    gen_sitemap("sitemap.xml")

    # Pygments stylesheets for the light and dark color schemes.
    gen.generate(
        "css/pygments.css",
        env={
            "light": HtmlFormatter(style="default").get_style_defs(".highlight"),
            "dark": HtmlFormatter(style="github-dark").get_style_defs(".highlight"),
        },
    )


if __name__ == "__main__":
    main()