Data Models for Sites, Web Features, and Feeds
python source: :tangle arcology/models.pyfrom __future__ import annotations from typing import Optional, List from django.db import models from django.conf import settings from django_prometheus.models import ExportModelOperationsMixin as EMOM import pathlib import arrow import arroyo.arroyo_rs as native from arcology.cache_decorator import cache import roam.models import logging logger = logging.getLogger(__name__) logger.setLevel(logging.WARN) # used for some memoization class hashabledict(dict): def __hash__(self): return hash(tuple(sorted(self.items())))
arcology.models.Site
A Site has many SiteDomains. It has a routing key, a title, and some CSS and customization. There are a few helper classmethods that take an input request or routing key and return a Site object based on the SiteDomain or whatnot. I'm not sure I want the async definitions to stick around; there needs to be some consideration of what should be async in this system and where asgi can be relied on for concurrency.
Sites are created in the Arcology Seed Command .
# python source: :tangle arcology/models.py
# Sites and SiteDomains are created in django-admin or a seed rather than from
# arroyo parser, no create_from_arroyo..!
class Site(EMOM('site'), models.Model):
    """A published site, addressed by the first segment of a route key.

    A Site has many SiteDomains; the first one is used when building
    absolute URLs for pages and feeds.
    """

    key = models.CharField(max_length=512, primary_key=True)
    title = models.CharField(max_length=512)

    # add choices
    css_file = models.CharField(max_length=512, blank=True, default=None)
    # this is used in sitemap, and maybe links..
    link_color = models.CharField(max_length=8, blank=True, default=None)

    def _urlize_route_key(self, route_key: str) -> str:
        """Return ``https://<first domain>/<route key without its site prefix>``.

        Raises AttributeError if the site has no SiteDomain and IndexError
        if ``route_key`` contains no ``/``.
        """
        domain = self.sitedomain_set.first().domain
        key_rest = route_key.split("/", 1)[1]
        return f"https://{domain}/{key_rest}"

    def urlize_page(self, page: Page, heading: Optional[roam.models.Heading] = None):
        """Return the absolute URL of ``page``, anchored at ``heading`` if given."""
        url = self._urlize_route_key(page.route_key)
        if heading is not None:
            url = url + f"#{heading.node_id}"
        return url

    def urlize_feed(self, feed: Feed):
        """Return the absolute URL of ``feed``."""
        return self._urlize_route_key(feed.route_key)

    @classmethod
    def from_route(cls, route_key: str) -> Site:
        """Return the Site named by the first ``/``-separated segment of ``route_key``.

        Raises ``Site.DoesNotExist`` when there is no such site.
        """
        site_key = route_key.split("/")[0]
        # .get() raises DoesNotExist rather than returning None, so no
        # further None check is needed here.
        return cls.objects.get(key=site_key)

    @classmethod
    def from_hostname(cls, host) -> Site:
        """Return the first Site that has a SiteDomain equal to ``host``.

        Raises ``Site.DoesNotExist`` when no site is configured for the host.
        """
        site = cls.objects.filter(sitedomain__domain=host).first()
        if site is None:
            # An explicit raise instead of `assert`, which is stripped under -O.
            raise cls.DoesNotExist(f"no Site has a SiteDomain for host {host!r}")
        return site

    @classmethod
    def from_request(cls, request) -> Site:
        """Return the Site matching the request's ``Host`` header."""
        host = request.headers.get("Host")
        return cls.from_hostname(host)


class SiteDomain(EMOM('site_domain'), models.Model):
    """A hostname under which a Site is served."""

    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    domain = models.CharField(max_length=512)
Base migration
python source:migrations.CreateModel( name="Site", fields=[ ( "key", models.CharField(max_length=512, primary_key=True, serialize=False), ), ("title", models.CharField(max_length=512)), ( "css_file", models.CharField(blank=True, default=None, max_length=512), ), ( "link_color", models.CharField(blank=True, default=None, max_length=8), ), ], ), migrations.CreateModel( name="SiteDomain", fields=[ ( "id", models.BigAutoField( auto_created=True, primary_key=True, serialize=False, verbose_name="ID", ), ), ( "site", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, to="arcology.site" ), ), ("domain", models.CharField(default="localhost", max_length=512)), ], ),
arcology.models.Page
A site has many pages. Pages have a routing key defined by the ARCOLOGY_KEY keyword, a title based on the TITLE keyword stored and some metadata besides that.
# python source: :tangle arcology/models.py
import os


class Page(EMOM('page'), models.Model):
    """A page of a Site, backed by one roam File and addressed by a route key."""

    # The source file this page is generated from.
    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    # "<site key>/<rest of path>"; taken from the ARCOLOGY_KEY keyword on ingest.
    route_key = models.CharField(max_length=512, primary_key=True)
    # The level-0 heading of the file; its title becomes the page title.
    root_heading = models.ForeignKey(roam.models.Heading, on_delete=models.CASCADE)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)

    # Set on ingest when the file carries an ARCOLOGY_ALLOW_CRAWL keyword.
    allow_crawl = models.BooleanField(default=False)
These are created using the =create_from_arroyo= pattern which makes it easy for the Arcology =ingestfiles= Command to include new functionality in to the system. In short, a section within the Arcology Project Configuration signals to the ingestfiles command to call Page.create_from_arroyo() with a bundle of document metadata from the parser.
python source: :tangle arcology/models.py@classmethod def create_from_arroyo(cls, doc: native.Document) -> Page: f = roam.models.File.objects.get(path=doc.path) route_key = next(iter(doc.collect_keywords("ARCOLOGY_KEY")), "") allow_crawl = ( next(iter(doc.collect_keywords("ARCOLOGY_ALLOW_CRAWL")), False) is not False ) site = Site.from_route(route_key) root_heading = f.heading_set.filter(level=0)[0] title = root_heading.title or "" return cls.objects.get_or_create( file=f, route_key=route_key, allow_crawl=allow_crawl, site=site, root_heading=root_heading, title=title, )[0]
The Page has helpers to return URLs and domain-relative URLs as well if you're feeling cheeky:
python source: :tangle arcology/models.pydef to_url(self): site = self.site return site.urlize_page(self) def to_url_path(self): key_rest = self.route_key.split("/", 1)[1] return f"/{key_rest}"
There's a grip of functions that return ... unfortunately a mix of QuerySet and list of child-objects... really want to refactor this some day!!
python source: :tangle arcology/models.pydef collect_keywords(self): # returns queryset... return self.file.keyword_set def collect_tags(self) -> List[str]: heading_ids = self.file.heading_set.exclude(tag__tag__in=['noexport', 'NOEXPORT']).values_list("node_id") tags = roam.models.Tag.objects.filter( heading_id__in=heading_ids ).values_list("tag").distinct() return [ h[0] for h in tags ] def collect_references(self) -> List[Reference]: return [ reference for heading in self.file.heading_set.all() for reference in heading.reference_set.all() ] def collect_links(self, include_attachments=True): # returns typed dict (id/file-name -> URL) my_headings = self.file.heading_set.exclude(tag__tag__in=['noexport', 'NOEXPORT']) link_objs = self.file.outbound_links.all() ret = { h.node_id: url for h in my_headings if (url := h.to_url()) is not None } for el in link_objs: try: h = el.dest_heading if h is not None: url = h.to_url() if url is not None: ret[h.node_id] = url logger.info(f"link {url} from {el}") except roam.models.Heading.DoesNotExist: logger.info(f"{el} does not have dest") if include_attachments: for heading in self.file.heading_set.all(): common_path = os.path.dirname(heading.path.path) for attachment in heading.attachment_set.all(): rel_path = f"file:{attachment.raw_file.removeprefix(common_path)[1:]}" attach_uri = f"attachment:{os.path.basename(attachment.raw_file)}" crushed_file = attachment.crushed_file.url ret[rel_path] = crushed_file ret[attach_uri] = crushed_file return ret def collect_attachments(self): """ pull include_attachments branch above in to here... need to return enough to populate the ret? ugh generator that returns a tuple??? augh. 
""" for attachment in heading.attachment_set.all(): rel_path = f"file:{attachment.raw_file.removeprefix(common_path)[1:]}" attach_uri = f"attachment:{os.path.basename(attachment.raw_file)}" crushed_file = attachment.crushed_file.url yield rel_path, crushed_file yield attach_uri, crushed_file def collect_backlinks(self) -> List[Link]: my_headings = self.file.heading_set.exclude(tag__tag__in=['noexport', 'NOEXPORT']) return list(set(roam.models.Link.objects.filter(dest_heading__in=my_headings)))
The Page is also of course a thing which can be transformed in to HTML. That's what a Page is there to do, become HTML. The links are passed in to the Arroyo Org Exporter so that id: links are http: links to folks on the web.
The call is memoized with a call in to the low-level Django cache via a custom decorator so that repeated views of the same page are served from disk rather than a CPU-based parse operation. I hope that is a good idea! I used an in-memory cache too for a while, but this thing seems to work basically well enough and I refuse to further micro-optimize. It would be nice to use the higher-level view caching stuff a bit more rather than this low-level abstraction, i guess?
python source: :tangle arcology/models.pydef to_html(self, links, heading=None, include_subheadings=False): return self._to_html_memoized(hashabledict(links), heading, include_subheadings, self.file.digest) @cache(key_prefix="page_html", expire_secs=60*60*24*7) def _to_html_memoized(self, links, heading, include_subheadings, _file_digest): if heading is not None: headings = [heading] else: headings = [] opts = native.ExportOptions( link_retargets=links, limit_headings=headings, include_subheadings=include_subheadings, ignore_tags=settings.IGNORED_ROAM_TAGS, todo_keywords=( ["NEXT", "INPROGRESS", "WAITING"], ["DONE", "CANCELLED"] ), ) return native.htmlize_file(self.file.path, opts)
arcology.models.Page.find_by_url() is a nice helper that you can feed a URL in to and get a Page object back.
python source: :tangle arcology/models.py@classmethod def find_by_url(cls, url: str) -> Optional[Page]: import urllib.parse res = urllib.parse.urlparse(url) site = Site.from_hostname(res.hostname) key = res.path[1:] if site.key == "localhost": full_key = key new_site_key = key.split("/")[0] site = Site.objects.filter(key=new_site_key).first() else: full_key = f"{site.key}/{key}" return Page.objects.get(route_key=full_key)
Base migration:
python source:migrations.CreateModel( name="Page", fields=[ ( "route_key", models.CharField(max_length=512, primary_key=True, serialize=False), ), ("title", models.CharField(max_length=512)), ("allow_crawl", models.BooleanField(default=False)), ( "file", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, to="roam.file" ), ), ( "root_heading", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, to="roam.heading" ), ), ( "site", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, to="arcology.site" ), ), ], ),
arcology.models.Feed
Pages can define an Atom feed + Feediverse feeds by tagging a page with the ARCOLOGY_FEED keyword and making sure the headings have a PUBDATE and an ID property. This feature relies on Pandoc right now; I'll need to write a custom Atom exporter in The arroyo_rs Native Org Parser when it comes time to implement these feeds.
These are also created using the =create_from_arroyo= pattern which makes it easy for the Arcology ingest_files Command to include new functionality in to the system.
# python source: :tangle arcology/models.py
class Feed(EMOM('feed'), models.Model):
    """A feed declared by a file through its ARCOLOGY_FEED keyword."""

    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]

    file = models.ForeignKey(
        roam.models.File,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512, primary_key=True)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)

    def url(self):
        """Return the absolute URL of this feed, as built by its Site."""
        return self.site.urlize_feed(self)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> Feed | None:
        """Create or refresh the Feed declared by a parsed document.

        Returns None when the document has no ARCOLOGY_FEED keyword.
        Visibility comes from ARCOLOGY_TOOT_VISIBILITY and defaults to
        "private"; the title is that of the file's level-0 heading.
        """
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )

        f = roam.models.File.objects.get(path=doc.path)
        site = Site.from_route(route_key)
        root_heading = f.heading_set.filter(level=0)[0]
        # Same guard as Page.create_from_arroyo: never store a None title.
        title = root_heading.title or ""

        # route_key is the primary key: look the row up by it alone and
        # update the rest, so re-ingesting a feed whose title or visibility
        # changed updates the row instead of attempting a duplicate insert.
        feed, _created = cls.objects.update_or_create(
            route_key=route_key,
            defaults={
                "file": f,
                "title": title,
                "visibility": visibility,
                "site": site,
            },
        )
        return feed

    @classmethod
    async def aget(cls, **kwargs):
        """Asynchronously fetch one Feed, prefetching its file and site."""
        return await cls.objects.prefetch_related("file", "site").aget(
            **kwargs
        )
Base migration
python source:migrations.CreateModel( name="Feed", fields=[ ( "route_key", models.CharField(max_length=512, primary_key=True, serialize=False), ), ("title", models.CharField(max_length=512)), ( "visibility", models.CharField( choices=[ ("unlisted", "Unlisted"), ("private", "Private"), ("public", "Public"), ("direct", "direct"), ], max_length=512, ), ), ( "file", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, to="roam.file" ), ), ( "site", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, to="arcology.site" ), ), ], ),
arcology.models.FeedEntry
A FeedEntry is a Heading with a PUBDATE property that exists on a page w/ ARCOLOGY_FEED Keyword. These are used to construct Feeds
# python source: :tangle arcology/models.py
class FeedEntry(EMOM('feed_entry'), models.Model):
    """One heading with a PUBDATE property, published as an entry of a Feed."""

    POST_VISIBILITY = [
        ("unlisted", "Unlisted"),
        ("private", "Private"),
        ("public", "Public"),
        ("direct", "direct"),  # might be different, XXX
    ]

    heading = models.ForeignKey(
        roam.models.Heading,
        on_delete=models.CASCADE,
    )
    feed = models.ForeignKey(
        Feed,
        on_delete=models.CASCADE,
    )
    route_key = models.CharField(max_length=512)
    site = models.ForeignKey(
        Site,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=512)
    visibility = models.CharField(max_length=512, choices=POST_VISIBILITY)

    pubdate = models.DateTimeField(auto_now=False)

    def to_html(self, links):
        """Render this entry's heading (with subheadings) to HTML.

        ``links`` is wrapped in a hashabledict and the file digest is passed
        along as an extra argument to the memoized implementation.
        """
        return FeedEntry._to_html_memoized(
            self.heading.path.path,
            self.heading.node_id,
            hashabledict(links),
            self.heading.path.digest,
        )

    @classmethod
    @cache(key_prefix="feedentry_html", expire_secs=60*60*24*7)
    def _to_html_memoized(cls, path, heading_id, links, _file_digest):
        """Run the native HTML export; ``_file_digest`` is not used in the body."""
        opts = native.ExportOptions(
            limit_headings=[heading_id],
            link_retargets=links,
            include_subheadings=True,
            ignore_tags=settings.IGNORED_ROAM_TAGS,
            todo_keywords=(
                ["NEXT", "INPROGRESS", "WAITING"],
                ["DONE", "CANCELLED"],
            ),
        )
        return native.htmlize_file(path, opts)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[FeedEntry] | None:
        """Create FeedEntry rows for the document's headings that carry a PUBDATE.

        Returns None when the document has no ARCOLOGY_FEED keyword, else the
        list of entries (existing or newly created). Headings without an id
        or without a PUBDATE property are skipped. The Feed for the route key
        must already exist.
        """
        route_key = next(iter(doc.collect_keywords("ARCOLOGY_FEED")), None)
        if not route_key:
            return None
        visibility = next(
            iter(doc.collect_keywords("ARCOLOGY_TOOT_VISIBILITY")), "private"
        )

        site = Site.from_route(route_key)
        feed = Feed.objects.get(route_key=route_key)

        rets = []
        for nheading in doc.headings:
            if nheading.id is None:
                continue
            heading = roam.models.Heading.objects.get(node_id=nheading.id)
            # One query instead of exists() followed by first().
            pubdate_prop = heading.headingproperty_set.filter(keyword="PUBDATE").first()
            if pubdate_prop is None:
                continue
            pubdate = arrow.get(
                pubdate_prop.value, "YYYY-MM-DD ddd H:mm"
            ).format(arrow.FORMAT_RFC3339)
            # NOTE(review): every field takes part in the lookup, so a changed
            # title or pubdate yields a second row for the same heading unless
            # the old one is removed elsewhere -- confirm against the ingest flow.
            entry, _created = cls.objects.get_or_create(
                heading=heading,
                feed=feed,
                route_key=route_key,
                title=heading.title,
                pubdate=pubdate,
                visibility=visibility,
                site=site,
            )
            rets.append(entry)
        return rets
NEXT arcology.models.Attachment
Here's a plan for a whole-ass project. I sure would like to have my Photo Mode tour of the Art of Rally Career Mode work...! 😢
INPROGRESS data model
look at all the Arcology =roam.models.Link= 's for each =arcology.models.Page= to see if any of them are local file links, and if they are then they should have thumbnails etc generated, and DB entries created
db tracks: the page route, the heading id, the attachment file path, the digest of the attachment, the attachment type
we want to be able to load all the attachment file paths and digests for a given page, and pass that to the The arroyo_rs Native Org Parser , such that it can do a process similar if not identical to the existing link rewriting/retargeting infrastructure, where the
=src= attribute with a link to the CDN or an entire =imgset= is swapped in during export
# python source: :tangle arcology/models.py
from django.core.files import File
from roam.models import Link, calculate_hash
from arcology.attachment_utils import maybe_crush_file


class Attachment(EMOM('attachment'), models.Model):
    """A local file attached to a heading, plus its web-optimized copy."""

    heading = models.ForeignKey(
        roam.models.Heading,
        on_delete=models.CASCADE,
    )

    # Path of the attachment as reported by the parser.
    raw_file = models.CharField(max_length=512)
    # The copy produced by maybe_crush_file, stored under "attachments".
    crushed_file = models.FileField(null=True, upload_to="attachments")
    # calculate_hash() of the raw file path.
    digest = models.CharField(max_length=512)

    @classmethod
    def create_from_arroyo(cls, doc: native.Document) -> List[Attachment] | None:
        """Create Attachment rows for every attachment of every heading in ``doc``.

        Returns the list of rows, existing or newly created. The crushed file
        is saved into storage only when the row does not already have one, so
        repeated ingests do not keep writing new copies of the same file.
        """
        ret = []
        for heading in doc.headings:
            for attachment in heading.attachments or []:
                logger.info(f"ATTACH: {attachment}")
                crushed_path = pathlib.Path(maybe_crush_file(attachment.file_path))
                obj, created = cls.objects.get_or_create(
                    heading_id=attachment.node_id,
                    raw_file=attachment.file_path,
                    digest=calculate_hash(attachment.file_path),
                )
                if created or not obj.crushed_file:
                    with crushed_path.open(mode="rb") as f:
                        obj.crushed_file = File(f, name=crushed_path.name)
                        obj.save()
                logger.info(f"ORM: {obj}")
                ret.append(obj)
        return ret
INPROGRESS processing pipeline
for now it'll just web-optimize a single copy of the file:
but some day configuration for: thumbnail sizes, default imgset, optimization, metadata stripping
does attachment processing happen async? or in watchsync/ingestfiles?
probably fine to just do it in watchsync calling out to imagemagick in a task queue especially if you're good about making sure the files end up in a syncthing dir and only need to be written once each time a file changes...
how should the syncthing dir work? should it be its own CAS directory or write thumbnails as dotfiles to ~/org/data or whatever which the web server has access to (but fontkeming does not)
# python source: :tangle arcology/attachment_utils.py
import pathlib
import subprocess
import os

from django.conf import settings
from roam.models import calculate_hash

import logging

logger = logging.getLogger(__name__)

# Suffixes (lower-case) that are handed to ImageMagick for optimization.
_CRUSHABLE_SUFFIXES = (".jpg", ".jpeg", ".png", ".webp", ".avif")


def make_crushed_path(path: str) -> pathlib.Path:
    """Return where the web-optimized copy of ``path`` lives.

    The result is ``settings.ATTACHMENT_DIR/<calculate_hash(path)><suffix>``.
    Raises Exception when ``path`` is a directory.
    """
    p = pathlib.Path(path)
    if p.is_dir():
        raise Exception(f"bad path to attachment {p}")

    hashed = calculate_hash(path)
    return pathlib.Path(settings.ATTACHMENT_DIR).joinpath(f"{hashed}{p.suffix}")


# XXX: out of band channel thingy for this...
def maybe_crush_file(path: str, copy_uncrushable=True) -> pathlib.Path:
    """Ensure a web-optimized copy of ``path`` exists and return its path.

    Image files are resized and re-encoded with ``magick``; other files are
    copied verbatim when ``copy_uncrushable`` is true. Nothing is done when
    the target already exists.

    Raises ``subprocess.CalledProcessError`` when ``magick`` fails; any
    partial output is removed first so the next call retries.
    """
    crushed_file = make_crushed_path(path)
    if crushed_file.exists():
        logger.debug(f"{crushed_file} exists already")
    else:
        logger.info(f"crush path {path} to {crushed_file}")
        # exist_ok avoids failing when another process creates it first.
        crushed_file.parent.mkdir(parents=True, exist_ok=True)
        # Compare case-insensitively so e.g. ".JPG" is optimized too.
        if crushed_file.suffix.lower() in _CRUSHABLE_SUFFIXES:
            try:
                subprocess.check_call([
                    "magick",
                    path,
                    "-auto-orient", "-strip",
                    "-adaptive-resize", "1024",
                    "-interlace", "Plane",
                    "-quality", "80",
                    crushed_file,
                ])
            except BaseException:
                # A half-written file would otherwise be treated as done by
                # the exists() check above on every later call.
                crushed_file.unlink(missing_ok=True)
                raise
        elif copy_uncrushable:
            import shutil
            logger.warning(f"uncrushable file {path} copied verbatim")
            shutil.copy(path, crushed_file)
    logger.info(f"'{crushed_file}' '{path}'")
    return crushed_file
INPROGRESS Web Serving and content distribution
file storage and CAS based on source file digest(??)
we already have an FS path in the source file which we could transform in to a relative path or slug, tho... hardlink farm(??)
the The Arcology Web Server stuff to set up serving the web-optimized images
the
=fontkeming= configuration to cache them at =sea1.wobscale.website=
making sure The arroyo_rs Native Org Parser can rewrite imgs in the HTML export...
handle both
=attachment:= URLs and =file:…data= URLs
this is a whole mess all over the place, hmm...
The Web Server
These are the route urlpatterns:
# python source: :tangle arcology/urls.py
from django.contrib import admin
from django.urls import path, re_path, include
from django.conf import settings
from django.contrib.staticfiles.urls import staticfiles_urlpatterns
from django.conf.urls.static import static

from arcology import views

urlpatterns = [
    path("admin/", admin.site.urls),
    path("", views.index),
    path("robots.txt", views.robots, name="robots_txt"),
    path("404", views.unpublished, name="page_not_found"),
    path("sites.css", views.site_css, name="site-css"),
    path("feeds.json", views.feed_list, name="feed-list"),
    path("reservations/", include("reservations.urls")),
    path("oracle/", include("oracle.urls")),
    path("", include("django_prometheus.urls")),
    path("", include("sitemap.urls")),
    # ensure these ones are last because they're greedy!
    # Raw strings: "\-" and "\." are invalid escape sequences in a normal
    # string literal. The regexes themselves are unchanged.
    re_path(r"(?P<key>[0-9a-zA-Z/_\-]+\.xml)", views.feed, name="feed"),
    re_path(r"(?P<key>[0-9a-zA-Z/_\-\.]+)", views.org_page, name="org-page"),
]

# Outside production, also serve the local API, static files and media.
if settings.ARCOLOGY_ENVIRONMENT != "production":
    urlpatterns = (
        [path("api/v1/", include("localapi.urls"))]
        + staticfiles_urlpatterns()
        + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
        + urlpatterns
    )
This is the topmatter for the views described below:
python source: :tangle arcology/views.pyimport logging from django.http import HttpResponse, HttpResponseNotFound, Http404 from django.shortcuts import render, get_object_or_404 from arcology.models import Page, Feed, Site from roam.models import Link from prometheus_client import Counter, Histogram logger = logging.getLogger(__name__)
GET / Arcology site index
this will just call the Org Page rendering function for the site's index page. render_page is defined below.
# python source: :tangle arcology/views.py
def index(request):
    """Render the ``<site key>/index`` page of the site the request targets."""
    requested_site = Site.from_request(request)
    return render_page(request, requested_site, f"{requested_site.key}/index")
Arcology Org Page handler
This constructs a page key from the request, tries to load that page and its HTML, and renders that along with a bunch of other metadata stored in relation to the Page object in the DB.
# python source: :tangle arcology/views.py
def org_page(request, key):
    """Resolve ``key`` to a full route key and render that page.

    On the ``localhost`` site the URL path is already the full route key and
    its first segment names the site to render as. On any other site the
    site key is prepended to the path.

    Raises Http404 when a localhost request names a site that does not exist.
    """
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
        new_site_key = key.split("/")[0]
        site = Site.objects.filter(key=new_site_key).first()
        if site is None:
            # render_page reads site.key; without this an unknown site key
            # fails with an AttributeError instead of a 404.
            raise Http404(f"no site for route key {full_key}")
    else:
        full_key = f"{site.key}/{key}"

    return render_page(request, site, full_key)
This render_page function is shared between the index request and the more complicated route handler.
It's manually instrumented with a few Prometheus Client counters and gauges to be emitted on top of what comes out of django-prometheus already. This extra instrumentation is just enough to make a per-site and per-page hit chart, along with some very rudimentary User-Agent break-down to filter out most of the automated traffic.
# python source: :tangle arcology/views.py
page_counter = Counter(
    "arcology_page",
    "Hit counter for each page",
    ["site", "page", "status", "agent_type"],
)
render_latency = Histogram(
    "arcology_page_render_seconds",
    "Latency for render_page func.",
    ["page", "site", "agent_type"],
)

from arcology.agent_utils import AgentClassification
from django.template import loader


def render_page(request, site, full_key):
    """Render the page stored under ``full_key``, or the 404 template.

    The body is timed with ``render_latency`` and each outcome is counted in
    ``page_counter``, both labelled by page, site and classified user agent.
    The template defaults to ``arcology/page.html`` and can be overridden by
    the file's ARCOLOGY_PAGE_TEMPLATE keyword.
    """
    agent = AgentClassification.from_request(request)
    timer = render_latency.labels(page=full_key, site=site.key, agent_type=agent)

    with timer.time():
        try:
            the_page = Page.objects.get(route_key=full_key)
        except Page.DoesNotExist:
            page_counter.labels(
                page=full_key, status=404, site=site.key, agent_type=agent
            ).inc()
            not_found_template = loader.get_template("404.html")
            body = not_found_template.render({"missing_key": full_key}, request)
            return HttpResponseNotFound(body)

        links = the_page.collect_links()
        page_html = the_page.to_html(links)
        feeds = site.feed_set.all()

        page_counter.labels(
            page=full_key, status=200, site=site.key, agent_type=agent
        ).inc()

        override = the_page.file.keyword_set.filter(
            keyword="ARCOLOGY_PAGE_TEMPLATE"
        ).first()
        page_template = override.value if override else "arcology/page.html"
        logger.info(page_template)

        context = {
            "site": site,
            "page": the_page,
            "feeds": feeds,
            "head_title": f"{the_page.title} - {site.title}",
            "html_content": page_html,
            "headings": the_page.file.heading_set.exclude(
                tag__tag__in=['noexport', 'NOEXPORT']
            ),
            "backlinks": the_page.collect_backlinks(),
            "keywords": the_page.collect_keywords().all(),
            "references": the_page.collect_references(),
            "tags": the_page.collect_tags(),
        }
        return render(request, page_template, context)
arcology/page.html extends app.html to embed the Org page and its metadata
The page template extends the app template defined below, which provides four blocks to inject content in to:
jinja2 source: :tangle arcology/templates/arcology/page.html{% extends "arcology/app.html" %}
The tab title is assembled from the page and site title:
jinja2 source: :tangle arcology/templates/arcology/page.html{% block title %}{{ head_title }}{% endblock %}
If the site has any feeds, they're injected in to the <head> along with any particular web-crawler rules.
{# jinja2 source: :tangle arcology/templates/arcology/page.html #}
{% block extra_head %}
  {# Advertise every feed of the site as an Atom alternate. #}
  {% for feed in feeds %}
    <link rel="alternate" type="application/atom+xml" href="{{ feed.url }}" title="{{ feed.title }}" />
  {% endfor %}
  {# allow_crawl is a boolean on Page; pages that do not opt in are kept out of crawler indexes. #}
  {% if not page.allow_crawl %}
    <meta name="robots" content="noarchive noimageindex noindex nofollow"/>
  {% else %}
    <meta name="robots" content=""/>
  {% endif %}
{% endblock %}
The main content block contains the <main> generated by the native parser, and a sidebar containing backlinks, and page metadata, and other crap.
{# jinja2 source: :tangle arcology/templates/arcology/page.html #}
{% load cache %}
{% block content %}
  {# HTML is sent through without HTML Escaping via | safe #}
  {{ html_content |safe }}
  {# NOTE(review): only the closing tag is here; the opening <main> presumably comes from html_content -- confirm. #}
  </main>
  {# The sidebar fragment is cached for 604800 seconds (7 days), keyed on the file digest. #}
  {% cache 604800 sidebar page.file.digest %}
  <section class="sidebar">
    {# Heading list is shown only when there is more than one heading. #}
    {% if headings|length > 1 %}
      <div class="headings">
        <h3>Interesting Headings</h3>
        <ul class="headings">
          {% for heading in headings %}
            <li>{{ heading.to_html|safe }}</li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {% if backlinks|length > 0 %}
      <div class="backlinks">
        <h3>Pages Linking Here</h3>
        <ul class="backlinks">
          {% for backlink in backlinks %}
            <li>{{ backlink.to_backlink_html|safe }}</li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {% if tags|length > 0 %}
      <div class="tags">
        <h3>Page Tags</h3>
        <ul class="tags">
          {% for tag in tags %}
            <li><a href="/tags/{{ tag }}">{{tag}}</a></li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {% if references|length > 0 %}
      <div class="references">
        <h3>External References</h3>
        <ul class="references">
          {% for ref in references %}
            <li><a target="_blank" href="{{ ref.ref }}">{{ref.ref}}</a></li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {# Keywords are printed back in their org-mode "#+KEY: value" form. #}
    {% if keywords|length > 0 %}
      <div class="keywords">
        <h3>Page Metadata Keywords</h3>
        <ul class="keywords">
          {% for keyword in keywords %}
            <pre>#+{{ keyword.keyword }}: {{ keyword.value }}</pre>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
  </section>
  {% endcache %}
{% endblock %}
Here's a really simple 404 template, too.
{# jinja2 source: :tangle arcology/templates/404.html #}
{% extends "arcology/app.html" %}

{% block title %}Page Not Found{% endblock %}
{% block h1 %}<h1>Page Not Found</h1>{% endblock %}

{% block content %}
<section>
  <p>
    The page you tried to open either has not been written by the author or the author has chosen to not publish it at this time. Please contact the author and include the URL of both the page you clicked the link on, as well as the link you'd like to read. You may just want to <a href="javascript:history.back()">Go Back</a>, too.
  </p>

  <p>
    If you're interested in a particular reference, you might of course have more luck using a public search engine like <a href="https://duckduckgo.com">DuckDuckGo</a> or <a href="https://kagi.com">Kagi</a>.
  </p>

  {# missing_key is the route key that could not be found, supplied by the view. #}
  <pre>MISSING KEY = {{ missing_key }}</pre>
</section>
{% endblock %}
arcology/wide_format.html moves the sidebar elements around
The page template extends the app template defined below, which provides four blocks to inject content in to:
jinja2 source: :tangle arcology/templates/arcology/wide_format.html{% extends "arcology/app.html" %}
The tab title is assembled from the page and site title:
jinja2 source: :tangle arcology/templates/arcology/wide_format.html{% block title %}{{ head_title }}{% endblock %}
If the site has any feeds, they're injected in to the <head> along with any particular web-crawler rules.
{# jinja2 source: :tangle arcology/templates/arcology/wide_format.html #}
{% block extra_head %}
  {# Advertise every feed of the site as an Atom alternate. #}
  {% for feed in feeds %}
    <link rel="alternate" type="application/atom+xml" href="{{ feed.url }}" title="{{ feed.title }}" />
  {% endfor %}
  {# allow_crawl is a boolean on Page; pages that do not opt in are kept out of crawler indexes. #}
  {% if not page.allow_crawl %}
    <meta name="robots" content="noarchive noimageindex noindex nofollow"/>
  {% else %}
    <meta name="robots" content=""/>
  {% endif %}
  {# Override the two-column flex layout so the content is one wide block. #}
  <style>
    .content {
      max-width: 100ch;
      display: block !important;
    }
    .content::before {
      align-self: stretch;
      border: none !important;
      margin-top: 0em !important;
      margin-bottom: 0em !important;
    }
    .content .main {
      display: block !important;
    }
  </style>
{% endblock %}
The main content block contains the <main> generated by the native parser, and a sidebar containing backlinks, and page metadata, and other crap.
{# jinja2 source: :tangle arcology/templates/arcology/wide_format.html #}
{% load cache %}
{% block content %}
  {# In this layout the heading list comes before the page body and is outside the cached fragment. #}
  {% if headings|length > 1 %}
    <div class="headings">
      <ul class="headings">
        {% for heading in headings %}
          <li>{{ heading.to_html|safe }}</li>
        {% endfor %}
      </ul>
    </div>
  {% endif %}
  {# HTML is sent through without HTML Escaping via | safe #}
  {{ html_content |safe }}
  {# NOTE(review): only the closing tag is here; the opening <main> presumably comes from html_content -- confirm. #}
  </main>
  {# The metadata fragment is cached for 604800 seconds (7 days), keyed on the file digest. #}
  {% cache 604800 wide page.file.digest %}
    {% if backlinks|length > 0 %}
      <div class="backlinks">
        <h3>Pages Linking Here</h3>
        <ul class="backlinks">
          {% for backlink in backlinks %}
            <li>{{ backlink.to_backlink_html|safe }}</li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {% if tags|length > 0 %}
      <div class="tags">
        <h3>Page Tags</h3>
        <ul class="tags">
          {% for tag in tags %}
            <li><a href="/tags/{{ tag }}">{{tag}}</a></li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {% if references|length > 0 %}
      <div class="references">
        <h3>External References</h3>
        <ul class="references">
          {% for ref in references %}
            <li><a target="_blank" href="{{ ref.ref }}">{{ref.ref}}</a></li>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
    {# Keywords are printed back in their org-mode "#+KEY: value" form. #}
    {% if keywords|length > 0 %}
      <div class="keywords">
        <h3>Page Metadata Keywords</h3>
        <ul class="keywords">
          {% for keyword in keywords %}
            <pre>#+{{ keyword.keyword }}: {{ keyword.value }}</pre>
          {% endfor %}
        </ul>
      </div>
    {% endif %}
  {% endcache %}
{% endblock %}
Org Page-specific CSS Stylings
Most of the page CSS is defined below as part of the app.html, but the content-specific CSS is here, nearer the actual implementation of the flexbox above.
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yes.content { margin-left: auto; margin-right: auto; padding: 1em; padding-top: 0; display: flex; flex-flow: row wrap; max-width: 120ch; } .content > section, main { display: inline-block; flex-grow: 1; flex-shrink: 1; flex-basis: 40em; padding: 1em; overflow: auto; } .content > section.sidebar { flex-grow: 0; flex-shrink: 1; flex-basis: 30ch; }
The sidebar itself is a vertical flexbox, pushing everything but the backlinks towards the bottom of the page.
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yessection.sidebar { display: flex; flex-flow: column wrap; } section.sidebar > div.backlinks { flex-grow: 1; }
Here are some hacks to put a line between the main content flexbox and the sidebar. I'm not sure I'll keep this, but it's nice to have a delimiter.
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yes.content::before { align-self: stretch; content: ''; border: 1px dotted var(--medium-gray); margin-top: 1em; margin-bottom: 1em; } .content > *:first-child { order: -1; }
And some simple image wrangling:
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yes.content img { display: block; width: 80%; margin: 0 auto; }
These rules annotate task headings by inserting an icon before them.
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yes.task.task-DONE::before {content: '\002611 ';} .task.task-DONE {color: var(--success);} .task.task-NEXT::before {content: '\01F195 ';} .task.task-NEXT {color: var(--primary);} .task.task-INPROGRESS::before {content: '\01F51C ';} .task.task-INPROGRESS {color: var(--secondary);} .task.task-WAITING::before {content: '\00231A ';} .task.task-WAITING {color: var(--warning);} .task.task-CANCELLED::before {content: '\002612 ';} .task.task-CANCELLED {color: var(--alert);} .task { font-family: "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol"; font-style: normal; }
This will display the header arguments to org-babel source blocks: You're staring right at one!
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yespre { background-color: var(--light-gray); } pre > span { margin: 0.5em; } pre > span.source-params { font-style: normal; margin-bottom: 0; background: var(--light-gray); } pre > code { display: block; border: 2px solid var(--light-gray); background-color: var(--white); font-style: normal; overflow: scroll; margin-top: 0; tab-size: 4ch; padding-top: 0; padding-left: 0.5em; padding-bottom: 1em; padding-right: 0.5em; } pre > br { display: none; }
Arcology Atom Feed Handler/Generator
This uses the sub-feature of the HTML exporter to export only certain sub-headings in The arroyo_rs Native Org Parser . The FeedEntry's defined above are used to construct the feed.
I do some gnarly stuff including just stuffing a custom Django template filter in to there so that I can keep a bunch of node ID -> $thing maps so that when I make the feed entries I can just reach in to a few dicts instead of shaping that all on the handler. But 仕方がない, this is ugly but some day i'll rewrite it.
# python source: :tangle arcology/views.py
import arrow
import roam.models


def feed(request, key):
    """Render the Atom feed stored under ``key`` for the requesting site.

    The feed's route key is ``<site.key>/<key>``, except when the request
    resolves to the ``localhost`` site: there ``key`` is used as the full
    route key as-is.

    Raises ``Http404`` when no ``Feed`` has that route key, or when the feed
    has no entries to publish.
    """
    # Get the site and construct the route key
    site = Site.from_request(request)
    if site.key == "localhost":
        full_key = key
    else:
        full_key = f"{site.key}/{key}"

    # Fetch page metadata
    the_feed = get_object_or_404(Feed, route_key=full_key)

    # Newest ten entries, evaluated once so the emptiness check, the
    # <updated> timestamp and the template all share the same result.
    entries = list(the_feed.feedentry_set.order_by("-pubdate")[:10])
    if not entries:
        # Http404 is an exception class: it must be raised, not returned.
        raise Http404(f"Feed {key} has no entries")

    try:
        page_author = roam.models.Keyword.objects.get(
            keyword="AUTHOR", path=the_feed.file
        ).value
    except roam.models.Keyword.DoesNotExist:
        logger.warning("Feed %s does not have an AUTHOR!", key)
        page_author = "Arcology User"

    # The page backing the feed supplies both the feed URL and the link map.
    page = the_feed.file.page_set.first()
    page_url = page.to_url()

    # entries is already sorted, so the first one is the most recent
    updated_at = arrow.get(entries[0].pubdate).format(arrow.FORMAT_RFC3339)

    # node-id -> URL
    links = page.collect_links()

    # node-id -> HTML
    html_map = {
        entry.heading.node_id: entry.to_html(links=links)
        for entry in entries
    }

    # node-id -> PUBDATE heading property
    pubdate_map = {
        entry.heading.node_id: arrow.get(entry.pubdate).format(arrow.FORMAT_RFC3339)
        for entry in entries
    }

    return render(
        request,
        "arcology/feed.xml",
        dict(
            title=the_feed.title,
            page_url=page_url,
            author=page_author,
            updated_at=updated_at,
            feed_entries=entries,
            htmls=html_map,
            pubdates=pubdate_map,
            links=links,
        ),
        content_type="application/atom+xml",
    )
An Atom feed is pretty simple, it's an XML document with multiple <entry>'s and the metadata we collected above. For once i'm glad that Python templating treats strings as HTML-Unsafe and escapes the generated HTML used in the Summary for me. This bit me in the past, with the FastAPI version -- the stuff that goes inside of type = "html" elements isn't necessarily valid XML so it needs to get escaped.
jinja2 source: :tangle arcology/templates/arcology/feed.xml :mkdirp yes<?xml version="1.0" encoding="utf-8"?> <feed xmlns="http://www.w3.org/2005/Atom"> <title>{{ title }}</title> <link href="{{ page_url }}"/> <updated>{{ updated_at }}</updated> <author> <name>{{ author }}</name> </author> <id>{{ page_url }}</id> {% for entry in feed_entries %} <entry> <title>{{ entry.title }}</title> <link href="{{ links | get_item:entry.heading.node_id }}"/> <id>urn:uid:{{ entry.heading.node_id }}</id> <updated>{{ pubdates | get_item:entry.heading.node_id }}</updated> <summary type="html">{{ htmls | get_item:entry.heading.node_id }}</summary> </entry> {% endfor %} </feed>
NEXT add category/tags to the entries
NEXT move this function to somewhere else more reasonable
This template relies on this custom Django template filter I nicked from StackOverflow to access a dict with a variable key.
# python source: :tangle arcology/views.py
from django.template.defaulttags import register


@register.filter(name="get_item")
def get_item(dictionary, key):
    """Template filter: look up ``key`` in ``dictionary`` via its ``get`` method.

    Lets a template index a dict with a variable key, e.g.
    ``{{ htmls | get_item:entry.heading.node_id }}``.
    """
    return dictionary.get(key)
CANCELLED see if the IDs are consistent with the old generator
404 unpublished/not found endpoint
There are plenty of links inside the Arcology which aren't meant to be clicked. roam: stub links will of course end up here.
# python source: :tangle arcology/views.py
def unpublished(request):
    """Render ``404.html`` as a 404 response for an unpublished/unknown key.

    The missing key is read from the ``key`` query parameter; when the
    parameter is absent the template receives ``"NOT_SUPPLIED"`` instead.
    """
    missing_key = request.GET.get("key")
    if missing_key is None:
        missing_key = "NOT_SUPPLIED"

    # NOTE(review): an older comment here mentioned querying links to build a
    # JSON doc for SigmaJS; this view does not do that.
    page_template = loader.get_template("404.html")
    body = page_template.render({"missing_key": missing_key}, request)
    return HttpResponseNotFound(body)
GET /robots.txt Endpoint
robots.txt is the roam:Robots Exclusion Protocol , a standard used by websites to indicate to visiting web crawlers and other web robots which portions of the website they are allowed to visit.
Disallow all GPT-alikes on all pages, I will add more to this list as necessary. Probably will pull these in to Arcology Project Configuration sooner or later.
Show all pages with a truthy ARCOLOGY_ALLOW_CRAWL roam.models.Keyword. If we're on local development, it will show all pages, otherwise only ones for the site being queried.
# python source: :tangle arcology/views.py
def robots(request):
    """Render ``robots.txt`` for the site the request was addressed to.

    Lists every page with ``allow_crawl=True``; unless the request resolves
    to the ``localhost`` site, the list is narrowed to the requesting site.
    """
    site = Site.from_request(request)

    crawlable = Page.objects.filter(allow_crawl=True)
    if site.key != "localhost":
        crawlable = crawlable.filter(site=site)

    context = {
        # user agents that get a blanket "Disallow: /"
        "disallow_all_agents": [
            "GPTBot",
            "ChatGPT-User",
            "Google-Extended",
            "CCBot",
            "anthropic-ai",
        ],
        "pages": crawlable.all(),
    }
    return render(
        request,
        "arcology/robots.txt",
        context,
        content_type="text/plain",
    )
Those values are passed to the Jinja template:
jinja2 source: :tangle arcology/templates/arcology/robots.txt :mkdirp yes{% for agent in disallow_all_agents %} User-agent: {{ agent }} Disallow: / {% endfor %} User-agent: * Disallow: / {% for page in pages %}Allow: {{ page.to_url_path }} {% endfor %}
GET /feeds.json Feed discovery endpoint
# python source: :tangle arcology/views.py
import json


def feed_list(request):
    """Return a JSON array describing every ``Feed`` in the database.

    Each element carries the feed's route key, its public URL, its title,
    the key of the owning site and its visibility.
    """
    # The resolved site is not used below; the call is kept because
    # Site.from_request asserts that the Host header maps to a known site.
    Site.from_request(request)

    payload = []
    for feed in Feed.objects.all():
        owner = feed.site
        payload.append(
            {
                "key": feed.route_key,
                "url": owner.urlize_feed(feed),
                "title": feed.title,
                "site": owner.key,
                "visibility": feed.visibility,
            }
        )

    return HttpResponse(json.dumps(payload), content_type="application/json")
GET /sites.css Per-Site link color dynamic CSS endpoint
This endpoint generates a dynamic CSS file that colorizes internal URLs based on The Arcology's Site List, which is stored in the database. It does something extremely wicked to make the page links less jarring until you hover over them by faking an alpha-channel in to the color.
# python source: :tangle arcology/views.py
def site_css(request):
    """Serve a generated stylesheet that tints links by destination site.

    One stanza is emitted per ``SiteDomain``: links pointing at that domain
    get the owning site's ``link_color`` with a ``66`` alpha suffix as
    background, switching to ``FF`` on hover. A final static stanza styles
    links to ``/404``.
    """
    stanzas = [
        (
            f' a[href*="//{domain.domain}"] {{ border-radius: 0.25em; padding: 0.1em;'
            f' background-color: {site.link_color}66; }}'
            f' a[href*="//{domain.domain}"]:hover {{'
            f' background-color: {site.link_color}FF !important; }} '
        )
        for site in Site.objects.all()
        for domain in site.sitedomain_set.all()
    ]

    # Static rules for stub links; the ::after/::before decorations are
    # disabled inside a CSS comment.
    stanzas.append(
        ' a[href*="/404"] { /* color: var(--alert); */ text-decoration: none; }'
        ' /* a[href*="/404"]::after { content: " β "; }'
        ' a[href*="/404"]::before { content: "β "; } */ '
    )

    # HttpResponse is handed the list of stanzas as its content
    return HttpResponse(stanzas, content_type="text/css")
app.html Arcology Site Templates
In short, there are four blocks that the page template and other templates will use to embed content in the rendered web page:
- title is the <title> element, the name of the tab.
- h1 is the displayed site/page title and only needs to be extended if some page wants to do something strange (like site index pages only showing the site title).
- extra_head is inside <head> and can be used to stuff more metadata in there.
- content is where the content goes.
for now it's largely lifted from Base HTML Template and Page HTML Templates from the FastAPI prototype with some nips and tucks to make it more streamlined and legible.
jinja2 source: :tangle arcology/templates/arcology/app.html :mkdirp yes<!DOCTYPE html> <html> <head>
The base template provides some basic information and loads the CSS sheets necessary to make things look nice, along with some page and author metadata. It provides a template block extra_head so that child templates can shove more <head> elements in here.
jinja2 source: :tangle arcology/templates/arcology/app.html :mkdirp yes{% load static %} {% load django_htmx %} <link rel="stylesheet" href="{% static 'arcology/css/app.css' %}"/> <link rel="stylesheet" href="{% static 'arcology/css/vulf.css' %}"/> <link rel="stylesheet" href="{% static 'arcology/css/default-colors.css' %}"/> <link rel="stylesheet" href="{% url 'site-css' %}"/> {% if site and site.css_file %} <link rel="stylesheet" href="{% static site.css_file %}"/> {% endif %} <meta name="author" content="Ryan Rix"/> <meta name="generator" content="Arcology Site Engine https://engine.arcology.garden/"/> <meta name="viewport" content="width=device-width, initial-scale=1"> <title>{% block title %}{{head_title | default:"The Arcology Project" }}{% endblock %}</title> {% block extra_head %}{% endblock %} </head>
The body consists of a header which has the site and page title (which can be overridden for example in the index handler to only show the site title) and links to the other sites. These should be loaded from the DB eventually.
jinja2 source: :tangle arcology/templates/arcology/app.html :mkdirp yes<body> <header> <div class="header-content"> {% block h1 %} <h1><a href='/'>{{ site.title }}</a></h1> <h2>{{ page.title }}</h2> {% endblock %} <div> • <a class="internal" href="https://thelionsrear.com">Life</a> • <a class="internal" href="https://arcology.garden">Tech</a> • <a class="internal" href="https://cce.whatthefuck.computer">Emacs</a> • <a class="internal" href="https://engine.arcology.garden">Arcology</a> • </div> </div> </header>
The content block is used in child templates to hide a <main>; the content div should be a main element instead but The arroyo_rs Native Org Parser wants to output a <main> and i'm not going to stop it, so the div is there to make the body's flexbox layout work.
jinja2 source: :tangle arcology/templates/arcology/app.html :mkdirp yes<div class="content"> {% block content %}{% endblock %} </div>
A footer contains the oh-so-important copyright notice and a limited privacy policy which I should update before I ship this, along with links to the sitemap and to my fediring neighbors.
jinja2 source: :tangle arcology/templates/arcology/app.html :mkdirp yes<footer> <hr/> © 02026 <a href="https://arcology.garden/people/rrix">Ryan Rix</a> <<a href="mailto:site@whatthefuck.computer">site@whatthefuck.computer</a>> <br/> <p> Care has been taken to publish accurate information to long-lived URLs, but context and content as well as URLs may change without notice. </p> <p> This site collects no personal information from visitors, nor stores any identifying tokens. If you or your personal information ended up in public notes please email me for correction or removal. </p> <p> Email me with questions, comments, insights, kind criticism. blow horn, good luck. </p> <p> View the <a href="/sitemap">Site Map</a> or the <a href="/tags">Tag Index</a> to explore the sites. </p> <p> <a href="https://fediring.net/previous?host=arcology.garden">←</a> <a href="https://fediring.net/">Fediring</a> <a href="https://fediring.net/next?host=arcology.garden">→</a> </p>
The FastAPI site had a "boredom mode" which would disable fonts and colors because some nerds were mean to me. This one will not have that until some nerds are mean to me.
jinja2 source: :tangle arcology/templates/arcology/app.html :mkdirp yes<!-- <p> <input type="checkbox" id="boredom-mode"><label for="boredom-mode">I do not like your aesthetic sensibilities!!</label> </p> <script type="text/javascript"> <<boredom>> </script> --> </footer> </body> </html>
CSS
this will be extended.
rather than using emoji for each site, it would be nice to subtly color them based on the link_color... will need to Do Some Bullshit to make that work though maybe.
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yesbody { font-family: "Vulf Mono", monospace; font-style: italic; font-size: medium; background-color: var(--white); color: var(--black); margin: 0; }
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yesheader { background-color: var(--light-gray); border-radius: 0.25em; margin-top: 0; border-bottom: 2px solid var(--dark-gray); } header > .header-content { padding: 1em; max-width: 120ch; margin-left: auto; margin-right: auto; } header h1, header h2 { margin-top: 0; display: inline; } header h2:before { content: " β "; }
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yesfooter { margin-left: auto; margin-right: auto; max-width: 120ch; font-size: smaller; text-align: center; } footer a { font-weight: 500; }
/* css source: :tangle arcology/static/arcology/css/app.css :mkdirp yes */

/* Links use the primary palette colour by default. */
a {
  color: var(--primary);
}

/* :visited is a pseudo-class (single colon); written as a pseudo-element
   (a::visited) the selector is invalid and the rule never applies. */
a:visited {
  color: var(--secondary);
}

/* Inline code opts out of the italic body font style. */
code {
  font-style: normal;
}
css source: :tangle arcology/static/arcology/css/app.css :mkdirp yesblockquote { border-left: 3pt solid var(--secondary); padding-left: 0.5em; } .verse { white-space: pre-wrap; } .fixed { white-space: pre-wrap; }
There are per-site CSS in The Arcology's Site List .
Generating @font-face rules for a bunch of fonts
Vulfpeck Fonts are pulled in with this code-gen because writing @font-face rules does not bring joy and I don't have the right to redistribute these files, so I won't check it in at all.
| VulfSans | Regular | 500 | | | VulfMono | Regular | 500 | | | VulfSans | Bold | 800 | | | VulfMono | Bold | 800 | | | VulfSans | Italic | 500 | italic | | VulfMono | Italic | 500 | italic | | VulfSans | BoldItalic | 800 | italic | | VulfMono | BoldItalic | 800 | italic | | VulfSans | Light | 300 | | | VulfMono | Light | 300 | | | VulfSans | LightItalic | 500 | italic | | VulfMono | LightItalic | 500 | italic |
gen_font_faceselisp source: :var tbl=font-face-tbl :results none(with-temp-buffer (-map (pcase-lambda (`(,first ,second ,weight ,style)) (insert (s-join "\n" (list "@font-face {" "font-family: " (if (equal first "VulfMono") "\"Vulf Mono\"" "\"Vulf Sans\"") "; src:" (concat "url('/static/arcology/fonts/" first "-" second ".woff') format('woff'),") (concat "url('/static/arcology/fonts/" first "-" second ".woff2') format('woff2'),") (concat "url('/static/arcology/fonts/" first "-" second ".ttf') format('truetype');") "font-weight: " (number-to-string weight) ";" (unless (equal style "") (concat "font-style: " style ";")) "}")))) tbl) (write-file "~/org/arcology-django/arcology/static/arcology/css/vulf.css"))
NEXT this is a lever for restructuring the arcology
app.html template would be provided by a configuration-module repo that a user should set up on a template that depends on arroyo, arcology, roam modules. It would be the one responsible for setting up gunicorn etc, and also provide the command line wrapper
NEXT Testing
Site from_request and from_key need to be tested
site urlize page function needs to be tested too
page collect functions at least need type annotations...
to_html instance method needs to be tested (and the memoization too); create_from_arroyo too
feed and feedentry: both the create_from_arroyo, to_html
the feed generator stuff in the view probably should go in to a model class, but test it.
page handler view logic, test that 404s work, check that localhost loads work
check optional sidebar stuff in the view logic
sitemap when i write it
per-site link color css endpoint