This is a way to abstract URL logic between the development domain and the production domain without so much fuss. It's a pair of modules that have an identical export. Each one has:
decorate_app(app: FastAPI) -> FastAPI
- this is called in the FastAPI server setup to inject different routes depending on the arcology_env
Configuration
I've considered putting URL generation in these and may yet do so, but
that is in arcology.key.ArcologyKey. Probably a mistake.
Shared Functions
Load up the template and the imports that render_page_from_key uses…
from fastapi import Request
from sqlmodel import Session
from fastapi.templating import Jinja2Templates
from typing import Optional
from arcology.arroyo import Page, engine
import arcology.html as html
from arcology.config import get_settings, Environment
# Relative path of the directory holding the Jinja2 templates.
templ_dir = "arcology/templates"
# Shared template loader; the route handlers render their responses through it.
templates = Jinja2Templates(directory=templ_dir)
This thing is responsible for loading the Arcology Page, generating an HTML response, and packaging it into a FastAPI response format. It does a lot, and the fact that it's pulling modules from all over the code base gives me great anxiety! This is probably something to really consider refactoring, or putting better abstractions into the Page module… or maybe not.
import asyncio
from fastapi import HTTPException
from arcology.feeds import make_feed_entries
async def render_page_from_key(request: Request, key: str, engine, site) -> Optional[templates.TemplateResponse]:
    """Load the Page stored under ``key`` and render it with ``page.html.j2``.

    Args:
        request: The incoming request; passed through to the template context.
        key: Key handed to ``Page.from_key`` to look the page up.
        engine: Database engine used to open the session.
        site: Value exposed to the template as ``site``.

    Returns:
        The ``page.html.j2`` template response.

    Raises:
        HTTPException: With status 404 when ``Page.from_key`` returns ``None``.
    """
    with Session(engine) as session:
        p = Page.from_key(key, session)
        if p is None:
            raise HTTPException(status_code=404, detail="Page not found.")

        # Each task is awaited right after it is created, so the document
        # HTML is finished before the backlink HTML is started.
        dhtml = await asyncio.create_task(p.document_html())
        bhtml = await asyncio.create_task(p.backlink_html())

        # Both HTML fragments go through the same rewrite step.
        document = html.rewrite_html(dhtml, session)
        backlink = html.rewrite_html(bhtml, session)

        feeds = make_feed_entries(p.get_site().key, session)

        return templates.TemplateResponse("page.html.j2", dict(
            site=site,
            page=p,
            document=document,
            backlink=backlink,
            request=request,
            feeds=feeds,
        ))
NEXT This routing split between local and prod doesn't work because the routes aren't domain-aware and are very greedy.
[#A] Arcology Domain-Aware Routing
There are only a handful of routes here, and they're, frankly, janky.
A handful of static routes feed functionality in other parts of the
site, but most of the heavy-lifting is done below in the public_router
function…
from fastapi import FastAPI, Request
from sqlmodel import Session
from fastapi.responses import HTMLResponse, FileResponse, Response
from arcology.routing.util import render_page_from_key, templates, crawlable_pages_for_site
from arcology.feeds import render_feed_from_file
from arcology.arroyo import engine
from arcology.key import ArcologyKey
from arcology.arroyo import Keyword
from arcology.sigma import sigma
def decorate_app(app: FastAPI) -> FastAPI:
<<robots-txt>>
<<sitemap>>
<<feedgen>>
<<primary-route>>
return app
GET /robots.txt
Inside of the cache is recorded whether a Page contains an #+ARCOLOGY_ALLOW_CRAWL
file property which is not set to nil. Based on this, it's possible to create a
robots.txt entry which only exposes pages I explicitly want crawled.
import logging
@app.get("/robots.txt", response_class=FileResponse, name="robots-txt")
async def robots_txt(request: Request):
    """Render ``robots.txt.j2`` with the crawlable pages of the requested site.

    The site key is derived from the request via ``ArcologyKey.from_request``
    and the page list comes from ``crawlable_pages_for_site``.
    """
    with Session(engine) as session:
        key = ArcologyKey.from_request(request).site_key
        crawlable_pages = crawlable_pages_for_site(key, session)
        return templates.TemplateResponse("robots.txt.j2", dict(
            request=request,
            pages=crawlable_pages,
        ))
User-Agent: *
Disallow: /
Allow: /static/css
{%- for page in pages %}
{%- set key = page.get_arcology_key() %}
{%- if key.rest == "index" %}
Allow: /$
{%- else %}
Allow: /{{key.rest}}$
{%- endif %}
{%- endfor %}
This goes in the Shared Functions module because I'm bad at information architecture.
from sqlmodel import select, SQLModel, Session
import arcology.arroyo as arroyo
from arcology.parse import *
def crawlable_pages_for_site(site_key: str, session: Session):
    """Return every Page of ``site_key`` whose ``allow_crawl`` is not ``"nil"``.

    Args:
        site_key: Key of the site; it is passed through ``print_sexp`` before
            being compared with ``Page.site``.
        session: Open database session the query is executed on.

    Returns:
        The result of ``session.exec(q).all()`` for the query built here.
    """
    # print_sexp is presumably provided by the star import from
    # arcology.parse -- TODO confirm.
    q = (
        select(arroyo.Page)
        .where(arroyo.Page.site == print_sexp(site_key))
        .where(arroyo.Page.allow_crawl != "nil")
    )
    return session.exec(q).all()
GET /sitemap and /sitemap/json
Use SigmaJS to generate the Arcology Sitemap.
@app.get("/sitemap/", response_class=HTMLResponse, name="sitemap_page")
async def sitemap(request: Request):
    """Render ``sitemap.html.j2`` for the site resolved from the request."""
    site = ArcologyKey.from_request(request).site
    return templates.TemplateResponse("sitemap.html.j2", dict(
        request=request,
        site=site,
    ))
# NOTE: this handler shares its function name with the /sitemap/ page
# handler; each one is attached to the app through its own @app.get decorator.
@app.get("/sitemap/json", name="sitemap_json")
async def sitemap(request: Request):
    """Return the result of ``sigma(engine)`` as the response body."""
    return sigma(engine)
GET /feeds.json
This generates a list of feeds seen by the arcology.arroyo.Feed database module and presents them in a simple JSON list to be used by other automation on my system, like my Feediverse cross-posting bots. This is the same metadata that the Arcology Feed Generator uses, though all of that currently reads from the underlying KVF store rather than the normalized database tables.
from arcology.arroyo import Feed, engine
from sqlmodel import select, SQLModel, Session
from typing import List
def get_feeds(session: Session) -> List[Feed]:
    """Return every ``Feed`` row reachable through ``session``."""
    q = select(Feed)
    return session.exec(q).all()
@app.get("/feeds.json", response_model=List[Feed], name="feed-list")
async def feed_list(request: Request):
    """Return all feeds from ``get_feeds``, serialized as ``List[Feed]``."""
    with Session(engine) as session:
        feeds = get_feeds(session)
        return feeds
NEXT Move get_feeds to the feeds util module, for the function which sticks the <link>s in <head>
GET /{key}.xml
This is a shortcut into the public router below for Arcology Feed Generator files; it's
basically a hack to set the media_type in the response…
@app.get("/{sub_key:path}.xml", response_class=Response, name="feed-route")
async def feed_route(request: Request, sub_key: str):
    """Serve a ``.xml`` path through ``public_router`` as ``application/atom+xml``.

    The route pattern keeps the ``.xml`` suffix out of ``sub_key``, so it is
    appended again before the key is handed to ``public_router``.
    """
    sub_key += ".xml" # dark laughter
    return Response(content=(await public_router(request, sub_key)), media_type="application/atom+xml")
GET /{key}/?
This does all the heavy lifting:
@app.get("/{sub_key:path}", response_class=HTMLResponse, name="base-route")
@app.get("/{sub_key:path}/", response_class=HTMLResponse, name="base-route")
async def public_router(request: Request, sub_key: str):
    """Resolve the request to an ArcologyKey and render a feed or a page.

    Args:
        request: The incoming request; its ``host`` header selects how the
            key is built.
        sub_key: Path captured by the route; used as the key only for
            ``localhost`` hosts.

    Returns:
        The result of ``render_feed_from_file`` when an ``ARCOLOGY_FEED``
        keyword exists for the key, otherwise the result of
        ``render_page_from_key``.
    """
    # On localhost the key is built from the path alone; for any other host
    # it is derived from the whole request.
    if request.headers.get('host',"").startswith('localhost'):
        key = ArcologyKey(key=sub_key)
    else:
        key = ArcologyKey.from_request(request)

    with Session(engine) as session:
        kw = Keyword.get("ARCOLOGY_FEED", key.key, session)
        if kw is not None:
            return await render_feed_from_file(request, kw.filename(), engine, key.site)

    return await render_page_from_key(request, key.key, engine, key.site)
NEXT HEAD /{key}/
I oughtta implement this to return metadata and whatnot for pages to generate previews on Fediverse, Twitter, etc…