shell:ln -s arroyo-arcology.el ~/org/cce/arroyo-arcology.el – this needs to be in the CCE directory for Arroyo Emacs to automatically load it.
This can be set up to automatically load in an Arroyo Emacs environment.
The Arcology is fundamentally about rendering and sharing entire org-mode documents on the web. This made the direct usage of org-roam's database a pretty straight-forward endeavor, until the migration to a Node-centered model with org-roam v2. This model has made my note-taking much better but it's forced me to rethink the data model of the Arcology pretty significantly.
This ultimately has developed over 2021 as Arroyo Systems Management – a set of sidecar metadata tables for my notes and the org-mode meta applications built on top of them. The Arcology's database is a set of tables derived from the metadata in my org-mode files. This database is generated inside of Emacs and mounted read-only by my FastAPI session via SQLModel. I would love to generate this database another way, but there is still only one high-quality org parser: org-mode.
The "entry point" of this API is the arcology.arroyo.Page
below. It has some class
methods hanging off it which can instantiate Pages from the database by
filename or routing key.
A page doesn't require much metadata to render or be found, really.
The org-mode source file, its ARCOLOGY_KEY
routing key, and the root arcology.roam.Node object's primary
ID. Most of this can be gleaned from the arcology.roam.File object and my
Keyword sidecar.
;; Register the file keywords used by the Arcology in `arroyo-db-keywords'.
(dolist (keyword '("ARCOLOGY_KEY"
                   "ARCOLOGY_FEED"
                   "ARCOLOGY_TOOT_VISIBILITY"
                   "ARCOLOGY_ALLOW_CRAWL"))
  (add-to-list 'arroyo-db-keywords keyword))
The ARCOLOGY_KEY
is a file property
which contains the page's "routing key" – a string with at least one
/
in it which separates the site it'll
publish to from the path it'll be published on – this maps to a URL in
the form of localhost:3000/$ARCOLOGY_KEY
or the first part will map to one of the public domains. this will make
more sense later on.
The ARCOLOGY_FEED
is a file property
which contains a routing key to an RSS feed
This is assembled using noweb syntax because
Page relies on Link being defined for the link_model
relationship… And there is some more
code that makes it in to arcology.arroyo
for setting up the session and engine down below under Arcology SQLModel Database
Bindings …
from typing import Optional, List
from sqlmodel import Field, Relationship, SQLModel
from arcology.parse import parse_sexp, print_sexp
<<arcology.arroyo.Link>>
<<arcology.arroyo.Page>>
<<arcology.arroyo.Tag>>
<<arcology.arroyo.Node>>
<<arcology.arroyo.Ref>>
<<arcology.arroyo.Keyword>>
<<arcology.arroyo.Feed>>
Anyways.
NEXT document schemas
explain inter-relations between these classes, maybe a relationship graph
explain columns and link to where specialized columns like allow_crawl
go and come from?
Arcology Page
A Page represents the minimal metadata required to find and render an
org-mode document
and generate links to it. I would love to someday not have to wire up
all these relationships by hand, I'll have to remodel this at some
point, but for now specifying all the primaryjoin
characteristics is enough.
from sqlmodel import Session, select
import hashlib
from arcology.key import ArcologyKey, id_to_arcology_key
import arcology.html as html
class Page(SQLModel, table=True):
= "arcology_pages"
__tablename__ file: str = Field(primary_key=True)
str = Field(description="The ARCOLOGY_KEY for the page")
key: str = Field(description="Primary title of the page")
title: hash: str = Field(description="The hash of the file when it was indexed")
str = Field(description="The ID for the page itself", foreign_key="nodes.node_id")
root_id: str = Field(description="Maps to an arcology.Site key.")
site: str = Field(description="Lisp boolean for whether this page should go in robots.txt")
allow_crawl:
"Node"] = Relationship(
nodes: List[="page",
back_populates=dict(
sa_relationship_kwargs="Node.file==Page.file"
primaryjoin
)
)"Tag"] = Relationship(
tags: List[=dict(
sa_relationship_kwargs="Tag.file==Page.file"
primaryjoin
)
)"Reference"] = Relationship(
references: List[=dict(
sa_relationship_kwargs="Reference.file==Page.file"
primaryjoin
)
)
def get_title(self):
return parse_sexp(self.title)
def get_key(self):
return parse_sexp(self.key)
def get_file(self):
return parse_sexp(self.file)
def get_arcology_key(self):
return ArcologyKey(self.get_key())
def get_site(self):
return self.get_arcology_key().site
<<page_link_relationships>>
<<page_classmethods>>
<<page_html_generators>>
@classmethod
def from_file(cls, path: str, session: Session):
    """
    Fetch the page whose ``file`` column equals ``print_sexp(path)``.

    The result is taken with ``.one()``, so anything other than exactly one
    matching row raises.
    """
    query = select(cls).where(cls.file == print_sexp(path))
    result = session.exec(query)
    return result.one()
@classmethod
def from_key(cls, key: str, session: Session):
    """
    Fetch the page whose ``key`` column equals ``print_sexp(key)``.

    Returns the first matching page, or ``None`` when no row matches.
    """
    query = select(cls).where(cls.key == print_sexp(key))
    # .first() yields None on an empty result and closes the result, instead
    # of pulling one row with next() and catching StopIteration.
    return session.exec(query).first()
The Page carries bi-directional link relationships to both the Link and the Page on the other side of it.
# Link rows that point at this page (Link.dest_file is this page's file).
backlinks: List["Link"] = Relationship(
    back_populates="dest_page",
    sa_relationship_kwargs={"primaryjoin": "Page.file==Link.dest_file"},
)

# Link rows that originate in this page (Link.source_file is this page's file).
outlinks: List["Link"] = Relationship(
    back_populates="source_page",
    sa_relationship_kwargs={"primaryjoin": "Page.file==Link.source_file"},
)

# Pages reached through the Link table; both directions are view-only.
backlink_pages: List["Page"] = Relationship(
    link_model=Link,
    back_populates="outlink_pages",
    sa_relationship_kwargs={
        "foreign_keys": "[Link.dest_file]",
        "viewonly": True,
    },
)

outlink_pages: List["Page"] = Relationship(
    link_model=Link,
    back_populates="backlink_pages",
    sa_relationship_kwargs={
        "foreign_keys": "[Link.source_file]",
        "viewonly": True,
    },
)
The code to insert a page relies on a bunch of stuff pulled out of
the page and out of the Arcology
Keywords store – be sure the arguments line up, and maybe i should
switch these to use &keys
eventually
so that it's less foot-gun-shaped
;; Table definition for `arcology-pages'.  The column order here is the
;; order `arroyo-arcology--insert-page' fills its row vector in.
(add-to-list 'arroyo-db--schemata
             '(arcology-pages
               [(file :not-null)
                (key :not-null)
                (site :not-null)
                (title :not-null)
                (root-id :not-null)
                (allow-crawl)
                (hash :not-null)]))
(defun arroyo-arcology--insert-page (file kw site title root-id allow-crawl hash)
  "Replace the `arcology-pages' row for FILE.
Rows whose file column equals FILE are deleted, then one row is inserted
from FILE, KW, SITE, TITLE, ROOT-ID, ALLOW-CRAWL and HASH, in that order."
  (arroyo-db-query [:delete :from arcology-pages
                    :where (= file $s1)]
                   file)
  (let ((row (vector file kw site title root-id allow-crawl hash)))
    (arroyo-db-query [:insert :into arcology-pages :values $v1]
                     row)))
Generating HTML from Arcology Pages
Arcology pages have two "documents" attached to them on render: the Org doc itself, and a document constituted from the backlinks.
The backlink document is generated dynamically using Page.make_backlinks_org
which just generates a
string from the Link relationships.
def make_backlinks_org(self):
    """
    Build an org-mode string with one heading per backlink.

    Each heading is an ``id:`` link to the linking node, labelled with the
    link's source title.  Returns ``''`` when ``self.backlinks`` is ``None``.
    """
    links = self.backlinks
    if links is None:
        return ''

    heading_template = "\n* [[id:{path}][{title}]]\n"
    headings = []
    for link in links:
        headings.append(
            heading_template.format(
                path=parse_sexp(link.source_id),
                title=link.get_source_title(),
            )
        )
    return '\n'.join(headings)
async def document_html(self):
    """
    Render this page's org file to HTML through ``html.gen_html``.

    The decoded ``hash`` column is passed along as the extra cache key.
    """
    source_path = parse_sexp(self.file)
    file_hash = parse_sexp(self.hash)
    return html.gen_html(source_path, file_hash)
async def backlink_html(self):
    """
    Render the generated backlink document to HTML through
    ``html.gen_html_text``.

    The SHA-224 hex digest of the org text is passed as the extra cache key.
    """
    backlinks_org = self.make_backlinks_org()
    digest = hashlib.sha224(backlinks_org.encode('utf-8'))
    return html.gen_html_text(backlinks_org, digest.hexdigest())
Invoking Pandoc
Pandoc is used to generate the HTML for a page. It's a versatile kit and I do some fair bit to extend it in other places, for example in the
The HTML generation is done using PyPandoc, which I guess is just a shell wrapper around it. Caching is cheated with a functools.lru_cache; for this to work out well I need to bring the file's hash in to the arcology.arroyo.Page so that the cache can bust when the document is updated.
import functools
import pypandoc
@functools.lru_cache(maxsize=128)
def gen_html(input_path: str, extra_cache_key: str = '', input_format: str = 'org'):
    """
    Convert the file at ``input_path`` to HTML with Pandoc.

    ``extra_cache_key`` is never read in the body; it only takes part in the
    ``lru_cache`` key, so passing a different value forces a fresh
    conversion.  ``input_format`` is the Pandoc reader to use.
    """
    # Honour the caller's reader instead of always forcing 'org'.
    return pypandoc.convert_file(input_path, 'html', format=input_format)
@functools.lru_cache(maxsize=128)
def gen_html_text(input_text: str, extra_cache_key: str = '', input_format: str = 'org'):
    """
    Convert the string ``input_text`` to HTML with Pandoc.

    ``extra_cache_key`` is never read in the body; it only takes part in the
    ``lru_cache`` key.  ``input_format`` is the Pandoc reader to use.
    """
    # Honour the caller's reader instead of always forcing 'org'.
    return pypandoc.convert_text(input_text, 'html', format=input_format)
Rewriting and Hydrating the Pandoc HTML
So the HTML that comes out of Pandoc is smart but doesn't understand,
for example, ID links; I could of course use Emacs and its org-html-export-as-html
but that shit is gonna
be really slow. Instead I'll do the work myself (lol).
from arcology.parse import print_sexp, parse_sexp
import arcology.arroyo as arroyo
import sqlmodel
import re
from typing import Optional
from arcology.key import id_to_arcology_key, file_to_arcology_key
class HTMLRewriter():
    """
    Base class for one regex-driven rewrite pass over generated HTML.

    Subclasses supply ``re()`` (the pattern) and ``replace(match)`` (the
    replacement callback); ``do()`` applies them with ``re.sub``.
    """

    def __init__(self, session):
        # Attribute text substituted for links whose target cannot be found.
        self.res_404 = 'href="/404?missing={key}" class="dead-link"'
        self.session = session

    def replace(self, match):
        """Return the replacement text for ``match``; subclasses override."""
        # The signature takes ``self`` so that calling the base method raises
        # NotImplementedError rather than a TypeError about argument counts.
        raise NotImplementedError()

    def re(self):
        """Return the pattern to search for; subclasses override."""
        raise NotImplementedError()

    def do(self, output_html):
        """Return ``output_html`` with every pattern match rewritten."""
        # ``re`` here is the module: method bodies do not see class-level names.
        return re.sub(self.re(), self.replace, output_html)
Rewriting the HTML is a pretty straightforward affair using re.sub with
callbacks rather than static replacements, with some abstraction
sprinkled on top in the form of the HTMLRewriter
superclass defined above. Each
implementation of it provides a function which accepts the match object,
and pulls the node's ARCOLOGY_KEY
with an optional node-id anchor
attached to it. This is then farmed out to arcology_key_to_url
or so to be turned in to
a URL. In this fashion, each href
is
replaced with a URL that will route to the target page, or a 404 page
link with a CSS class attached.
I'm pretty sure this is all quite inefficient but as always I invoke Personal Software Can Be Shitty.
So ID links can be rewritten like:
class IDReplacementRewriter(HTMLRewriter):
    """Rewrite ``href="id:..."`` attributes into routed URLs or dead links."""

    def replace(self, match):
        """Return the attributes replacing one ``id:`` href."""
        node_id = match.group(1)
        arcology_key = id_to_arcology_key(node_id, self.session)
        if arcology_key is None:
            # No key for this node: fall back to the 404 link.
            return self.res_404.format(key=node_id)
        target = arcology_key_to_url(arcology_key)
        return 'class="internal" href="{url}"'.format(url=target)

    def re(self):
        """Pattern capturing the node ID of an ``id:`` href."""
        return r'href="id:([^"]+)"'
File links can be rewritten like:
class FileReplacementRewriter(HTMLRewriter):
    """Rewrite ``href="file://..."`` attributes into routed URLs or dead links."""

    def replace(self, match):
        """Return the attributes replacing one ``file://`` href."""
        path = match.group(1)
        # The key lookup is skipped when no path was captured.
        arcology_key = None
        if path is not None:
            arcology_key = file_to_arcology_key(path, self.session)
        if arcology_key is None:
            return self.res_404.format(key=path)
        target = arcology_key_to_url(arcology_key)
        return 'class="file" href="{url}"'.format(url=target)

    def re(self):
        """Pattern capturing the path of a ``file://`` href."""
        return r'href="file://([^"]+)"'
org-roam stub links can be rewritten into a dead (404)
link. This one is a little wonky because res_404
and the other regexen only want to
operate on the anchor's attributes. This one also wants to strip the roam:
text from the [[roam:Stub]]
links.
class RoamReplacementRewriter(HTMLRewriter):
    """Turn ``roam:`` stub links into dead links and drop the ``roam:`` text."""

    def replace(self, match):
        """Return the dead-link attributes followed by the tag's ``>``."""
        # The pattern consumes '>roam:', so the '>' has to be put back.
        dead_link = self.res_404.format(key=match.group(1))
        return dead_link + ">"

    def re(self):
        """Pattern matching a ``roam:`` href plus the leading ``roam:`` of its text."""
        return r'href="roam:([^"]+)">roam:'
I also make some quality-of-life rewrites of my org-fc cloze cards in
to simple <span>
elements with the
hint embedded in them.
class FCClozeReplacementRewriter(HTMLRewriter):
    """Rewrite org-fc cloze markup into ``<span>`` elements carrying the hint."""

    def replace(self, match):
        """Return the ``<span>`` for one cloze: group 1 is the text, group 2 the hint."""
        main = match.group(1) or ""
        hint = match.group(2) or ""
        # The hint ends up inside an attribute, so strip any tags from it.
        hint = re.sub(r"</?[^>]+>", "", hint)
        # The attribute is single-quoted; an apostrophe in the hint would
        # otherwise terminate it early.
        hint = hint.replace("'", "&#39;")
        return f"<span class='fc-cloze' title='{hint}'>{main}</span>"

    def re(self):
        """Pattern for ``{{text}{hint}@N}`` with the hint part optional."""
        return r'{{([^}]+)}{?([^}]+)?}?@[0-9]+}'
Invoke all these in a simple little harness:
def rewrite_html(input_html: str, session: sqlmodel.Session) -> str:
    """
    Run a series of replacement functions on the input HTML and return a new string.

    The passes run in a fixed order: ID links, file links, roam stub links,
    then org-fc clozes.  Each pass receives the output of the previous one.
    """
    passes = (
        IDReplacementRewriter,
        FileReplacementRewriter,
        RoamReplacementRewriter,
        FCClozeReplacementRewriter,
    )

    rewritten = input_html
    for rewriter_class in passes:
        rewritten = rewriter_class(session).do(rewritten)
    return rewritten
It's logical that at some point this will have a "pluggable" URL engine, and in fact the production URLs will be hosted under different domains so deconstructing a URL to an ARCOLOGY_KEY … all of this can happen later, I am just playing jazz right now!
from arcology.key import ArcologyKey
def arcology_key_to_url(key: ArcologyKey) -> str:
    """Return the URL for ``key``, as produced by ``key.to_url()``."""
    url = key.to_url()
    return url
arcology.key.ArcologyKey
encapsulates parsing
and rendering URLs
The ArcologyKey
is a simple dataclass
encapsulating the things which the
ARCOLOGY_KEY
page keyword represents.
For example the key ArcologyKey(key=arcology/arroyo#arcology/arroyo/key)
will contain the following properties:
key
: the key passed in
site_key
: this is everything up to the first slash. It points to objects defined and fetchable through Arcology Sites.
site
: I typed the line above, and said "oh", and added this resolution of the arcology.sites.Site object.
rest
: "rest" is everything after the slash, but up to an optional anchor
anchor_id
: said optional anchor – Pandoc headings within the page will have the ID property as the anchor, this is handy!
from dataclasses import dataclass
from typing import Optional
from fastapi import Request
from starlette import routing
import sqlmodel
from arcology.parse import parse_sexp, print_sexp
from arcology.sites import sites, Site
from arcology.config import get_settings, Environment
from arcology.sites import host_to_site
# Compiled path patterns used to pull the ``sub_key`` out of a request path;
# the first has a trailing slash, the second does not.
route_regexp, _, _ = routing.compile_path("/{sub_key:path}/")
route_regexp2, _, _ = routing.compile_path("/{sub_key:path}")

import logging

# Module logger, set to DEBUG.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@dataclass
class ArcologyKey():
    """
    Parsed form of an ``ARCOLOGY_KEY`` routing key, ``site_key/rest#anchor_id``.

    Attributes:
        key: the key string exactly as passed in.
        site_key: everything before the first ``/`` (the whole key when
            there is no ``/``).
        site: result of ``sites.get(site_key, None)``; ``None`` when the
            site key is not registered.
        rest: text after the first ``/`` up to the first following ``#``.
        anchor_id: text after that ``#`` (up to a further ``#``), if any.
    """
    key: str
    site_key: str
    site: Optional[Site]
    rest: str = ""
    anchor_id: Optional[str] = None

    def __init__(self, key: str, site_key="", rest="", anchor_id = None):
        """
        Parse ``key``.

        ``rest`` and ``anchor_id`` are only kept when ``key`` does not
        supply those parts itself; ``site_key`` is always taken from ``key``.
        """
        self.key = key
        self.site_key = site_key
        self.rest = rest
        self.anchor_id = anchor_id

        head, slash, tail = key.partition('/')
        self.site_key = head
        self.site = sites.get(self.site_key, None)
        if slash:
            # '#' only separates an anchor once the first '/' has been seen.
            rest_part, hash_mark, anchor = tail.partition('#')
            self.rest = rest_part
            if hash_mark:
                # Text after a second '#' is dropped.
                self.anchor_id = anchor.partition('#')[0]

    def to_url(self) -> str:
        """
        Render the key as a URL.

        Uses the first domain configured for the current environment when
        the site has one; otherwise falls back to a localhost URL.  The
        anchor, when present, is appended as a fragment.
        """
        env = get_settings().arcology_env
        # self.site is None for an unregistered site key.
        domains = None
        if self.site is not None:
            domains = self.site.domains.get(env, None)

        if domains:
            url = "https://{domain}/{rest}".format(domain=domains[0], rest=self.rest)
        else:
            # self.key may already end in "#anchor"; strip that part so the
            # fragment is not emitted twice below.
            head, slash, tail = self.key.partition('/')
            local_key = head + slash + tail.partition('#')[0]
            url = "http://localhost:8000/{key}".format(key=local_key)
        if self.anchor_id is not None:
            url = url + "#" + self.anchor_id

        return url

    @staticmethod
    def from_request(request: Request):
        """Build a key from a request's URL path and ``host`` header."""
        path = request.url.path
        host = request.headers.get('host')
        return ArcologyKey.from_host_and_path(host, path)

    @staticmethod
    def from_host_and_path(host: str, path: str):
        """
        Build a key from ``host`` and ``path``.

        Returns ``None`` when the path matches neither route pattern or
        ``host_to_site`` finds no site.  An empty sub-key becomes ``index``.
        """
        m = route_regexp.match(path) or route_regexp2.match(path)
        if m is None:
            logger.debug("no path match: %s", path)
            return None
        sub_key = m.group("sub_key")

        site = host_to_site(host)
        if site is None:
            logger.debug("no host match: %s", host)
            return None
        if not sub_key:
            sub_key = "index"
        key = "{site_key}/{sub_key}".format(
            site_key=site.key,
            sub_key=sub_key,
        )
        return ArcologyKey(key)
Retrieving the ARCOLOGY_KEY
given an ID
is a pretty straightforward SQLModel query, actually. If the referenced
node is in the Arroyo database, by definition it's got a published arcology.arroyo.Page, and so it's a
matter of going and fetching it. If the Node is the root node (a direct
link to the document), simply return the key, otherwise append the
node-id to it so that a URL can link directly to the heading's
anchor.
def id_to_arcology_key(id: str, session: sqlmodel.Session) -> Optional[ArcologyKey]:
    """
    Given a node ID, return the ARCOLOGY_KEY for the node.

    Returns ``None`` when no node has that ID or the node has no page.
    For a node whose ``level`` is not 0 the key's ``anchor_id`` is set to
    ``id``.  Raises ``Exception`` when several nodes share the ID.
    """
    from .arroyo import Node

    query = sqlmodel.select(Node).where(Node.node_id == print_sexp(id))
    matches = session.exec(query).all()

    if not matches:
        return None
    if len(matches) != 1:
        raise Exception(f"more than one key for node? {id}")

    linked_node = matches[0]
    linked_page = linked_node.page
    if linked_page is None:
        return None

    ret = ArcologyKey(key=parse_sexp(linked_page.key))
    if linked_node.level != 0:
        ret.anchor_id = id
    return ret
By File is even more simple:
def file_to_arcology_key(file: str, session: sqlmodel.Session) -> Optional[ArcologyKey]:
    """
    Given a file path, return the ARCOLOGY_KEY of the page indexed for it.

    Returns ``None`` when no page row matches the file.
    """
    from .arroyo import Page

    query = sqlmodel.select(Page).where(Page.file == print_sexp(file))
    page = session.exec(query).first()
    if page is None:
        return None
    return ArcologyKey(key=parse_sexp(page.key))
NEXT HTML should inject sidenotes in during rewrite_html?
this would be slow and maybe janky but that's probably fine once it's memoized. :Project: :Project:
but this would mean that node backlinks would appear in-line, things like Topic Index have some trouble otherwise.
Arcology Tags
class Tag(SQLModel, table=True):
    """Row of ``arcology_tags``: one tag attached to a heading of a page."""

    __tablename__ = "arcology_tags"

    file: str = Field(primary_key=True, foreign_key="arcology_pages.file")
    tag: str = Field(primary_key=True, description="The tag itself.")
    node_id: str = Field(description="A heading ID which the tag applies to")

    # This accessor used to be named ``tag``.  Defining a method with the
    # same name as the field rebinds the class attribute, which throws away
    # the Field declared above; it is named like the get_* accessors on Page.
    def get_tag(self):
        """Return the ``tag`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.tag)
A page has any number of tags according to the file primary key:
;; Table definition for `arcology-tags'.
(add-to-list 'arroyo-db--schemata
             '(arcology-tags
               [(file :not-null)
                (tag :not-null)
                (node-id :not-null)]))
(defun arroyo-arcology--insert-tags (file node-tags)
  "Replace the `arcology-tags' rows for FILE.
NODE-TAGS is a list of (TAG NODE-ID) entries; one row is inserted per entry
after the existing rows for FILE are deleted."
  (arroyo-db-query [:delete :from arcology-tags
                    :where (= file $s1)]
                   file)
  (pcase-dolist (`(,tag ,node-id) node-tags)
    (let ((row (vector file tag node-id)))
      (arroyo-db-query [:insert :into arcology-tags
                        :values $v1]
                       row))))
Arcology Links
And for rewriting the links to point to their routing key, two tables:
A links
table which contains the file
and node ID references, as well as the title of the
source file which can be used to quickly generate backlink listings for
a given page (and its sub-heading nodes):
class Link(SQLModel, table=True):
    """
    Row of ``arcology_links``: one link from a source node/file to a
    destination node/file, plus the title shown in backlink listings.
    """

    __tablename__ = "arcology_links"

    source_title: Optional[str] = Field(
        default="",
        description="The title of the page the link is written in.",
    )

    def get_source_title(self):
        """Return the ``source_title`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.source_title)

    # --- node on the source side (back_populates="outlinks" is disabled) ---
    source_id: str = Field(primary_key=True, foreign_key="arcology_nodes.node_id")
    source_node: Optional["Node"] = Relationship(
        sa_relationship_kwargs={
            "primaryjoin": "Node.node_id == Link.source_id",
        },
    )

    # --- node on the destination side (back_populates="backlinks" is disabled) ---
    dest_id: str = Field(primary_key=True, foreign_key="arcology_nodes.node_id")
    dest_node: Optional["Node"] = Relationship(
        sa_relationship_kwargs={
            "primaryjoin": "Node.node_id == Link.dest_id",
        },
    )

    # --- page on the source side ---
    source_file: str = Field(primary_key=True, foreign_key="arcology_pages.file")
    source_page: Optional["Page"] = Relationship(
        back_populates="outlinks",
        sa_relationship_kwargs={
            "primaryjoin": "Page.file==Link.source_file",
        },
    )

    # --- page on the destination side ---
    dest_file: str = Field(primary_key=True, foreign_key="arcology_pages.file")
    dest_page: Optional["Page"] = Relationship(
        back_populates="backlinks",
        sa_relationship_kwargs={
            "primaryjoin": "Page.file==Link.dest_file",
        },
    )
Links in the org-roam database have a
useful type
column. We only store ID Links
for now… probably can support file links
easily enough but other "unidirectional" links I would like to store
elsewhere I think.
;; Table definition for `arcology-links'.  The column order here is the
;; order `arroyo-arcology--insert-links' fills its row vector in.
(add-to-list 'arroyo-db--schemata
             '(arcology-links
               [source-title
                (source-file :not-null)
                (source-id :not-null)
                (dest-file :not-null)
                (dest-id :not-null)]))
(defun arcology--published-page? (file)
  "Return t when FILE has an ARCOLOGY_KEY keyword in the Arroyo database, else nil."
  (if (arroyo-db-get "ARCOLOGY_KEY" file) t nil))
(defun arroyo-arcology--insert-links (file source-title links)
  "Replace the `arcology-links' rows whose source file is FILE.
SOURCE-TITLE is the title used for links written at level 0.  LINKS is a
list of (SOURCE DEST TYPE PROPS) entries.  Only entries whose TYPE is
\"id\" are stored, and only when the destination node's file is a
published page; \"https\", \"http\", \"roam\" and any other type are
skipped."
  (arroyo-db-query [:delete :from arcology-links
                    :where (= source-file $s1)]
                   file)
  (pcase-dolist (`(,source ,dest ,type ,_props) links)
    (when (equal type "id")
      (pcase-let* ((dest-file
                    (caar (org-roam-db-query
                           [:select file :from nodes
                            :where (= id $s1)]
                           dest)))
                   (`(,immediate-source-title ,immediate-source-level)
                    (car (org-roam-db-query
                          [:select [title level] :from nodes
                           :where (= id $s1)]
                          source)))
                   ;; "level 0 -> level n" unless n == 0
                   (composed-node-title
                    (if (= 0 immediate-source-level)
                        source-title
                      (concat source-title " -> " immediate-source-title))))
        (when (and dest-file (arcology--published-page? dest-file))
          (arroyo-db-query [:insert :into arcology-links
                            :values $v1]
                           (vector composed-node-title file source dest-file dest)))))))
INPROGRESS
source_title
should populate with the
immediate parent header's title, not level 0
It's passed in to arroyo-arcology--insert-links
Below. Not sure the better way to do that
– query org-roam-db
in the insert function
itself? good enough for now prolly.
deal with the title being fetched and populated in that function below if necessary.
Arcology Nodes
A nodes
table will help in reassembling
links in to HREFs
, in theory, but i don't
think it's necessary? maybe? There are bunch of other metadata on this
that I would like to pull across from org-roam eventually.
class Node(SQLModel, table=True):
    """Row of ``arcology_nodes``: one heading (or the file itself) with an ID."""

    __tablename__ = "arcology_nodes"

    node_id: str = Field(primary_key=True, description="The heading ID property")
    file: str = Field(
        description="File in which this Node appears",
        foreign_key="arcology_pages.file",
    )
    # NOTE(review): annotated as str, but id_to_arcology_key compares this
    # value against the integer 0 -- confirm the stored type.
    level: str = Field(description="Outline depth of the heading. 0 is top-level")

    # View-only link back to the page whose file this node appears in.
    page: Optional["Page"] = Relationship(
        back_populates="nodes",
        sa_relationship_kwargs={
            "viewonly": True,
            "primaryjoin": "Node.file==Page.file",
        },
    )
;; Table definition for `arcology-nodes'.
(add-to-list 'arroyo-db--schemata
             '(arcology-nodes
               [(node-id :not-null)
                (file :not-null)
                (level :not-null)]))
(defun arroyo-arcology--insert-nodes (file nodes)
  "Replace the `arcology-nodes' rows for FILE.
Each entry of NODES starts with (FILE ID LEVEL); the file stored for each
row is the one carried by the entry itself."
  (arroyo-db-query [:delete :from arcology-nodes
                    :where (= file $s1)]
                   file)
  (pcase-dolist (`(,node-file ,id ,level) nodes)
    (let ((row (vector id node-file level)))
      (arroyo-db-query [:insert :into arcology-nodes
                        :values $v1]
                       row))))
Arcology References
Each org-roam node can have a set of "references" attached to them, I use these URIs to point to a "canonical" resource which the node is referencing.
class Reference(SQLModel, table=True):
    """Row of ``arcology_refs``: one reference URI attached to a heading."""

    __tablename__ = "arcology_refs"

    file: str = Field(primary_key=True, foreign_key="arcology_pages.file")
    ref: str = Field(
        primary_key=True,
        description="The full URI of the reference itself.",
    )
    node_id: str = Field(description="A heading ID which the ref applies to")

    def url(self):
        """Return the ``ref`` column decoded with ``parse_sexp``."""
        decoded_ref = parse_sexp(self.ref)
        return decoded_ref
A page has any number of refs according to the file primary key:
;; Table definition for `arcology-refs'.
(add-to-list 'arroyo-db--schemata
             '(arcology-refs
               [(file :not-null)
                (ref :not-null)
                (node-id :not-null)]))
(defun arroyo-arcology--insert-refs (file node-refs)
  "Replace the `arcology-refs' rows for FILE.
NODE-REFS is a list of (REF TYPE NODE-ID) entries; each one is stored with
its reference written as \"TYPE:REF\"."
  (arroyo-db-query [:delete :from arcology-refs
                    :where (= file $s1)]
                   file)
  (pcase-dolist (`(,ref ,type ,node-id) node-refs)
    (let ((uri (format "%s:%s" type ref)))
      (arroyo-db-query [:insert :into arcology-refs
                        :values $v1]
                       (vector file uri node-id)))))
INPROGRESS Arcology Feeds
class Feed(SQLModel, table=True):
    """Row of ``arcology_feeds``: a feed routing key declared by a page."""

    __tablename__ = "arcology_feeds"

    file: str = Field(primary_key=True, foreign_key="arcology_pages.file")
    key: str = Field(primary_key=True, description="The routing key for the feed.")
    title: str = Field(description="Title of the page which the feed is embedded in")
    site: str = Field(description="Arcology Site which the feed resides on.")
    post_visibility: str = Field(
        description="Visibility of the feed's posts in feed2toot, etc",
    )

    def get_key(self):
        """Return the ``key`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.key)

    def get_arcology_key(self):
        """Wrap the decoded key in an ``ArcologyKey``."""
        decoded_key = self.get_key()
        return ArcologyKey(decoded_key)

    def get_title(self):
        """Return the ``title`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.title)

    def get_site(self):
        """Return the ``site`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.site)

    def get_post_visibility(self):
        """Return the ``post_visibility`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.post_visibility)

    def dict(self, **kwargs):
        """
        Return the decoded feed as a plain dict with the keys ``key``,
        ``url``, ``title``, ``site`` and ``visibility``.

        ``kwargs`` is accepted and ignored.
        """
        return {
            "key": self.get_key(),
            "url": self.get_arcology_key().to_url(),
            "title": self.get_title(),
            "site": self.get_site(),
            "visibility": self.get_post_visibility(),
        }
A page has any number of feeds according to the file primary key:
;; Table definition for `arcology-feeds'.  The column order here is the
;; order `arroyo-arcology--insert-feeds' fills its row vector in.
(add-to-list 'arroyo-db--schemata
             '(arcology-feeds
               [(file :not-null)
                (key :not-null)
                (title :not-null)
                (site :not-null)
                (post-visibility :not-null)]))
(defun arroyo-arcology--insert-feeds (file)
  "Replace the `arcology-feeds' row for FILE.
Nothing is inserted unless FILE has an ARCOLOGY_FEED keyword.  The site is
the feed key with everything from its first slash onwards removed."
  (arroyo-db-query [:delete :from arcology-feeds
                    :where (= file $s1)]
                   file)
  (when-let* ((key (car (arroyo-db-get "ARCOLOGY_FEED" file)))
              (site (replace-regexp-in-string "/.*" "" key)))
    ;; NOTE(review): post-visibility is declared :not-null in the schema but
    ;; is nil here when ARCOLOGY_TOOT_VISIBILITY is absent -- confirm.
    (let* ((title (arroyo-db--get-file-title-from-org-roam file))
           (post-visibility (car (arroyo-db-get "ARCOLOGY_TOOT_VISIBILITY" file))))
      (arroyo-db-query [:insert :into arcology-feeds
                        :values $v1]
                       (vector file key title site post-visibility)))))
Arcology Keywords
All of these models are generated below from the ARCOLOGY_KEY
entities embedded on each page.
these are Keywords, a 3-tuple of file, keyword, value,
a threeple
class Keyword(SQLModel, table=True):
    """Row of ``keywords``: a (file, keyword, value) triple."""

    __tablename__ = "keywords"

    file: str = Field(primary_key=True, foreign_key="arcology_pages.file")
    keyword: str = Field(primary_key=True, description="")
    value: str = Field(description="The value of the page")

    def filename(self):
        """Return the ``file`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.file)

    # These two accessors used to be named ``keyword`` and ``value``.
    # Defining a method with the same name as a field rebinds the class
    # attribute, which throws away the Field declared above and leaves
    # ``cls.keyword`` / ``cls.value`` in ``get`` pointing at plain functions.
    def get_keyword(self):
        """Return the ``keyword`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.keyword)

    def get_value(self):
        """Return the ``value`` column decoded with ``parse_sexp``."""
        return parse_sexp(self.value)

    @classmethod
    def get(cls, key: str, value: str, session: Session):
        """
        Return the first row whose keyword and value equal the printed forms
        of ``key`` and ``value``, or ``None`` when there is none.
        """
        query = (
            select(cls)
            .where(cls.keyword == print_sexp(key))
            .where(cls.value == print_sexp(value))
        )
        # .first() yields None on an empty result, replacing next() plus a
        # StopIteration handler.
        return session.exec(query).first()
Arcology Arroyo System Database Generator
Putting all those update functions together in an arroyo-db update function. This has to run after the org-roam and Arroyo System Cache keyword database is built, this is annoying and I need to rethink it.
(defun arroyo-arcology-update-file (&optional file)
  "Rebuild every Arcology table row derived from FILE.
FILE defaults to the current buffer's file.  Nothing happens unless FILE
has an ARCOLOGY_KEY keyword and its nodes, hash and level-0 node can all
be found in the org-roam database."
  (interactive)
  (when-let* ((file (or file (buffer-file-name)))
              (page-keyword (car (arroyo-db-get "ARCOLOGY_KEY" file)))
              ;; The site key is the routing key up to its first slash.
              (site-key (car (split-string page-keyword "/")))
              ;; Rows are (file id level title).
              (page-nodes (org-roam-db-query [:select [file id level title] :from nodes
                                              :where (= file $s1)]
                                             file))
              (file-hash (caar (org-roam-db-query [:select [hash] :from files
                                                   :where (= file $s1)]
                                                  file)))
              (page-node-ids (vconcat (mapcar #'cadr page-nodes)))
              (level-0-node (--first (eq 0 (nth 2 it)) page-nodes))
              (level-0-id (elt level-0-node 1))
              (level-0-title (elt level-0-node 3)))
    (let* ((crawl-keyword (car (arroyo-db-get "ARCOLOGY_ALLOW_CRAWL" file)))
           ;; make sure writing "nil" in the key is respected
           (allow-crawl (and crawl-keyword
                             (not (equal crawl-keyword "nil"))))
           (all-node-refs (org-roam-db-query [:select [ref type node_id] :from refs
                                              :where (in node_id $v1)]
                                             page-node-ids))
           (all-node-tags (org-roam-db-query [:select [tag node_id] :from tags
                                              :where (in node_id $v1)]
                                             page-node-ids))
           (links (org-roam-db-query [:select [source dest type properties] :from links
                                      :where (in source $v1)]
                                     page-node-ids)))
      (arroyo-arcology--insert-page file page-keyword site-key level-0-title level-0-id allow-crawl file-hash)
      (arroyo-arcology--insert-nodes file page-nodes)
      (arroyo-arcology--insert-tags file all-node-tags)
      (arroyo-arcology--insert-refs file all-node-refs)
      (arroyo-arcology--insert-feeds file)
      (arroyo-arcology--insert-links file level-0-title links))))
(defun arroyo-arcology-update-db (&optional _wut)
  "Run `arroyo-arcology-update-file' on every file with an ARCOLOGY_KEY.
The optional argument is ignored."
  (interactive)
  ;; this runs *after* db is updated... what to do here?
  ;; (filtering with `arroyo-db-file-updated-p' is disabled)
  (let* ((keyword-rows (arroyo-db-get "ARCOLOGY_KEY"))
         (files (-uniq (-map #'car keyword-rows))))
    (-map #'arroyo-arcology-update-file files)))
;; Run the Arcology update after every `arroyo-db-update-all-roam-files'.
(add-function :after
              (symbol-function 'arroyo-db-update-all-roam-files)
              #'arroyo-arcology-update-db)
;; (add-to-list 'arroyo-db-update-functions #'arroyo-arcology-update-file)

(provide 'arroyo-arcology)
Arcology SQLModel Database Bindings
The engine looks like this, and it's pretty easy to attach my
org-roam database here using the SQLAlchemy
Events System – you can munge a SQLModel
's __table__.schema
to query and map against the
org-roam metadatabase.
from sqlmodel import create_engine
from sqlalchemy import event
from arcology.config import get_settings
from pathlib import Path
settings = get_settings()
# Both database locations are expanded ("~") and resolved to absolute paths.
org_roam_sqlite_file_name = Path(settings.org_roam_db).expanduser().resolve()
arroyo_sqlite_file_name = Path(settings.arcology_db).expanduser().resolve()


def make_engine():
    """
    Create the SQLite engine for the Arroyo database.

    Every new DBAPI connection also attaches the org-roam database under
    the schema name ``orgroam``.
    """
    engine = create_engine('sqlite:///{path}'.format(path=arroyo_sqlite_file_name), echo=False)

    @event.listens_for(engine, "connect")
    def do_connect(dbapi_connection, _connection_record):
        # Bind the path as a parameter rather than formatting it into the
        # statement, so a quote character in the path cannot break the SQL.
        dbapi_connection.execute(
            "attach database ? as orgroam;",
            (str(org_roam_sqlite_file_name),),
        )

    return engine


engine = make_engine()
An interactive testing session could look like this, and indeed C-c C-c
in here will run it in an Inferior Python session:
from sqlmodel import select, SQLModel, Session
import arcology.arroyo as arroyo
from arcology.parse import *
= arroyo.engine
engine = Session(engine)
session
= next(session.exec(select(arroyo.Link)))
first_link
= arroyo.Page.from_file("/home/rrix/org/arroyo/arroyo.org", session)
from_file = arroyo.Page.from_key("doc/archive", session)
from_key
= await from_key.document_html() ht
Invoking the Arroyo generator from Python
Since the Arcology Arroyo System is written in Emacs Lisp, it's not exactly simple to update the database. When implemented as part of a long-running user-controlled Emacs environment, Arroyo uses Emacs's Hooks to update the database when org-mode files change.
Instead of doing that, we find ourselves implementing some scaffolding to replace it:
Org-mode files are put on the server with Syncthing
"Batch" commands for running Emacs with the Arroyo generators from a shell
This little Emacs Lisp script sets up some of the minimal CCE scaffolding to make the Arroyo-DB functions available to an environment.
;; Minimal scaffolding to make the Arroyo-DB functions available in a
;; batch Emacs.

;; Default the notes directory when the caller did not set one.
(unless (boundp 'org-roam-directory)
  (setq org-roam-directory (file-truename "~/org/")))

(load-file (expand-file-name "cce/packaging.el" org-roam-directory))

;; NOTE(review): only `default-directory' and `arroyo-source-directory' are
;; put on the load path; the batch command also sets
;; `arcology-source-directory', which is not used here -- confirm.
(dolist (directory (list default-directory arroyo-source-directory))
  (add-to-list 'load-path directory))

(use-package dash)
(use-package f)
(use-package s)
(use-package emacsql)
;; (use-package emacsql-sqlite3)

(dolist (feature '(subr-x cl org-roam arroyo-db arroyo-arcology))
  (require feature))
That script is loaded by this script which isn't a script, but a template for a Python module so that the locations and variables can be customized at run time, loaded from the Arcology BaseSettings.
(lord help me)
set -ex
# Build into a scratch copy beside the live database and move it into place
# once Emacs has finished.  This text is filled in by Python's str.format,
# so it must not contain literal curly braces.
# Assign first, export second: with "export VAR=$(...)" a failing mktemp
# would be hidden from "set -e".
DBPATH=$(mktemp $(dirname {arcology_db})/arcology.XXXXXXXXXX.db)
export DBPATH
pushd {arcology_src};
cp {arcology_db} "$DBPATH" || echo "no existing db found, will be created from scratch"
{emacs} -Q --batch \
  --eval '(setq org-roam-directory "{arcology_dir}")' \
  --eval '(setq arcology-source-directory "{arcology_src}/lisp")' \
  --eval '(setq arroyo-source-directory "{arroyo_src}")' \
  --eval '(setq arroyo-db-location "'"$DBPATH"'")' \
  --eval '(setq org-roam-db-location "{org_roam_db}")' \
  -l lisp/arcology-batch.el \
  --eval '(org-roam-db-sync)' # \
  # --eval '(arroyo-db-update-all-roam-files)' \
  # --eval '(arroyo-db-update-all-roam-files)' \
  # --eval '(arroyo-arcology-update-db)'
mv "$DBPATH" {arcology_db}
echo "rebuild done"
The Python extracts stuff from that FastAPI/Pydantic BaseSettings
module and templates it in with
format()
. Sorry for Literate Programming (sorry for party
rocking)
from .config import get_settings
# Shell script template; the fields in braces are filled in by build_command().
COMMAND_TMPL = """
<<arcology-batch-shell>>
"""


def build_command():
    """Return the batch shell command with the current settings filled in."""
    settings = get_settings()

    substitutions = {
        "arcology_dir": settings.arcology_directory,
        "arcology_src": settings.arcology_src,
        "arroyo_src": settings.arroyo_src,
        "arcology_db": settings.arcology_db,
        "org_roam_db": settings.org_roam_db,
        "emacs": settings.arroyo_emacs,
    }
    return COMMAND_TMPL.format(**substitutions)
This is executed by Arcology Automated Database Builder.