WITH seed AS (
SELECT embedding AS seed_vec
FROM vectordb.public.langchain_pg_embedding
WHERE json_extract_scalar(cmetadata, '$.title') LIKE '%LLM%'
LIMIT 1
),
similar_docs AS (
SELECT DISTINCT
json_extract_scalar(e.cmetadata, '$.title') AS title,
json_extract_scalar(e.cmetadata, '$.source') AS url,
-- cosine similarity in pure Trino SQL
reduce(zip_with(e.embedding, s.seed_vec,
(a, b) -> CAST(a AS double) * CAST(b AS double)),
DOUBLE '0.0', (st, x) -> st + x, st -> st)
/ (sqrt(reduce(transform(e.embedding, ...), ...))
* sqrt(reduce(transform(s.seed_vec, ...), ...)))
AS similarity
FROM vectordb.public.langchain_pg_embedding e
CROSS JOIN seed s
ORDER BY similarity DESC
LIMIT 5
)
SELECT ai_gen(
'Create a recommended reading list from these articles: ' ||
(SELECT json_format(CAST(array_agg(title) AS JSON))
FROM similar_docs)
) AS reading_list;