From bfe94aa2fe491c1ac766d56bab70c296252b1f14 Mon Sep 17 00:00:00 2001 From: garrettmills Date: Sat, 4 Jan 2025 01:20:47 -0500 Subject: [PATCH] Initial implementation for generating thread JSON files --- .gitignore | 3 ++ index.ts | 14 +++---- src/bones/collection/Iterable.ts | 5 ++- src/bones/crypto.ts | 8 ++++ src/config.ts | 8 ++++ src/mail/read.ts | 14 +++++-- src/threads/refresh.ts | 69 ++++++++++++++++++++++++++++++++ src/types.ts | 27 +++++++++++++ 8 files changed, 136 insertions(+), 12 deletions(-) create mode 100644 src/bones/crypto.ts create mode 100644 src/threads/refresh.ts diff --git a/.gitignore b/.gitignore index 9b1ee42..b805514 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +chorus-data +chorus + # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore # Logs diff --git a/index.ts b/index.ts index eead508..8acae9a 100644 --- a/index.ts +++ b/index.ts @@ -1,9 +1,7 @@ -import {withClient} from "./src/mail/client.ts"; -import {collect, Collection} from "./src/bones/collection/Collection.ts"; -import {getMailboxesToSearch, withMailbox} from "./src/mail/read.ts"; +import {ensureDirectoriesExist} from "./src/config.ts"; +import {refreshThreadsEntirely} from "./src/threads/refresh.ts"; -(await getMailboxesToSearch()) - .map(box => withMailbox(box, async c => { - console.log(await c.all()) - })) - .awaitAll() +;(async () => { + await ensureDirectoriesExist() + await refreshThreadsEntirely() +})() diff --git a/src/bones/collection/Iterable.ts b/src/bones/collection/Iterable.ts index e7bdac2..bdddde4 100644 --- a/src/bones/collection/Iterable.ts +++ b/src/bones/collection/Iterable.ts @@ -78,8 +78,11 @@ export abstract class Iterable { throw new Error('Chunk size must be at least 1') } + const total = await this.count() while ( true ) { - const items = await this.range(this.index, this.index + size - 1) + // Bound the RHS index by the max # of items in case the source + // doesn't gracefully handle out-of-bounds access + const items = await this.range(this.index, Math.min(this.index + size - 1, total - 1)) this.index += items.count() try { diff --git a/src/bones/crypto.ts b/src/bones/crypto.ts new file mode 100644 index 0000000..572b665 --- /dev/null +++ b/src/bones/crypto.ts @@ -0,0 +1,8 @@ + +const hasher = new Bun.CryptoHasher('sha256') +export function sha256(val: string|Uint8Array|ArrayBuffer): string { + hasher.update(val) + const digest = hasher.digest('base64') + hasher.update('') + return digest +} diff --git a/src/config.ts b/src/config.ts index ead12f6..c5c95de 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,3 +1,4 @@ +import { mkdir } from "node:fs/promises"; import {castCommentsConfig, type CommentsConfig} from "./types.ts"; const maybeConfig: any = { @@ -16,6 +17,13 @@ const maybeConfig: any = { idPrefix: 'c.', }, }, + dirs: { + data: process.env.CHORUS_DATA_DIR || 'chorus-data', + }, } export const config: CommentsConfig = castCommentsConfig(maybeConfig) + +export async function ensureDirectoriesExist(): Promise { + await mkdir(`${config.dirs.data}/threads`, { recursive: true }) +} diff --git a/src/mail/read.ts b/src/mail/read.ts index 33aa4df..e4456fb 100644 --- a/src/mail/read.ts +++ b/src/mail/read.ts @@ -10,12 +10,12 @@ import {AsyncCollection} from "../bones/collection/AsyncCollection.ts"; import {withClient} from "./client.ts"; import {buildThreadAddressMatcher} from "../threads/id.ts"; -export async function getMailboxesToSearch(thread?: string): Promise> { +export async function getMailboxesToSearch(thread?: string, client?: ImapFlow): Promise> { // There are 2 possibilities for where mail might end up. // Either the mail-server is configured to forward the + extensions // to their own folders automatically (e.g. the "c.1234" folder), or // aliased mail just winds up in INBOX. - return collect(await withClient(c => c.list())) + return collect(await (client?.list() || withClient(c => c.list()))) .filter(box => box.name === 'INBOX' || box.specialUse === '\\\\Inbox' || (!thread && box.name.startsWith(config.mail.threads.idPrefix)) @@ -31,6 +31,7 @@ export async function withMailbox(mailbox: string, cb: (c: AsyncCollection { private addressMatcher: RegExp private client?: Maybe + private borrowedClient: boolean = false private query: FetchQueryObject = { envelope: true, source: true, @@ -53,9 +54,14 @@ export class MailboxIterable extends Iterable { constructor( public readonly mailbox: string, + client?: ImapFlow, ) { super() this.addressMatcher = buildThreadAddressMatcher() + if ( client ) { + this.client = client + this.borrowedClient = true + } } async at(i: number): Promise> { @@ -104,6 +110,8 @@ export class MailboxIterable extends Iterable { date: message.envelope.date, from: message.envelope.from[0], subject: message.envelope.subject, + mailbox: this.mailbox, + modseq: message.modseq, recipients, content, thread, @@ -111,7 +119,7 @@ export class MailboxIterable extends Iterable { } public async release(): Promise { - if ( this.client ) { + if ( this.client && !this.borrowedClient ) { await this.client.logout() } } diff --git a/src/threads/refresh.ts b/src/threads/refresh.ts new file mode 100644 index 0000000..3d7613e --- /dev/null +++ b/src/threads/refresh.ts @@ -0,0 +1,69 @@ +import {getMailboxesToSearch, MailboxIterable} from "../mail/read.ts"; +import {withClient} from "../mail/client.ts"; +import type {Message, ThreadData} from "../types.ts"; +import {AsyncCollection} from "../bones/collection/AsyncCollection.ts"; +import {sha256} from "../bones/crypto.ts"; +import {config} from "../config.ts"; + +export async function refreshThreadsEntirely(): Promise { + await withClient(async client => { + const messagesByThread: {[thread: string]: Message[]} = {} + + const boxes = await getMailboxesToSearch(undefined, client) + const now = new Date() + + for ( const box of boxes.all() ) { + const iter = new MailboxIterable(box, client) + const messages = new AsyncCollection(iter) + + await messages.each(message => { + if ( !message.thread || !message.from.address ) { + return; + } + + if ( !messagesByThread[message.thread] ) { + messagesByThread[message.thread] = [] + } + + messagesByThread[message.thread].push(message) + }) + } + + for ( const threadId of Object.keys(messagesByThread) ) { + const threadData: ThreadData = { + thread: threadId, + refresh: { + date: now, + markers: {}, + }, + comments: [], + } + + const messages = messagesByThread[threadId] + for ( const message of messages ) { + if ( + !threadData.refresh.markers[message.mailbox] + || threadData.refresh.markers[message.mailbox] < message.modseq + ) { + threadData.refresh.markers[message.mailbox] = message.modseq + } + + threadData.comments.push({ + user: { + name: message.from.name || '(anonymous)', + mailId: sha256(message.from.address!), + domainId: sha256(message.from.address!.split('@').reverse()[0]), + }, + date: message.date, + subject: message.subject, + text: message.content, + }) + } + + const json = JSON.stringify( + threadData, + (_, v) => typeof v === 'bigint' ? `${v}` : v) + await Bun.write(`${config.dirs.data}/threads/${threadId}.json`, json) + } + }) +} diff --git a/src/types.ts b/src/types.ts index e1033bb..a5058d9 100644 --- a/src/types.ts +++ b/src/types.ts @@ -16,6 +16,9 @@ const commentsConfigSchema = z.object({ idPrefix: z.string(), }), }), + dirs: z.object({ + data: z.string(), + }), }) export type CommentsConfig = z.infer @@ -35,5 +38,29 @@ export type Message = { }, subject: string, content: string, + mailbox: string, + modseq: BigInt, thread?: string, } + +export type ThreadUser = { + name: string, + mailId: string, + domainId: string, +} + +export type ThreadComment = { + user: ThreadUser, + date: Date, + subject: string, + text: string, +} + +export type ThreadData = { + thread: string, + refresh: { + date: Date, + markers: {[key: string]: BigInt}, + }, + comments: ThreadComment[], +}