Initial implementation for generating thread JSON files

This commit is contained in:
Garrett Mills 2025-01-04 01:20:47 -05:00
parent 0f596ab5f5
commit bfe94aa2fe
8 changed files with 136 additions and 12 deletions

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
chorus-data
chorus
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
# Logs

View File

@ -1,9 +1,7 @@
import {withClient} from "./src/mail/client.ts";
import {collect, Collection} from "./src/bones/collection/Collection.ts";
import {getMailboxesToSearch, withMailbox} from "./src/mail/read.ts";
import {ensureDirectoriesExist} from "./src/config.ts";
import {refreshThreadsEntirely} from "./src/threads/refresh.ts";
(await getMailboxesToSearch())
.map(box => withMailbox(box, async c => {
console.log(await c.all())
}))
.awaitAll()
;(async () => {
await ensureDirectoriesExist()
await refreshThreadsEntirely()
})()

View File

@ -78,8 +78,11 @@ export abstract class Iterable<T> {
throw new Error('Chunk size must be at least 1')
}
const total = await this.count()
while ( true ) {
const items = await this.range(this.index, this.index + size - 1)
// Bound the RHS index by the max # of items in case the source
// doesn't gracefully handle out-of-bounds access
const items = await this.range(this.index, Math.min(this.index + size - 1, total - 1))
this.index += items.count()
try {

8
src/bones/crypto.ts Normal file
View File

@ -0,0 +1,8 @@
const hasher = new Bun.CryptoHasher('sha256')
export function sha256(val: string|Uint8Array|ArrayBuffer): string {
hasher.update(val)
const digest = hasher.digest('base64')
hasher.update('')
return digest
}

View File

@ -1,3 +1,4 @@
import { mkdir } from "node:fs/promises";
import {castCommentsConfig, type CommentsConfig} from "./types.ts";
const maybeConfig: any = {
@ -16,6 +17,13 @@ const maybeConfig: any = {
idPrefix: 'c.',
},
},
dirs: {
data: process.env.CHORUS_DATA_DIR || 'chorus-data',
},
}
export const config: CommentsConfig = castCommentsConfig(maybeConfig)
export async function ensureDirectoriesExist(): Promise<void> {
await mkdir(`${config.dirs.data}/threads`, { recursive: true })
}

View File

@ -10,12 +10,12 @@ import {AsyncCollection} from "../bones/collection/AsyncCollection.ts";
import {withClient} from "./client.ts";
import {buildThreadAddressMatcher} from "../threads/id.ts";
export async function getMailboxesToSearch(thread?: string): Promise<Collection<string>> {
export async function getMailboxesToSearch(thread?: string, client?: ImapFlow): Promise<Collection<string>> {
// There are 2 possibilities for where mail might end up.
// Either the mail-server is configured to forward the + extensions
// to their own folders automatically (e.g. the "c.1234" folder), or
// aliased mail just winds up in INBOX.
return collect(await withClient(c => c.list()))
return collect(await (client?.list() || withClient(c => c.list())))
.filter(box =>
box.name === 'INBOX' || box.specialUse === '\\\\Inbox'
|| (!thread && box.name.startsWith(config.mail.threads.idPrefix))
@ -31,6 +31,7 @@ export async function withMailbox<T>(mailbox: string, cb: (c: AsyncCollection<Me
export class MailboxIterable extends Iterable<Message> {
private addressMatcher: RegExp
private client?: Maybe<ImapFlow>
private borrowedClient: boolean = false
private query: FetchQueryObject = {
envelope: true,
source: true,
@ -53,9 +54,14 @@ export class MailboxIterable extends Iterable<Message> {
constructor(
public readonly mailbox: string,
client?: ImapFlow,
) {
super()
this.addressMatcher = buildThreadAddressMatcher()
if ( client ) {
this.client = client
this.borrowedClient = true
}
}
async at(i: number): Promise<Maybe<Message>> {
@ -104,6 +110,8 @@ export class MailboxIterable extends Iterable<Message> {
date: message.envelope.date,
from: message.envelope.from[0],
subject: message.envelope.subject,
mailbox: this.mailbox,
modseq: message.modseq,
recipients,
content,
thread,
@ -111,7 +119,7 @@ export class MailboxIterable extends Iterable<Message> {
}
public async release(): Promise<void> {
if ( this.client ) {
if ( this.client && !this.borrowedClient ) {
await this.client.logout()
}
}

69
src/threads/refresh.ts Normal file
View File

@ -0,0 +1,69 @@
import {getMailboxesToSearch, MailboxIterable} from "../mail/read.ts";
import {withClient} from "../mail/client.ts";
import type {Message, ThreadData} from "../types.ts";
import {AsyncCollection} from "../bones/collection/AsyncCollection.ts";
import {sha256} from "../bones/crypto.ts";
import {config} from "../config.ts";
export async function refreshThreadsEntirely(): Promise<void> {
await withClient(async client => {
const messagesByThread: {[thread: string]: Message[]} = {}
const boxes = await getMailboxesToSearch(undefined, client)
const now = new Date()
for ( const box of boxes.all() ) {
const iter = new MailboxIterable(box, client)
const messages = new AsyncCollection(iter)
await messages.each(message => {
if ( !message.thread || !message.from.address ) {
return;
}
if ( !messagesByThread[message.thread] ) {
messagesByThread[message.thread] = []
}
messagesByThread[message.thread].push(message)
})
}
for ( const threadId of Object.keys(messagesByThread) ) {
const threadData: ThreadData = {
thread: threadId,
refresh: {
date: now,
markers: {},
},
comments: [],
}
const messages = messagesByThread[threadId]
for ( const message of messages ) {
if (
!threadData.refresh.markers[message.mailbox]
|| threadData.refresh.markers[message.mailbox] < message.modseq
) {
threadData.refresh.markers[message.mailbox] = message.modseq
}
threadData.comments.push({
user: {
name: message.from.name || '(anonymous)',
mailId: sha256(message.from.address!),
domainId: sha256(message.from.address!.split('@').reverse()[0]),
},
date: message.date,
subject: message.subject,
text: message.content,
})
}
const json = JSON.stringify(
threadData,
(_, v) => typeof v === 'bigint' ? `${v}` : v)
await Bun.write(`${config.dirs.data}/threads/${threadId}.json`, json)
}
})
}

View File

@ -16,6 +16,9 @@ const commentsConfigSchema = z.object({
idPrefix: z.string(),
}),
}),
dirs: z.object({
data: z.string(),
}),
})
export type CommentsConfig = z.infer<typeof commentsConfigSchema>
@ -35,5 +38,29 @@ export type Message = {
},
subject: string,
content: string,
mailbox: string,
modseq: BigInt,
thread?: string,
}
export type ThreadUser = {
name: string,
mailId: string,
domainId: string,
}
export type ThreadComment = {
user: ThreadUser,
date: Date,
subject: string,
text: string,
}
export type ThreadData = {
thread: string,
refresh: {
date: Date,
markers: {[key: string]: BigInt},
},
comments: ThreadComment[],
}