Skip to content

Commit d1f4066

Browse files
committed
feat(RAG): initial beta with preprocessing, embedding, semantic retrieval, and ctx passage
1 parent 1923cd4 commit d1f4066

File tree

10 files changed

+612
-60
lines changed

10 files changed

+612
-60
lines changed

admin/app/controllers/ollama_controller.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,17 @@ export default class OllamaController {
4141

4242
if (lastUserMessage) {
4343
// Search for relevant context in the knowledge base
44+
// Using lower threshold (0.3) with improved hybrid search
4445
const relevantDocs = await this.ragService.searchSimilarDocuments(
4546
lastUserMessage.content,
4647
5, // Retrieve top 5 most relevant chunks
47-
0.7 // Minimum similarity score of 0.7
48+
0.3 // Minimum similarity score of 0.3 (lowered from 0.7 for better recall)
4849
)
4950

5051
// If relevant context is found, inject as a system message
5152
if (relevantDocs.length > 0) {
5253
const contextText = relevantDocs
53-
.map((doc, idx) => `[Context ${idx + 1}]\n${doc.text}`)
54+
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
5455
.join('\n\n')
5556

5657
const systemMessage = {

admin/app/controllers/rag_controller.ts

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import { RagService } from '#services/rag_service'
2+
import { EmbedFileJob } from '#jobs/embed_file_job'
23
import { inject } from '@adonisjs/core'
34
import type { HttpContext } from '@adonisjs/core/http'
45
import app from '@adonisjs/core/services/app'
56
import { randomBytes } from 'node:crypto'
67
import { sanitizeFilename } from '../utils/fs.js'
8+
import { stat } from 'node:fs/promises'
9+
import { getJobStatusSchema } from '#validators/rag'
710

811
@inject()
912
export default class RagController {
@@ -19,21 +22,50 @@ export default class RagController {
1922
const sanitizedName = sanitizeFilename(uploadedFile.clientName)
2023

2124
const fileName = `${sanitizedName}-${randomSuffix}.${uploadedFile.extname || 'txt'}`
22-
const fullPath = app.makePath('storage/uploads', fileName)
25+
const fullPath = app.makePath(RagService.UPLOADS_STORAGE_PATH, fileName)
2326

24-
await uploadedFile.move(app.makePath('storage/uploads'), {
27+
await uploadedFile.move(app.makePath(RagService.UPLOADS_STORAGE_PATH), {
2528
name: fileName,
2629
})
2730

28-
// Don't await this - process in background
29-
this.ragService.processAndEmbedFile(fullPath)
31+
// Get file size for tracking
32+
let fileSize: number | undefined = undefined
33+
try {
34+
const stats = await stat(fullPath)
35+
fileSize = stats.size
36+
} catch (error) {
37+
// Not critical if we can't get file size, just swallow the error
38+
}
39+
40+
// Dispatch background job for embedding
41+
const result = await EmbedFileJob.dispatch({
42+
filePath: fullPath,
43+
fileName,
44+
fileSize,
45+
})
3046

31-
return response.status(200).json({
32-
message: 'File has been uploaded and queued for processing.',
33-
file_path: `/uploads/${fileName}`,
47+
return response.status(202).json({
48+
message: result.message,
49+
jobId: result.jobId,
50+
fileName,
51+
filePath: `/${RagService.UPLOADS_STORAGE_PATH}/${fileName}`,
52+
alreadyProcessing: !result.created,
3453
})
3554
}
3655

56+
/**
 * Reports the state of the embedding job associated with an uploaded file.
 *
 * The request is validated with `getJobStatusSchema`; its `filePath` value is
 * resolved under `RagService.UPLOADS_STORAGE_PATH` before being handed to
 * `EmbedFileJob.getStatus`.
 *
 * Responds with 200 and the status object when a job exists, otherwise with
 * 404 and an error payload.
 */
public async getJobStatus({ request, response }: HttpContext) {
  const { filePath } = await request.validateUsing(getJobStatusSchema)

  const absolutePath = app.makePath(RagService.UPLOADS_STORAGE_PATH, filePath)
  const jobStatus = await EmbedFileJob.getStatus(absolutePath)

  return jobStatus.exists
    ? response.status(200).json(jobStatus)
    : response.status(404).json({ error: 'Job not found for this file' })
}
68+
3769
public async getStoredFiles({ response }: HttpContext) {
3870
const files = await this.ragService.getStoredFiles()
3971
return response.status(200).json({ files })

admin/app/jobs/embed_file_job.ts

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
import { Job } from 'bullmq'
2+
import { QueueService } from '#services/queue_service'
3+
import { RagService } from '#services/rag_service'
4+
import { DockerService } from '#services/docker_service'
5+
import { OllamaService } from '#services/ollama_service'
6+
import { createHash } from 'crypto'
7+
import logger from '@adonisjs/core/services/logger'
8+
9+
export interface EmbedFileJobParams {
10+
filePath: string
11+
fileName: string
12+
fileSize?: number
13+
}
14+
15+
/**
 * Queue job that runs `RagService.processAndEmbedFile` for one uploaded file
 * and records its lifecycle (`status`, timestamps, `chunks`, `error`) on the
 * job's own data so it can be read back through `getStatus`.
 */
export class EmbedFileJob {
  /** Name of the queue this job is added to. */
  static get queue() {
    return 'file-embeddings'
  }

  /** Job name used when adding to the queue. */
  static get key() {
    return 'embed-file'
  }

  /**
   * Derives a deterministic job id from a file path: the first 16 hex
   * characters of the path's SHA-256 digest. The same path always maps to the
   * same id, which is what lets `getByFilePath` find the job again.
   */
  static getJobId(filePath: string): string {
    return createHash('sha256').update(filePath).digest('hex').slice(0, 16)
  }

  /**
   * Processes and embeds the file referenced by the job.
   *
   * @param job - Queue job whose data is an `EmbedFileJobParams` payload.
   * @returns A summary with the file name, path and number of embedded chunks.
   * @throws The original processing error; a failure to persist the "failed"
   *   state is logged but never replaces it.
   */
  async handle(job: Job) {
    const { filePath, fileName } = job.data as EmbedFileJobParams

    logger.info(`[EmbedFileJob] Starting embedding process for: ${fileName}`)

    const dockerService = new DockerService()
    const ollamaService = new OllamaService()
    const ragService = new RagService(dockerService, ollamaService)

    try {
      // Update progress starting
      await job.updateProgress(0)
      await job.updateData({
        ...job.data,
        status: 'processing',
        startedAt: Date.now(),
        // A retried attempt inherits the previous attempt's data; drop the
        // stale failure markers so status readers do not see an old error.
        failedAt: undefined,
        error: undefined,
      })

      logger.info(`[EmbedFileJob] Processing file: ${filePath}`)

      // Process and embed the file
      const result = await ragService.processAndEmbedFile(filePath)

      if (!result.success) {
        logger.error(`[EmbedFileJob] Failed to process file ${fileName}: ${result.message}`)
        throw new Error(result.message)
      }

      // Update progress complete
      await job.updateProgress(100)
      await job.updateData({
        ...job.data,
        status: 'completed',
        completedAt: Date.now(),
        chunks: result.chunks,
        failedAt: undefined,
        error: undefined,
      })

      logger.info(
        `[EmbedFileJob] Successfully embedded ${result.chunks} chunks from file: ${fileName}`
      )

      return {
        success: true,
        fileName,
        filePath,
        chunks: result.chunks,
        message: `Successfully embedded ${result.chunks} chunks`,
      }
    } catch (error) {
      // The error goes in the merging object (first argument) so the logger
      // serializes it; as a trailing format argument it has no placeholder.
      logger.error({ err: error }, `[EmbedFileJob] Error embedding file ${fileName}:`)

      try {
        await job.updateData({
          ...job.data,
          status: 'failed',
          failedAt: Date.now(),
          error: error instanceof Error ? error.message : 'Unknown error',
        })
      } catch (updateError) {
        // Recording the failure is best-effort: the processing error below is
        // the one the queue must see, so only log this secondary failure.
        logger.error(
          { err: updateError },
          `[EmbedFileJob] Could not record failed state for file ${fileName}`
        )
      }

      throw error
    }
  }

  /**
   * Looks up the job for a file path via its derived job id.
   *
   * @returns The job, or `undefined` when the queue has no job with that id.
   */
  static async getByFilePath(filePath: string): Promise<Job | undefined> {
    const queueService = new QueueService()
    const queue = queueService.getQueue(this.queue)
    const jobId = this.getJobId(filePath)
    return await queue.getJob(jobId)
  }

  /**
   * Queues an embedding job for a file unless one with the same derived id is
   * already known to the queue.
   *
   * @returns `created: true` with the new job, or `created: false` with the
   *   existing one.
   * @throws Any queue error other than a "job already exists" rejection.
   */
  static async dispatch(params: EmbedFileJobParams) {
    const queueService = new QueueService()
    const queue = queueService.getQueue(this.queue)
    const jobId = this.getJobId(params.filePath)

    // NOTE(review): BullMQ's job-id docs say that adding a job whose custom id
    // already exists is ignored rather than rejected, so duplicates are
    // detected explicitly here. Confirm against the installed version.
    const existingJob = await queue.getJob(jobId)
    if (existingJob) {
      logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
      return {
        job: existingJob,
        created: false,
        jobId,
        message: `Embedding job already exists for: ${params.fileName}`,
      }
    }

    try {
      const job = await queue.add(this.key, params, {
        jobId,
        attempts: 3,
        backoff: {
          type: 'exponential',
          delay: 5000, // Delay 5 seconds before retrying
        },
        removeOnComplete: { count: 50 }, // Keep last 50 completed jobs for history
        removeOnFail: { count: 20 }, // Keep last 20 failed jobs for debugging
      })

      logger.info(`[EmbedFileJob] Dispatched embedding job for file: ${params.fileName}`)

      return {
        job,
        created: true,
        jobId,
        message: `File queued for embedding: ${params.fileName}`,
      }
    } catch (error) {
      // Fallback for queue implementations that do reject duplicate ids. The
      // catch variable is narrowed before `.message` is read.
      if (error instanceof Error && error.message.includes('job already exists')) {
        const existing = await queue.getJob(jobId)
        logger.info(`[EmbedFileJob] Job already exists for file: ${params.fileName}`)
        return {
          job: existing,
          created: false,
          jobId,
          message: `Embedding job already exists for: ${params.fileName}`,
        }
      }
      throw error
    }
  }

  /**
   * Summarizes the job for a file path.
   *
   * `status` is the value `handle` stored on the job data when present,
   * otherwise the queue-reported state. `progress` is only returned when the
   * job's progress is a plain number.
   */
  static async getStatus(filePath: string): Promise<{
    exists: boolean
    status?: string
    progress?: number
    chunks?: number
    error?: string
  }> {
    const job = await this.getByFilePath(filePath)

    if (!job) {
      return { exists: false }
    }

    const state = await job.getState()
    const data = job.data

    return {
      exists: true,
      status: data.status || state,
      progress: typeof job.progress === 'number' ? job.progress : undefined,
      chunks: data.chunks,
      error: data.error,
    }
  }
}

0 commit comments

Comments
 (0)