Skip to content

Commit 96e5027

Browse files
committed
feat(AI Assistant): performance improvements and smarter RAG context usage
1 parent 460756f commit 96e5027

File tree

5 files changed

+243
-89
lines changed

5 files changed

+243
-89
lines changed

admin/app/controllers/ollama_controller.ts

Lines changed: 43 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ import { modelNameSchema } from '#validators/download'
55
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
66
import { inject } from '@adonisjs/core'
77
import type { HttpContext } from '@adonisjs/core/http'
8-
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
8+
import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
99
import logger from '@adonisjs/core/services/logger'
1010
import type { Message } from 'ollama'
1111

@@ -66,9 +66,28 @@ export default class OllamaController {
6666

6767
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
6868

69-
// If relevant context is found, inject as a system message
69+
// If relevant context is found, inject as a system message with adaptive limits
7070
if (relevantDocs.length > 0) {
71-
const contextText = relevantDocs
71+
// Determine context budget based on model size
72+
const { maxResults, maxTokens } = this.getContextLimitsForModel(reqData.model)
73+
let trimmedDocs = relevantDocs.slice(0, maxResults)
74+
75+
// Apply token cap if set (estimate ~4 chars per token)
76+
// Always include the first (most relevant) result — the cap only gates subsequent results
77+
if (maxTokens > 0) {
78+
const charCap = maxTokens * 4
79+
let totalChars = 0
80+
trimmedDocs = trimmedDocs.filter((doc, idx) => {
81+
totalChars += doc.text.length
82+
return idx === 0 || totalChars <= charCap
83+
})
84+
}
85+
86+
logger.debug(
87+
`[RAG] Injecting ${trimmedDocs.length}/${relevantDocs.length} results (model: ${reqData.model}, maxResults: ${maxResults}, maxTokens: ${maxTokens || 'unlimited'})`
88+
)
89+
90+
const contextText = trimmedDocs
7291
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
7392
.join('\n\n')
7493

@@ -174,6 +193,25 @@ export default class OllamaController {
174193
return await this.ollamaService.getModels()
175194
}
176195

196+
/**
 * Determines RAG context limits based on the model size extracted from the model name.
 *
 * Parses the first size indicator such as "1b", "3b", "8b" or "70b" from the model
 * name/tag and returns the limits of the first RAG_CONTEXT_LIMITS tier whose
 * `maxParams` is greater than or equal to that size.
 *
 * @param modelName - Model name including its tag, e.g. "llama3.2:3b" or "qwen2.5:1.5b".
 * @returns The result-count cap (`maxResults`) and token cap (`maxTokens`) to apply.
 *   A `maxTokens` of 0 means no token cap (the caller only applies the cap when it is > 0).
 */
private getContextLimitsForModel(modelName: string): { maxResults: number; maxTokens: number } {
  // Extract parameter count from model name (e.g., "llama3.2:3b", "qwen2.5:1.5b", "gemma:7b").
  // The negative lookahead rejects a digit+"b" run that continues into a longer word,
  // so quantization markers like "4bit" / "8bit" are not mistaken for a parameter count.
  const sizeMatch = modelName.match(/(\d+\.?\d*)[bB](?![a-zA-Z])/)
  const paramBillions = sizeMatch ? parseFloat(sizeMatch[1]) : 8 // default to 8B if unknown

  // First matching tier wins.
  // NOTE(review): assumes RAG_CONTEXT_LIMITS is ordered by ascending maxParams — confirm in constants/ollama.
  for (const tier of RAG_CONTEXT_LIMITS) {
    if (paramBillions <= tier.maxParams) {
      return { maxResults: tier.maxResults, maxTokens: tier.maxTokens }
    }
  }

  // Fallback when no tier matched: keep up to 5 results and apply no token cap (maxTokens 0)
  return { maxResults: 5, maxTokens: 0 }
}
214+
177215
private async rewriteQueryWithContext(
178216
messages: Message[]
179217
): Promise<string | null> {
@@ -199,8 +237,8 @@ export default class OllamaController {
199237
})
200238
.join('\n')
201239

202-
const availableModels = await this.ollamaService.getAvailableModels({ query: null, limit: 500 })
203-
const rewriteModelAvailable = availableModels?.models.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
240+
const installedModels = await this.ollamaService.getModels(true)
241+
const rewriteModelAvailable = installedModels?.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
204242
if (!rewriteModelAvailable) {
205243
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
206244
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')

0 commit comments

Comments
 (0)