Skip to content

Commit 460756f

Browse files
committed
feat(AI Assistant): improved state management and performance
1 parent 6f0fae0 commit 460756f

File tree

6 files changed

+114
-30
lines changed

6 files changed

+114
-30
lines changed

admin/app/controllers/ollama_controller.ts

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { ChatService } from '#services/chat_service'
12
import { OllamaService } from '#services/ollama_service'
23
import { RagService } from '#services/rag_service'
34
import { modelNameSchema } from '#validators/download'
@@ -11,6 +12,7 @@ import type { Message } from 'ollama'
1112
@inject()
1213
export default class OllamaController {
1314
constructor(
15+
private chatService: ChatService,
1416
private ollamaService: OllamaService,
1517
private ragService: RagService
1618
) { }
@@ -87,19 +89,59 @@ export default class OllamaController {
8789
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
8890
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
8991

92+
// Separate sessionId from the Ollama request payload — Ollama rejects unknown fields
93+
const { sessionId, ...ollamaRequest } = reqData
94+
95+
// Save user message to DB before streaming if sessionId provided
96+
let userContent: string | null = null
97+
if (sessionId) {
98+
const lastUserMsg = [...reqData.messages].reverse().find((m) => m.role === 'user')
99+
if (lastUserMsg) {
100+
userContent = lastUserMsg.content
101+
await this.chatService.addMessage(sessionId, 'user', userContent)
102+
}
103+
}
104+
90105
if (reqData.stream) {
91106
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
92107
// Headers already flushed above
93-
const stream = await this.ollamaService.chatStream({ ...reqData, think })
108+
const stream = await this.ollamaService.chatStream({ ...ollamaRequest, think })
109+
let fullContent = ''
94110
for await (const chunk of stream) {
111+
if (chunk.message?.content) {
112+
fullContent += chunk.message.content
113+
}
95114
response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
96115
}
97116
response.response.end()
117+
118+
// Save assistant message and optionally generate title
119+
if (sessionId && fullContent) {
120+
await this.chatService.addMessage(sessionId, 'assistant', fullContent)
121+
const messageCount = await this.chatService.getMessageCount(sessionId)
122+
if (messageCount <= 2 && userContent) {
123+
this.chatService.generateTitle(sessionId, userContent, fullContent).catch((err) => {
124+
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
125+
})
126+
}
127+
}
98128
return
99129
}
100130

101131
// Non-streaming (legacy) path
102-
return await this.ollamaService.chat({ ...reqData, think })
132+
const result = await this.ollamaService.chat({ ...ollamaRequest, think })
133+
134+
if (sessionId && result?.message?.content) {
135+
await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
136+
const messageCount = await this.chatService.getMessageCount(sessionId)
137+
if (messageCount <= 2 && userContent) {
138+
this.chatService.generateTitle(sessionId, userContent, result.message.content).catch((err) => {
139+
logger.error(`[OllamaController] Title generation failed: ${err instanceof Error ? err.message : err}`)
140+
})
141+
}
142+
}
143+
144+
return result
103145
} catch (error) {
104146
if (reqData.stream) {
105147
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)

admin/app/services/chat_service.ts

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import logger from '@adonisjs/core/services/logger'
44
import { DateTime } from 'luxon'
55
import { inject } from '@adonisjs/core'
66
import { OllamaService } from './ollama_service.js'
7-
import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
7+
import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
88
import { toTitleCase } from '../utils/misc.js'
99

1010
@inject()
@@ -220,6 +220,59 @@ export class ChatService {
220220
}
221221
}
222222

223+
async getMessageCount(sessionId: number): Promise<number> {
224+
try {
225+
const count = await ChatMessage.query().where('session_id', sessionId).count('* as total')
226+
return Number(count[0].$extras.total)
227+
} catch (error) {
228+
logger.error(
229+
`[ChatService] Failed to get message count for session ${sessionId}: ${error instanceof Error ? error.message : error}`
230+
)
231+
return 0
232+
}
233+
}
234+
235+
async generateTitle(sessionId: number, userMessage: string, assistantMessage: string) {
236+
try {
237+
const models = await this.ollamaService.getModels()
238+
const titleModelAvailable = models?.some((m) => m.name === DEFAULT_QUERY_REWRITE_MODEL)
239+
240+
let title: string
241+
242+
if (!titleModelAvailable) {
243+
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
244+
} else {
245+
const response = await this.ollamaService.chat({
246+
model: DEFAULT_QUERY_REWRITE_MODEL,
247+
messages: [
248+
{ role: 'system', content: SYSTEM_PROMPTS.title_generation },
249+
{ role: 'user', content: userMessage },
250+
{ role: 'assistant', content: assistantMessage },
251+
],
252+
})
253+
254+
title = response?.message?.content?.trim()
255+
if (!title) {
256+
title = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
257+
}
258+
}
259+
260+
await this.updateSession(sessionId, { title })
261+
logger.info(`[ChatService] Generated title for session ${sessionId}: "${title}"`)
262+
} catch (error) {
263+
logger.error(
264+
`[ChatService] Failed to generate title for session ${sessionId}: ${error instanceof Error ? error.message : error}`
265+
)
266+
// Fall back to truncated user message
267+
try {
268+
const fallbackTitle = userMessage.slice(0, 57) + (userMessage.length > 57 ? '...' : '')
269+
await this.updateSession(sessionId, { title: fallbackTitle })
270+
} catch {
271+
// Silently fail - session keeps "New Chat" title
272+
}
273+
}
274+
}
275+
223276
async deleteAllSessions() {
224277
try {
225278
await ChatSession.query().delete()

admin/app/validators/ollama.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export const chatSchema = vine.compile(
1010
})
1111
),
1212
stream: vine.boolean().optional(),
13+
sessionId: vine.number().positive().optional(),
1314
})
1415
)
1516

admin/constants/ollama.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ IMPORTANT INSTRUCTIONS:
8383
1. If the user's question is directly related to the context above, use this information to provide accurate, detailed answers.
8484
2. Always cite or reference the context when using it (e.g., "According to the information available..." or "Based on the knowledge base...").
8585
3. If the context is only partially relevant, combine it with your general knowledge but be clear about what comes from the knowledge base.
86-
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer.
86+
4. If the context is not relevant to the user's question, you can respond using your general knowledge without forcing the context into your answer. Do not mention the context if it's not relevant.
8787
5. Never fabricate information that isn't in the context or your training data.
88-
6. If you're unsure or the context doesn't contain enough information, acknowledge the limitations.
88+
6. If you're unsure or you don't have enough information to answer the user's question, acknowledge the limitations.
8989
9090
Format your response using markdown for readability.
9191
`,
@@ -113,6 +113,7 @@ Ensure that your suggestions are comma-separated with no conjunctions like "and"
113113
Do not use line breaks, new lines, or extra spacing to separate the suggestions.
114114
Format: suggestion1, suggestion2, suggestion3
115115
`,
116+
title_generation: `You are a title generator. Given the start of a conversation, generate a concise, descriptive title under 60 characters. Return ONLY the title text with no quotes, punctuation wrapping, or extra formatting.`,
116117
query_rewrite: `
117118
You are a query rewriting assistant. Your task is to reformulate the user's latest question to include relevant context from the conversation history.
118119

admin/inertia/components/chat/index.tsx

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,9 @@ export default function Chat({
9090
mutationFn: (request: {
9191
model: string
9292
messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>
93+
sessionId?: number
9394
}) => api.sendChatMessage({ ...request, stream: false }),
94-
onSuccess: async (data, variables) => {
95+
onSuccess: async (data) => {
9596
if (!data || !activeSessionId) {
9697
throw new Error('No response from Ollama')
9798
}
@@ -106,17 +107,9 @@ export default function Chat({
106107

107108
setMessages((prev) => [...prev, assistantMessage])
108109

109-
// Save assistant message to backend
110-
await api.addChatMessage(activeSessionId, 'assistant', assistantMessage.content)
111-
112-
// Update session title if it's a new chat
113-
const currentSession = sessions.find((s) => s.id === activeSessionId)
114-
if (currentSession && currentSession.title === 'New Chat') {
115-
const userContent = variables.messages[variables.messages.length - 1].content
116-
const newTitle = userContent.slice(0, 50) + (userContent.length > 50 ? '...' : '')
117-
await api.updateChatSession(activeSessionId, { title: newTitle })
118-
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
119-
}
110+
// Refresh sessions to pick up backend-persisted messages and title
111+
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
112+
setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
120113
},
121114
onError: (error) => {
122115
console.error('Error sending message:', error)
@@ -230,9 +223,6 @@ export default function Chat({
230223

231224
setMessages((prev) => [...prev, userMessage])
232225

233-
// Save user message to backend
234-
await api.addChatMessage(sessionId, 'user', content)
235-
236226
const chatMessages = [
237227
...messages.map((m) => ({ role: m.role, content: m.content })),
238228
{ role: 'user' as const, content },
@@ -255,7 +245,7 @@ export default function Chat({
255245

256246
try {
257247
await api.streamChatMessage(
258-
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true },
248+
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true, sessionId: sessionId ? Number(sessionId) : undefined },
259249
(chunkContent, chunkThinking, done) => {
260250
if (chunkThinking.length > 0 && thinkingStartTime === null) {
261251
thinkingStartTime = Date.now()
@@ -336,24 +326,20 @@ export default function Chat({
336326
)
337327
)
338328

339-
await api.addChatMessage(sessionId, 'assistant', fullContent)
340-
341-
const currentSession = sessions.find((s) => s.id === sessionId)
342-
if (currentSession && currentSession.title === 'New Chat') {
343-
const newTitle = content.slice(0, 50) + (content.length > 50 ? '...' : '')
344-
await api.updateChatSession(sessionId, { title: newTitle })
345-
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
346-
}
329+
// Refresh sessions to pick up backend-persisted messages and title
330+
queryClient.invalidateQueries({ queryKey: ['chatSessions'] })
331+
setTimeout(() => queryClient.invalidateQueries({ queryKey: ['chatSessions'] }), 3000)
347332
}
348333
} else {
349334
// Non-streaming (legacy) path
350335
chatMutation.mutate({
351336
model: selectedModel || 'llama3.2',
352337
messages: chatMessages,
338+
sessionId: sessionId ? Number(sessionId) : undefined,
353339
})
354340
}
355341
},
356-
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled, sessions]
342+
[activeSessionId, messages, selectedModel, chatMutation, queryClient, streamingEnabled]
357343
)
358344

359345
return (

admin/types/ollama.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ export type OllamaChatRequest = {
3232
model: string
3333
messages: OllamaChatMessage[]
3434
stream?: boolean
35+
sessionId?: number
3536
}
3637

3738
export type OllamaChatResponse = {

0 commit comments

Comments
 (0)