@@ -12,7 +12,7 @@ import { OllamaService } from './ollama_service.js'
1212import { SERVICE_NAMES } from '../../constants/service_names.js'
1313import { removeStopwords } from 'stopword'
1414import { randomUUID } from 'node:crypto'
15- import { join } from 'node:path'
15+ import { join , resolve , sep } from 'node:path'
1616import KVStore from '#models/kv_store'
1717import { ZIMExtractionService } from './zim_extraction_service.js'
1818import { ZIM_BATCH_SIZE } from '../../constants/zim_extraction.js'
@@ -853,7 +853,7 @@ export class RagService {
853853
854854 /**
855855 * Retrieve all unique source files that have been stored in the knowledge base.
856- * @returns Array of unique source file identifiers
856+ * @returns Array of unique full source paths
857857 */
858858 public async getStoredFiles ( ) : Promise < string [ ] > {
859859 try {
@@ -886,19 +886,54 @@ export class RagService {
886886 offset = scrollResult . next_page_offset || null
887887 } while ( offset !== null )
888888
889- const sourcesArr = Array . from ( sources )
890-
891- // The source is a full path - only extract the filename for display
892- return sourcesArr . map ( ( src ) => {
893- const parts = src . split ( / [ / \\ ] / )
894- return parts [ parts . length - 1 ] // Return the last part as filename
895- } )
889+ return Array . from ( sources )
896890 } catch ( error ) {
897891 logger . error ( 'Error retrieving stored files:' , error )
898892 return [ ]
899893 }
900894 }
901895
/**
 * Delete all Qdrant points associated with a given source path and remove
 * the corresponding file from disk if it lives under the uploads directory.
 *
 * The operation has two phases:
 *  1. Remove every point whose `source` payload equals `source` from the
 *     content collection. A failure here is reported as `success: false`.
 *  2. Best-effort removal of the physical file. A failure here is only logged,
 *     because the knowledge-base entry is already gone at that point and
 *     reporting an overall failure would misrepresent the resulting state.
 *
 * @param source - Full source path as stored in Qdrant payloads
 * @returns `success: true` once the points have been removed from Qdrant
 *          (regardless of the disk cleanup outcome); `success: false` if
 *          ensuring the collection or the Qdrant delete throws.
 */
public async deleteFileBySource(source: string): Promise<{ success: boolean; message: string }> {
  try {
    await this._ensureCollection(
      RagService.CONTENT_COLLECTION_NAME,
      RagService.EMBEDDING_DIMENSION
    )

    await this.qdrant!.delete(RagService.CONTENT_COLLECTION_NAME, {
      filter: {
        must: [{ key: 'source', match: { value: source } }],
      },
    })

    logger.info(`[RAG] Deleted all points for source: ${source}`)
  } catch (error) {
    logger.error('[RAG] Error deleting file from knowledge base:', error)
    return { success: false, message: 'Error deleting file from knowledge base.' }
  }

  try {
    /**
     * Delete the physical file only if it lives inside the uploads directory.
     * resolve() normalises path traversal sequences (e.g. "/../..") before the
     * check, so a crafted source cannot escape the uploads directory.
     * resolve() is also used for the uploads root (instead of join()) because it
     * strips any trailing separator; otherwise appending `sep` below could yield
     * a double separator that never matches.
     * The trailing sep ensures a sibling directory sharing the same prefix
     * (e.g. "kb_uploads_other") can't slip through.
     */
    const uploadsAbsPath = resolve(process.cwd(), RagService.UPLOADS_STORAGE_PATH)
    const resolvedSource = resolve(source)

    if (resolvedSource.startsWith(uploadsAbsPath + sep)) {
      await deleteFileIfExists(resolvedSource)
      logger.info(`[RAG] Deleted uploaded file from disk: ${resolvedSource}`)
    } else {
      logger.warn(`[RAG] File was removed from knowledge base but doesn't live in Nomad's uploads directory, so it can't be safely removed. Skipping deletion of physical file...`)
    }
  } catch (error) {
    // The points are already deleted; a leftover file on disk is not a failed removal.
    logger.warn('[RAG] File was removed from knowledge base but the uploaded file could not be deleted from disk:', error)
  }

  return { success: true, message: 'File removed from knowledge base.' }
}
936+
902937 public async discoverNomadDocs ( force ?: boolean ) : Promise < { success : boolean ; message : string } > {
903938 try {
904939 const README_PATH = join ( process . cwd ( ) , 'README.md' )
0 commit comments