Skip to content

Commit 5314c79

Browse files
chriscrosstalk and claude
authored and committed
fix: improve download reliability with stall detection, failure visibility, and Wikipedia status tracking
Three bugs caused downloads to hang, disappear, or leave stuck spinners:

1. Wikipedia downloads that failed never updated the DB status from 'downloading', leaving the spinner stuck forever. Now the worker's failed handler marks them as failed.
2. There was no stall detection on streaming downloads: if data stopped flowing mid-download, the job hung indefinitely. Added a 5-minute stall timer that triggers a retry.
3. Failed jobs were invisible to users because only the waiting/active/delayed states were queried. Now failed jobs appear with error indicators in the download list.

Closes #364, closes #216

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c64fe74 commit 5314c79

File tree

6 files changed

+111
-21
lines changed

6 files changed

+111
-21
lines changed

admin/app/services/download_service.ts

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,34 +12,42 @@ export class DownloadService {
1212
async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> {
1313
// Get regular file download jobs (zim, map, etc.)
1414
const queue = this.queueService.getQueue(RunDownloadJob.queue)
15-
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed'])
15+
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed', 'failed'])
1616

1717
const fileDownloads = fileJobs.map((job) => ({
1818
jobId: job.id!.toString(),
1919
url: job.data.url,
2020
progress: parseInt(job.progress.toString(), 10),
2121
filepath: normalize(job.data.filepath),
2222
filetype: job.data.filetype,
23+
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
24+
failedReason: job.failedReason || undefined,
2325
}))
2426

2527
// Get Ollama model download jobs
2628
const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
27-
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed'])
29+
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed', 'failed'])
2830

2931
const modelDownloads = modelJobs.map((job) => ({
3032
jobId: job.id!.toString(),
3133
url: job.data.modelName || 'Unknown Model', // Use model name as url
3234
progress: parseInt(job.progress.toString(), 10),
3335
filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath
3436
filetype: 'model',
37+
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
38+
failedReason: job.failedReason || undefined,
3539
}))
3640

3741
const allDownloads = [...fileDownloads, ...modelDownloads]
3842

3943
// Filter by filetype if specified
4044
const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype)
4145

42-
// Sort so actively downloading items (progress > 0) appear first, then by progress descending
43-
return filtered.sort((a, b) => b.progress - a.progress)
46+
// Sort: active downloads first (by progress desc), then failed at the bottom
47+
return filtered.sort((a, b) => {
48+
if (a.status === 'failed' && b.status !== 'failed') return 1
49+
if (a.status !== 'failed' && b.status === 'failed') return -1
50+
return b.progress - a.progress
51+
})
4452
}
4553
}

admin/app/utils/downloads.ts

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,10 +88,29 @@ export async function doResumableDownload({
8888
let lastProgressTime = Date.now()
8989
let lastDownloadedBytes = startByte
9090

91+
// Stall detection: if no data arrives for 5 minutes, abort the download
92+
const STALL_TIMEOUT_MS = 5 * 60 * 1000
93+
let stallTimer: ReturnType<typeof setTimeout> | null = null
94+
95+
const clearStallTimer = () => {
96+
if (stallTimer) {
97+
clearTimeout(stallTimer)
98+
stallTimer = null
99+
}
100+
}
101+
102+
const resetStallTimer = () => {
103+
clearStallTimer()
104+
stallTimer = setTimeout(() => {
105+
cleanup(new Error('Download stalled - no data received for 5 minutes'))
106+
}, STALL_TIMEOUT_MS)
107+
}
108+
91109
// Progress tracking stream to monitor data flow
92110
const progressStream = new Transform({
93111
transform(chunk: Buffer, _: any, callback: Function) {
94112
downloadedBytes += chunk.length
113+
resetStallTimer()
95114

96115
// Update progress tracking
97116
const now = Date.now()
@@ -118,6 +137,7 @@ export async function doResumableDownload({
118137

119138
// Handle errors and cleanup
120139
const cleanup = (error?: Error) => {
140+
clearStallTimer()
121141
progressStream.destroy()
122142
response.data.destroy()
123143
writeStream.destroy()
@@ -136,6 +156,7 @@ export async function doResumableDownload({
136156
})
137157

138158
writeStream.on('finish', async () => {
159+
clearStallTimer()
139160
if (onProgress) {
140161
onProgress({
141162
downloadedBytes,
@@ -151,7 +172,8 @@ export async function doResumableDownload({
151172
resolve(filepath)
152173
})
153174

154-
// Pipe: response -> progressStream -> writeStream
175+
// Start stall timer and pipe: response -> progressStream -> writeStream
176+
resetStallTimer()
155177
response.data.pipe(progressStream).pipe(writeStream)
156178
})
157179
}

admin/commands/queue/work.ts

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,23 @@ export default class QueueWork extends BaseCommand {
6565
}
6666
)
6767

68-
worker.on('failed', (job, err) => {
68+
worker.on('failed', async (job, err) => {
6969
this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`)
70+
71+
// If this was a Wikipedia download, mark it as failed in the DB
72+
if (job?.data?.filetype === 'zim' && job?.data?.url?.includes('wikipedia_en_')) {
73+
try {
74+
const { DockerService } = await import('#services/docker_service')
75+
const { ZimService } = await import('#services/zim_service')
76+
const dockerService = new DockerService()
77+
const zimService = new ZimService(dockerService)
78+
await zimService.onWikipediaDownloadComplete(job.data.url, false)
79+
} catch (e: any) {
80+
this.logger.error(
81+
`[${queueName}] Failed to update Wikipedia status: ${e.message}`
82+
)
83+
}
84+
}
7085
})
7186

7287
worker.on('completed', (job) => {

admin/inertia/components/ActiveDownloads.tsx

Lines changed: 34 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ import useDownloads, { useDownloadsProps } from '~/hooks/useDownloads'
22
import HorizontalBarChart from './HorizontalBarChart'
33
import { extractFileName } from '~/lib/util'
44
import StyledSectionHeader from './StyledSectionHeader'
5+
import { IconAlertTriangle } from '@tabler/icons-react'
56

67
interface ActiveDownloadProps {
78
filetype?: useDownloadsProps['filetype']
@@ -17,18 +18,39 @@ const ActiveDownloads = ({ filetype, withHeader = false }: ActiveDownloadProps)
1718
<div className="space-y-4">
1819
{downloads && downloads.length > 0 ? (
1920
downloads.map((download) => (
20-
<div className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow">
21-
<HorizontalBarChart
22-
items={[
23-
{
24-
label: extractFileName(download.filepath) || download.url,
25-
value: download.progress,
26-
total: '100%',
27-
used: `${download.progress}%`,
28-
type: download.filetype,
29-
},
30-
]}
31-
/>
21+
<div
22+
key={download.jobId}
23+
className={`bg-desert-white rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
24+
download.status === 'failed'
25+
? 'border-red-300'
26+
: 'border-desert-stone-light'
27+
}`}
28+
>
29+
{download.status === 'failed' ? (
30+
<div className="flex items-center gap-2">
31+
<IconAlertTriangle className="w-5 h-5 text-red-500 flex-shrink-0" />
32+
<div className="flex-1 min-w-0">
33+
<p className="text-sm font-medium text-gray-900 truncate">
34+
{extractFileName(download.filepath) || download.url}
35+
</p>
36+
<p className="text-xs text-red-600 mt-0.5">
37+
Download failed{download.failedReason ? `: ${download.failedReason}` : ''}
38+
</p>
39+
</div>
40+
</div>
41+
) : (
42+
<HorizontalBarChart
43+
items={[
44+
{
45+
label: extractFileName(download.filepath) || download.url,
46+
value: download.progress,
47+
total: '100%',
48+
used: `${download.progress}%`,
49+
type: download.filetype,
50+
},
51+
]}
52+
/>
53+
)}
3254
</div>
3355
))
3456
) : (

admin/inertia/components/WikipediaSelector.tsx

Lines changed: 24 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import { formatBytes } from '~/lib/util'
22
import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads'
33
import classNames from 'classnames'
4-
import { IconCheck, IconDownload, IconWorld } from '@tabler/icons-react'
4+
import { IconCheck, IconDownload, IconWorld, IconAlertTriangle } from '@tabler/icons-react'
55
import StyledButton from './StyledButton'
66
import LoadingSpinner from './LoadingSpinner'
77

@@ -29,8 +29,9 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
2929
// Determine which option to highlight
3030
const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null
3131

32-
// Check if current selection is downloading
32+
// Check if current selection is downloading or failed
3333
const isDownloading = currentSelection?.status === 'downloading'
34+
const isFailed = currentSelection?.status === 'failed'
3435

3536
return (
3637
<div className="w-full">
@@ -55,6 +56,18 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
5556
</div>
5657
)}
5758

59+
{/* Failed status message */}
60+
{isFailed && (
61+
<div className="mb-4 p-3 bg-red-50 border border-red-200 rounded-lg flex items-center justify-between">
62+
<div className="flex items-center gap-2">
63+
<IconAlertTriangle className="w-5 h-5 text-red-600 flex-shrink-0" />
64+
<span className="text-sm text-red-700">
65+
Wikipedia download failed. Select a package and try again.
66+
</span>
67+
</div>
68+
</div>
69+
)}
70+
5871
{/* Options grid */}
5972
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
6073
{options.map((option) => {
@@ -63,6 +76,8 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
6376
currentSelection?.optionId === option.id && currentSelection?.status === 'installed'
6477
const isCurrentDownloading =
6578
currentSelection?.optionId === option.id && currentSelection?.status === 'downloading'
79+
const isCurrentFailed =
80+
currentSelection?.optionId === option.id && currentSelection?.status === 'failed'
6681
const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId
6782

6883
return (
@@ -100,6 +115,12 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
100115
Downloading
101116
</span>
102117
)}
118+
{isCurrentFailed && (
119+
<span className="text-xs bg-red-500 text-white px-2 py-0.5 rounded-full flex items-center gap-1">
120+
<IconAlertTriangle size={12} />
121+
Failed
122+
</span>
123+
)}
103124
</div>
104125

105126
{/* Option content */}
@@ -136,7 +157,7 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
136157
</div>
137158

138159
{/* Submit button for Content Explorer mode */}
139-
{showSubmitButton && selectedOptionId && selectedOptionId !== currentSelection?.optionId && (
160+
{showSubmitButton && selectedOptionId && (selectedOptionId !== currentSelection?.optionId || isFailed) && (
140161
<div className="mt-4 flex justify-end">
141162
<StyledButton
142163
variant="primary"

admin/types/downloads.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@ export type DownloadJobWithProgress = {
4141
progress: number
4242
filepath: string
4343
filetype: string
44+
status?: 'active' | 'failed'
45+
failedReason?: string
4446
}
4547

4648
// Wikipedia selector types

0 commit comments

Comments
 (0)