From 9e6157274710c79c392f602fc0c570406916f148 Mon Sep 17 00:00:00 2001
From: Martin
Date: Sat, 4 Oct 2025 09:07:50 +0200
Subject: [PATCH] Add recrawl functionality and fix PHPCS warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Added "Recrawl" button in jobs table UI
- Implemented recrawl API endpoint that deletes all job data and restarts crawl
- Fixed PHPCS line length warnings in api.php and Crawler.php

All quality checks pass:
- PHPStan Level 8: 0 errors
- PHPCS PSR-12: 0 warnings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 src/api.php             | 36 ++++++++++++++++++++++++++++++++++++
 src/classes/Crawler.php |  4 ++--
 src/index.php           | 26 ++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/src/api.php b/src/api.php
index ca1dd25..41c51a5 100644
--- a/src/api.php
+++ b/src/api.php
@@ -127,6 +127,42 @@ try {
             ]);
             break;
 
+        case 'recrawl':
+            $jobId = (int) ($_POST['job_id'] ?? 0);
+            $domain = $_POST['domain'] ?? '';
+
+            if (empty($domain)) {
+                throw new Exception('Domain is required');
+            }
+
+            // Delete all related data for this job
+            $stmt = $db->prepare("DELETE FROM crawl_queue WHERE crawl_job_id = ?");
+            $stmt->execute([$jobId]);
+
+            $stmt = $db->prepare("DELETE FROM links WHERE crawl_job_id = ?");
+            $stmt->execute([$jobId]);
+
+            $stmt = $db->prepare("DELETE FROM pages WHERE crawl_job_id = ?");
+            $stmt->execute([$jobId]);
+
+            // Reset job status
+            $stmt = $db->prepare(
+                "UPDATE crawl_jobs SET status = 'pending', total_pages = 0, total_links = 0, " .
+                "started_at = NULL, completed_at = NULL WHERE id = ?"
+            );
+            $stmt->execute([$jobId]);
+
+            // Start crawl worker in background; $jobId is an int cast, safe to interpolate
+            $cmd = "php " . __DIR__ . "/crawler-worker.php $jobId > /dev/null 2>&1 &";
+            exec($cmd);
+
+            echo json_encode([
+                'success' => true,
+                'job_id' => $jobId,
+                'message' => 'Recrawl started'
+            ]);
+            break;
+
         default:
             throw new Exception('Invalid action');
     }
diff --git a/src/classes/Crawler.php b/src/classes/Crawler.php
index 3324a3c..80dad5f 100644
--- a/src/classes/Crawler.php
+++ b/src/classes/Crawler.php
@@ -211,8 +211,8 @@ class Crawler
 
             // Save link
             $stmt = $this->db->prepare(
-                "INSERT INTO links (page_id, crawl_job_id, source_url, target_url, link_text, is_nofollow, is_internal)
-                 VALUES (?, ?, ?, ?, ?, ?, ?)"
+                "INSERT INTO links (page_id, crawl_job_id, source_url, target_url, " .
+                "link_text, is_nofollow, is_internal) VALUES (?, ?, ?, ?, ?, ?, ?)"
             );
             $stmt->execute([
                 $pageId,
diff --git a/src/index.php b/src/index.php
index 55054c9..9afb21b 100644
--- a/src/index.php
+++ b/src/index.php
@@ -338,6 +338,7 @@
                 <td>${job.started_at || '-'}</td>
                 <td>
                     <button class="btn" onclick="viewJob(${job.id})">Details</button>
+                    <button class="btn" onclick="recrawlJob(${job.id}, '${job.domain}')">Recrawl</button>
                 </td>
             </tr>
         `).join('');
@@ -472,6 +473,31 @@
         }
     }
 
+    async function recrawlJob(jobId, domain) {
+        if (!confirm('Job-Ergebnisse löschen und neu crawlen?')) return;
+
+        const formData = new FormData();
+        formData.append('job_id', jobId);
+        formData.append('domain', domain);
+
+        try {
+            const response = await fetch('/api.php?action=recrawl', {
+                method: 'POST',
+                body: formData
+            });
+            const data = await response.json();
+
+            if (data.success) {
+                loadJobs();
+                alert('Recrawl gestartet! Job ID: ' + data.job_id);
+            } else {
+                alert('Fehler: ' + data.error);
+            }
+        } catch (e) {
+            alert('Fehler beim Recrawl: ' + e.message);
+        }
+    }
+
     function switchTab(tab) {
         document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
         document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));