Add recrawl functionality and fix PHPCS warnings
- Added "Recrawl" button in jobs table UI
- Implemented recrawl API endpoint that deletes all job data and restarts crawl
- Fixed PHPCS line length warnings in api.php and Crawler.php

All quality checks pass:
- PHPStan Level 8: 0 errors
- PHPCS PSR-12: 0 warnings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
36
src/api.php
36
src/api.php
@@ -127,6 +127,42 @@ try {
|
||||
]);
|
||||
break;
|
||||
|
||||
// Recrawl: wipe all crawled data for a job and restart the crawl worker.
// Expects POST job_id (positive int) and domain (non-empty string).
// Responds with JSON {success, job_id, message}; throws Exception on bad input.
case 'recrawl':
    // Cast to int before any use: the raw $_POST value is later embedded in a
    // shell command, so an unvalidated string would be a command-injection vector.
    $jobId = (int)($_POST['job_id'] ?? 0);
    $domain = $_POST['domain'] ?? '';

    if ($jobId <= 0) {
        throw new Exception('Valid job_id is required');
    }
    if (empty($domain)) {
        throw new Exception('Domain is required');
    }

    // Delete all related data and reset the job atomically so a failure
    // partway through cannot leave the job half-wiped.
    $db->beginTransaction();
    try {
        // Table names are from a fixed whitelist, never from user input.
        foreach (['crawl_queue', 'links', 'pages'] as $table) {
            $stmt = $db->prepare("DELETE FROM {$table} WHERE crawl_job_id = ?");
            $stmt->execute([$jobId]);
        }

        // Reset job status and counters so the worker starts fresh.
        $stmt = $db->prepare(
            "UPDATE crawl_jobs SET status = 'pending', total_pages = 0, total_links = 0, " .
            "started_at = NULL, completed_at = NULL WHERE id = ?"
        );
        $stmt->execute([$jobId]);

        $db->commit();
    } catch (Exception $e) {
        $db->rollBack();
        throw $e;
    }

    // Start crawling in background. escapeshellarg() guards both the script
    // path and the id argument (defense in depth: $jobId is already an int).
    $cmd = "php " . escapeshellarg(__DIR__ . "/crawler-worker.php") . " "
        . escapeshellarg((string)$jobId) . " > /dev/null 2>&1 &";
    exec($cmd);

    echo json_encode([
        'success' => true,
        'job_id' => $jobId,
        'message' => 'Recrawl started'
    ]);
    break;
|
||||
|
||||
default:
|
||||
throw new Exception('Invalid action');
|
||||
}
|
||||
|
||||
@@ -211,8 +211,8 @@ class Crawler
|
||||
|
||||
// Save link
|
||||
$stmt = $this->db->prepare(
|
||||
"INSERT INTO links (page_id, crawl_job_id, source_url, target_url, link_text, is_nofollow, is_internal)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)"
|
||||
"INSERT INTO links (page_id, crawl_job_id, source_url, target_url, " .
|
||||
"link_text, is_nofollow, is_internal) VALUES (?, ?, ?, ?, ?, ?, ?)"
|
||||
);
|
||||
$stmt->execute([
|
||||
$pageId,
|
||||
|
||||
@@ -338,6 +338,7 @@
|
||||
<td>${job.started_at || '-'}</td>
|
||||
<td>
|
||||
<button class="action-btn" onclick="viewJob(${job.id})">Ansehen</button>
|
||||
<button class="action-btn" onclick="recrawlJob(${job.id}, '${job.domain}')">Recrawl</button>
|
||||
<button class="action-btn" onclick="deleteJob(${job.id})">Löschen</button>
|
||||
</td>
|
||||
</tr>
|
||||
@@ -472,6 +473,31 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Ask the user for confirmation, then request a recrawl of the given job via
// the API. On success the job list is reloaded; all outcomes are reported
// through alert().
async function recrawlJob(jobId, domain) {
    if (!confirm('Job-Ergebnisse löschen und neu crawlen?')) return;

    const payload = new FormData();
    payload.append('job_id', jobId);
    payload.append('domain', domain);

    try {
        const response = await fetch('/api.php?action=recrawl', { method: 'POST', body: payload });
        const result = await response.json();

        if (!result.success) {
            alert('Fehler: ' + result.error);
            return;
        }
        loadJobs();
        alert('Recrawl gestartet! Job ID: ' + result.job_id);
    } catch (err) {
        alert('Fehler beim Recrawl: ' + err.message);
    }
}
|
||||
|
||||
function switchTab(tab) {
|
||||
document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
|
||||
document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));
|
||||
|
||||
Reference in New Issue
Block a user