diff --git a/config/docker/init.sql b/config/docker/init.sql
index 4e58035..02cf861 100644
--- a/config/docker/init.sql
+++ b/config/docker/init.sql
@@ -28,12 +28,14 @@ CREATE TABLE IF NOT EXISTS pages (
     crawl_job_id INT NOT NULL,
     url VARCHAR(2048) NOT NULL,
     title VARCHAR(500),
+    meta_description TEXT,
     status_code INT,
     content_type VARCHAR(100),
     crawled_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
     FOREIGN KEY (crawl_job_id) REFERENCES crawl_jobs(id) ON DELETE CASCADE,
     INDEX idx_crawl_job (crawl_job_id),
     INDEX idx_url (url(255)),
+    INDEX idx_status_code (status_code),
     UNIQUE KEY unique_job_url (crawl_job_id, url(255))
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

diff --git a/src/api.php b/src/api.php
index 41c51a5..b5e3b42 100644
--- a/src/api.php
+++ b/src/api.php
@@ -116,6 +116,107 @@ try {
         ]);
         break;
 
+    case 'broken-links':
+        $jobId = $_GET['job_id'] ?? 0;
+        $stmt = $db->prepare(
+            "SELECT * FROM pages " .
+            "WHERE crawl_job_id = ? AND (status_code >= 400 OR status_code = 0) " .
+            "ORDER BY status_code DESC, url"
+        );
+        $stmt->execute([$jobId]);
+        $brokenLinks = $stmt->fetchAll();
+
+        echo json_encode([
+            'success' => true,
+            'broken_links' => $brokenLinks
+        ]);
+        break;
+
+    case 'seo-analysis':
+        $jobId = $_GET['job_id'] ?? 0;
+        $stmt = $db->prepare(
+            "SELECT id, url, title, meta_description, status_code FROM pages " .
+            "WHERE crawl_job_id = ? ORDER BY url"
+        );
+        $stmt->execute([$jobId]);
+        $pages = $stmt->fetchAll();
+
+        $issues = [];
+        foreach ($pages as $page) {
+            $pageIssues = [];
+            $titleLen = mb_strlen($page['title'] ?? '');
+            $descLen = mb_strlen($page['meta_description'] ?? '');
+
+            // Title issues (Google: 50-60 chars optimal)
+            if (empty($page['title'])) {
+                $pageIssues[] = 'Title missing';
+            } elseif ($titleLen < 30) {
+                $pageIssues[] = "Title too short ({$titleLen} chars)";
+            } elseif ($titleLen > 60) {
+                $pageIssues[] = "Title too long ({$titleLen} chars)";
+            }
+
+            // Meta description issues (Google: 120-160 chars optimal)
+            if (empty($page['meta_description'])) {
+                $pageIssues[] = 'Meta description missing';
+            } elseif ($descLen < 70) {
+                $pageIssues[] = "Meta description too short ({$descLen} chars)";
+            } elseif ($descLen > 160) {
+                $pageIssues[] = "Meta description too long ({$descLen} chars)";
+            }
+
+            if (!empty($pageIssues)) {
+                $issues[] = [
+                    'url' => $page['url'],
+                    'title' => $page['title'],
+                    'title_length' => $titleLen,
+                    'meta_description' => $page['meta_description'],
+                    'meta_length' => $descLen,
+                    'issues' => $pageIssues
+                ];
+            }
+        }
+
+        // Find duplicates
+        $titleCounts = [];
+        $descCounts = [];
+        foreach ($pages as $page) {
+            if (!empty($page['title'])) {
+                $titleCounts[$page['title']][] = $page['url'];
+            }
+            if (!empty($page['meta_description'])) {
+                $descCounts[$page['meta_description']][] = $page['url'];
+            }
+        }
+
+        $duplicates = [];
+        foreach ($titleCounts as $title => $urls) {
+            if (count($urls) > 1) {
+                $duplicates[] = [
+                    'type' => 'title',
+                    'content' => $title,
+                    'urls' => $urls
+                ];
+            }
+        }
+        foreach ($descCounts as $desc => $urls) {
+            if (count($urls) > 1) {
+                $duplicates[] = [
+                    'type' => 'meta_description',
+                    'content' => $desc,
+                    'urls' => $urls
+                ];
+            }
+        }
+
+        echo json_encode([
+            'success' => true,
+            'issues' => $issues,
+            'duplicates' => $duplicates,
+            'total_pages' => count($pages)
+        ]);
+        break;
+
     case 'delete':
         $jobId = $_POST['job_id'] ?? 0;
         $stmt = $db->prepare("DELETE FROM crawl_jobs WHERE id = ?");
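The two new endpoints are plain GET actions, so they are easy to smoke-test once a crawl has finished. A minimal sketch; the base URL and `job_id=1` are assumptions for a local setup, not part of this diff:

```php
<?php
// Hypothetical smoke test for the two new endpoints. The base URL and
// job_id=1 are assumptions for a local docker setup, not part of this diff.
$base = 'http://localhost:8080/api.php';

foreach (['broken-links', 'seo-analysis'] as $action) {
    $raw  = file_get_contents("{$base}?action={$action}&job_id=1");
    $data = json_decode($raw, true);

    if (!($data['success'] ?? false)) {
        fwrite(STDERR, "{$action} failed\n");
        continue;
    }

    // broken-links returns 'broken_links'; seo-analysis returns
    // 'issues', 'duplicates' and 'total_pages'.
    echo $action, ': keys = ', implode(', ', array_keys($data)), "\n";
}
```

Note that both actions coalesce a missing `job_id` to 0, so an unknown or omitted id simply yields empty result sets rather than an error.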
diff --git a/src/classes/Crawler.php b/src/classes/Crawler.php
index 80dad5f..c14cdc6 100644
--- a/src/classes/Crawler.php
+++ b/src/classes/Crawler.php
@@ -150,13 +150,18 @@ class Crawler
             ? $domCrawler->filter('title')->text()
             : '';
 
+        $metaDescription = $domCrawler->filter('meta[name="description"]')->count() > 0
+            ? $domCrawler->filter('meta[name="description"]')->attr('content')
+            : '';
+
         $stmt = $this->db->prepare(
-            "INSERT INTO pages (crawl_job_id, url, title, status_code, content_type)
-             VALUES (?, ?, ?, ?, ?)
-             ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), status_code = VALUES(status_code)"
+            "INSERT INTO pages (crawl_job_id, url, title, meta_description, status_code, content_type) " .
+            "VALUES (?, ?, ?, ?, ?, ?) " .
+            "ON DUPLICATE KEY UPDATE id=LAST_INSERT_ID(id), status_code = VALUES(status_code), " .
+            "meta_description = VALUES(meta_description)"
         );
-        $stmt->execute([$this->crawlJobId, $url, $title, $statusCode, $contentType]);
+        $stmt->execute([$this->crawlJobId, $url, $title, $metaDescription, $statusCode, $contentType]);
 
         $pageId = $this->db->lastInsertId();
 
         // If pageId is 0, fetch it manually
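The meta extraction mirrors the existing title handling: filter first, read the attribute only if a node matched, since `attr()` on an empty node list throws. The pattern can be verified in isolation; a minimal sketch, assuming symfony/dom-crawler and symfony/css-selector (the libraries behind `$domCrawler`) are installed:

```php
<?php
require 'vendor/autoload.php';

use Symfony\Component\DomCrawler\Crawler;

$html = '<html><head>'
      . '<title>Example</title>'
      . '<meta name="description" content="A short description.">'
      . '</head><body></body></html>';

$crawler = new Crawler($html);

// Same guard as in Crawler.php above: attr() throws on an empty node
// list, so check count() first and fall back to an empty string.
$metaDescription = $crawler->filter('meta[name="description"]')->count() > 0
    ? $crawler->filter('meta[name="description"]')->attr('content')
    : '';

var_dump($metaDescription); // string(20) "A short description."
```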
diff --git a/src/index.php b/src/index.php
index 9afb21b..b61f221 100644
--- a/src/index.php
+++ b/src/index.php
@@ -250,6 +250,8 @@
+            <table>
+                <thead>
+                    <tr>
+                        <th>URL</th>
+                        <th>Status Code</th>
+                        <th>Titel</th>
+                        <th>Gecrawlt</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr><td colspan="4">Keine defekten Links gefunden</td></tr>
+                </tbody>
+            </table>
+            <table>
+                <thead>
+                    <tr>
+                        <th>URL</th>
+                        <th>Title (Länge)</th>
+                        <th>Meta Description (Länge)</th>
+                        <th>Issues</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr><td colspan="4">Keine SEO-Probleme gefunden</td></tr>
+                </tbody>
+            </table>
+                Keine doppelten Inhalte gefunden
+            ';
+        }
+    }
 
     // Update jobs table
     loadJobs();
 } catch (e) {
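One deployment note: init.sql only seeds a freshly created MySQL volume, so databases created before this change never receive the new column or index. A hedged migration sketch; the DSN and credentials are placeholders, not part of this diff:

```php
<?php
// One-off migration for databases created before this change.
// DSN and credentials are placeholders, not part of this diff.
$db = new PDO(
    'mysql:host=localhost;dbname=crawler;charset=utf8mb4',
    'user',
    'secret',
    [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
);

$db->exec("ALTER TABLE pages ADD COLUMN meta_description TEXT AFTER title");
$db->exec("ALTER TABLE pages ADD INDEX idx_status_code (status_code)");
```

The `idx_status_code` index keeps the broken-links query from scanning every row of `pages` once crawl jobs grow large.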