From 09d5b617794096a0c060cbfe897cd208df9e3028 Mon Sep 17 00:00:00 2001
From: Martin
Date: Sat, 4 Oct 2025 08:18:52 +0200
Subject: [PATCH] Fix link extraction bug caused by type checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PHPStan fix inadvertently broke link extraction by using is_int()
on $pageId, which failed when lastInsertId() or fetchColumn() returned
a string instead of an int.

Changes:
- Convert $pageId to int explicitly after fetching
- Use $pageId > 0 instead of is_int($pageId) for validation
- Handle both 0 and '0' cases when fetching manually

This ensures link extraction works again while maintaining type safety.
Tests pass, PHPStan clean.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 src/classes/Crawler.php | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/classes/Crawler.php b/src/classes/Crawler.php
index 548a19f..237ca63 100644
--- a/src/classes/Crawler.php
+++ b/src/classes/Crawler.php
@@ -152,14 +152,18 @@ class Crawler
         $pageId = $this->db->lastInsertId();
 
         // If pageId is 0, fetch it manually
-        if ($pageId == 0) {
+        if ($pageId == 0 || $pageId === '0') {
             $stmt = $this->db->prepare("SELECT id FROM pages WHERE crawl_job_id = ? AND url = ?");
             $stmt->execute([$this->crawlJobId, $url]);
-            $pageId = $stmt->fetchColumn();
+            $fetchedId = $stmt->fetchColumn();
+            $pageId = is_numeric($fetchedId) ? (int)$fetchedId : 0;
         }
 
+        // Ensure pageId is an integer
+        $pageId = is_numeric($pageId) ? (int)$pageId : 0;
+
         // Extract and save links
-        if (str_contains($contentType, 'text/html') && is_int($pageId)) {
+        if (str_contains($contentType, 'text/html') && $pageId > 0) {
             echo "Extracting links from: $url (pageId: $pageId)\n";
             $this->extractLinks($domCrawler, $url, $pageId, $depth);
         } else {