Fix link extraction bug caused by type checking
The PHPStan fix inadvertently broke link extraction by using is_int() on $pageId; that check failed whenever lastInsertId() or fetchColumn() returned a string instead of an int.

Changes:
- Convert $pageId to int explicitly after fetching
- Use $pageId > 0 instead of is_int($pageId) for validation
- Handle both 0 and '0' cases when fetching manually

This ensures link extraction works again while maintaining type safety. Tests pass and PHPStan is clean.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -152,14 +152,18 @@ class Crawler
|
|||||||
$pageId = $this->db->lastInsertId();
|
$pageId = $this->db->lastInsertId();
|
||||||
|
|
||||||
// If pageId is 0, fetch it manually
|
// If pageId is 0, fetch it manually
|
||||||
if ($pageId == 0) {
|
if ($pageId == 0 || $pageId === '0') {
|
||||||
$stmt = $this->db->prepare("SELECT id FROM pages WHERE crawl_job_id = ? AND url = ?");
|
$stmt = $this->db->prepare("SELECT id FROM pages WHERE crawl_job_id = ? AND url = ?");
|
||||||
$stmt->execute([$this->crawlJobId, $url]);
|
$stmt->execute([$this->crawlJobId, $url]);
|
||||||
$pageId = $stmt->fetchColumn();
|
$fetchedId = $stmt->fetchColumn();
|
||||||
|
$pageId = is_numeric($fetchedId) ? (int)$fetchedId : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure pageId is an integer
|
||||||
|
$pageId = is_numeric($pageId) ? (int)$pageId : 0;
|
||||||
|
|
||||||
// Extract and save links
|
// Extract and save links
|
||||||
if (str_contains($contentType, 'text/html') && is_int($pageId)) {
|
if (str_contains($contentType, 'text/html') && $pageId > 0) {
|
||||||
echo "Extracting links from: $url (pageId: $pageId)\n";
|
echo "Extracting links from: $url (pageId: $pageId)\n";
|
||||||
$this->extractLinks($domCrawler, $url, $pageId, $depth);
|
$this->extractLinks($domCrawler, $url, $pageId, $depth);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user