From 4e868ca8e995ee7c6bf8caec1f6aabb2463f04a9 Mon Sep 17 00:00:00 2001 From: Martin Date: Fri, 3 Oct 2025 20:22:17 +0200 Subject: [PATCH] Sonstiges --- index.php | 12 +- webanalyse.php | 639 +++++++++++++++++++++++++++---------------------- 2 files changed, 361 insertions(+), 290 deletions(-) diff --git a/index.php b/index.php index deb262d..49d6b3f 100644 --- a/index.php +++ b/index.php @@ -1,13 +1,11 @@ doCrawl(1); +$wa = new WebAnalyse(); +$db = mysqli_connect('localhost', 'root', '', 'screaming_frog'); +$wa->doCrawl(1); diff --git a/webanalyse.php b/webanalyse.php index e2fd76b..b3be88d 100644 --- a/webanalyse.php +++ b/webanalyse.php @@ -1,88 +1,53 @@ db = mysqli_connect("localhost", "root", "", "screaming_frog"); + $connection ??= mysqli_connect('localhost', 'root', '', 'screaming_frog'); + + if (!$connection instanceof mysqli) { + throw new RuntimeException('Verbindung zur Datenbank konnte nicht hergestellt werden: ' . mysqli_connect_error()); + } + + $connection->set_charset('utf8mb4'); + $this->db = $connection; } - /** - * Holt eine einzelne URL via cURL und liefert Response-Metadaten. + * Holt eine einzelne URL und gibt Response-Metadaten zurueck. * * @param string $url Zieladresse fuer den Abruf. * @return array Antwortdaten oder ein "error"-Schluessel. */ - function getWebsite($url) + public function getWebsite(string $url): array { - // cURL-Session initialisieren - $ch = curl_init(); + $handle = $this->createCurlHandle($url); + $response = curl_exec($handle); - // cURL-Optionen setzen - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // Antwort als String zurückgeben - curl_setopt($ch, CURLOPT_HEADER, true); // Header in der Antwort einschließen - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); // Weiterleitungen folgen - curl_setopt($ch, CURLOPT_TIMEOUT, 30); // Timeout nach 30 Sekunden - curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'); // User Agent setzen - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // SSL-Zertifikat nicht prüfen (nur für Tests) - - // Anfrage ausführen - $response = curl_exec($ch); - - // Fehler überprüfen - if (curl_errno($ch)) { - $error = curl_error($ch); - curl_close($ch); + if ($response === false) { + $error = curl_error($handle); + curl_close($handle); return ['error' => $error]; } - // Informationen abrufen - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $totalTime = curl_getinfo($ch, CURLINFO_TOTAL_TIME); - $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); + $info = curl_getinfo($handle); + curl_close($handle); - // cURL-Session schließen - curl_close($ch); - - // Header und Body trennen - $headers = substr($response, 0, $headerSize); - $body = substr($response, $headerSize); - - // Header in Array umwandeln - $headerLines = explode("\r\n", trim($headers)); - $parsedHeaders = []; - - foreach ($headerLines as $line) { - if (strpos($line, ':') !== false) { - list($key, $value) = explode(':', $line, 2); - $parsedHeaders[trim($key)] = trim($value); - } - } - - return [ - 'url' => $effectiveUrl, - 'status_code' => $httpCode, - // 'headers_raw' => $headers, - 'headers_parsed' => $parsedHeaders, - 'body' => $body, - 'response_time' => $totalTime, - 'body_size' => strlen($body) - ]; + return $this->buildResponsePayload($response, $info); } /** @@ -91,232 +56,202 @@ class webanalyse * @param array $urls Liste von Ziel-URLs. * @return array> Antworten je URL. */ - function getMultipleWebsites($urls) + public function getMultipleWebsites(array $urls): array { + if ($urls === []) { + return []; + } $results = []; - $curlHandles = []; $multiHandle = curl_multi_init(); + $handles = []; - // Einzelne cURL-Handles für jede URL erstellen foreach ($urls as $url) { - $ch = curl_init(); - - // cURL-Optionen setzen (gleich wie bei getWebsite) - curl_setopt($ch, CURLOPT_URL, $url); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_TIMEOUT, 30); - curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'); - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - - // Handle zum Multi-Handle hinzufügen - curl_multi_add_handle($multiHandle, $ch); - $curlHandles[$url] = $ch; + $handle = $this->createCurlHandle($url); + $handles[$url] = $handle; + curl_multi_add_handle($multiHandle, $handle); } - // Alle Anfragen parallel ausführen $running = null; do { - curl_multi_exec($multiHandle, $running); - curl_multi_select($multiHandle); - } while ($running > 0); + $status = curl_multi_exec($multiHandle, $running); + } while ($status === CURLM_CALL_MULTI_PERFORM); - - // Ergebnisse verarbeiten - foreach ($urls as $url) { - $ch = $curlHandles[$url]; - $response = curl_multi_getcontent($ch); - - // Fehler überprüfen - if (curl_errno($ch)) { - $error = curl_error($ch); - $results[$url] = ['error' => $error]; - } else { - // Informationen abrufen - $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $totalTime = curl_getinfo($ch, CURLINFO_TOTAL_TIME); - $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); - - // Header und Body trennen - $headers = substr($response, 0, $headerSize); - $body = substr($response, $headerSize); - - // Header in Array umwandeln - $headerLines = explode("\r\n", trim($headers)); - $parsedHeaders = []; - - foreach ($headerLines as $line) { - if (strpos($line, ':') !== false) { - list($key, $value) = explode(':', $line, 2); - $parsedHeaders[trim($key)] = trim($value); - } - } - - $results[$url] = [ - 'url' => $effectiveUrl, - 'status_code' => $httpCode, - 'headers_parsed' => $parsedHeaders, - 'body' => $body, - 'response_time' => $totalTime, - 'body_size' => strlen($body) - ]; + while ($running && $status === CURLM_OK) { + if (curl_multi_select($multiHandle, 1.0) === -1) { + usleep(100000); } - // Handle aus Multi-Handle entfernen und schließen - curl_multi_remove_handle($multiHandle, $ch); - curl_close($ch); + do { + $status = curl_multi_exec($multiHandle, $running); + } while ($status === CURLM_CALL_MULTI_PERFORM); + } + + foreach ($handles as $url => $handle) { + $response = curl_multi_getcontent($handle); + + if ($response === false) { + $results[$url] = ['error' => curl_error($handle)]; + } else { + $results[$url] = $this->buildResponsePayload($response, curl_getinfo($handle)); + } + + curl_multi_remove_handle($multiHandle, $handle); + curl_close($handle); } - // Multi-Handle schließen curl_multi_close($multiHandle); return $results; } - - - /** * Persistiert Response-Daten und stoesst die Analyse der gefundenen Links an. * * @param int $crawlID Identifier der Crawl-Session. * @param string $url Ursprung-URL, deren Antwort verarbeitet wird. * @param array $data Ergebnis der HTTP-Abfrage. - * @return void */ - function processResults(int $crawlID, string $url, array $data) + public function processResults(int $crawlID, string $url, array $data): void { - if (!isset($data['error'])) { - $status_code = $data['status_code']; - $response_time = $data['response_time']; - $body_size = $data['body_size']; - $date = date('Y-m-d H:i:s'); - $body = $data['body']; - - $sql = "UPDATE urls SET - status_code = " . $status_code . ", - response_time = " . ($response_time * 1000) . ", - body_size = " . $body_size . ", - date = now(), - body = '" . $this->db->real_escape_string($body) . "' - - WHERE url = '" . $this->db->real_escape_string($url) . "' AND crawl_id = " . $crawlID . " LIMIT 1"; - // echo $sql; - - $this->db->query($sql); - } else { - // Handle error case if needed - echo "Fehler bei der Analyse von $url: " . $data['error'] . "\n"; + if (isset($data['error'])) { + error_log(sprintf('Fehler bei der Analyse von %s: %s', $url, $data['error'])); + return; } + $body = (string)($data['body'] ?? ''); + + $update = $this->db->prepare( + 'UPDATE urls + SET status_code = ?, response_time = ?, body_size = ?, date = NOW(), body = ? + WHERE url = ? AND crawl_id = ? + LIMIT 1' + ); + + if ($update === false) { + throw new RuntimeException('Update-Statement konnte nicht vorbereitet werden: ' . $this->db->error); + } + + $statusCode = (int)($data['status_code'] ?? 0); + $responseTimeMs = (int)round(((float)($data['response_time'] ?? 0)) * 1000); + $bodySize = (int)($data['body_size'] ?? strlen($body)); + + $update->bind_param('iiissi', $statusCode, $responseTimeMs, $bodySize, $body, $url, $crawlID); + $update->execute(); + $update->close(); + $this->findNewUrls($crawlID, $body, $url); } - /** * Extrahiert Links aus einer Antwort und legt neue URL-Datensaetze an. * * @param int $crawlID Identifier der Crawl-Session. * @param string $body HTML-Koerper der Antwort. * @param string $url Bearbeitete URL, dient als Kontext fuer relative Links. - * @return void */ - function findNewUrls(int $crawlID, string $body, string $url) { - - - - - $links = $this->extractLinks($body, $url); - - $temp = $this->db->query("select id from urls where url = '".$this->db->real_escape_string($url)."' and crawl_id = ".$crawlID." LIMIT 1")->fetch_all(MYSQLI_ASSOC); - $vonUrlId = $temp[0]['id']; - - - $this->db->query("delete from links where von = ".$vonUrlId); - - foreach($links as $l) { - - $u = $this->db->query("insert ignore into urls (url, crawl_id) values ('".$this->db->real_escape_string($l['absolute_url'])."',".$crawlID.")"); - $id = $this->db->insert_id; - if ($id === 0) { - $qwer = $this->db->query("select id from urls where url = '".$this->db->real_escape_string($l['absolute_url'])."' and crawl_id = ".$crawlID." LIMIT 1")->fetch_all(MYSQLI_ASSOC); - $id = $qwer[0]['id']; - } - - - - - - $sql_links = "insert ignore into links (von, nach, linktext, dofollow) values ( - ".$vonUrlId.", - ".$id.", - - - '".$this->db->real_escape_string(mb_convert_encoding($l['text'],"UTF-8"))."', - ".(strstr($l['rel']??"", 'nofollow') === false ? 1 : 0)." - - - )"; - - echo $sql_links; - - $u = $this->db->query($sql_links); - - - + public function findNewUrls(int $crawlID, string $body, string $url): void + { + if ($body === '') { + return; } + $links = $this->extractLinks($body, $url); + if ($links === []) { + return; + } + $originId = $this->resolveUrlId($crawlID, $url); + if ($originId === null) { + return; + } - print_r($links); + $deleteLinksStmt = $this->db->prepare('DELETE FROM links WHERE von = ?'); + if ($deleteLinksStmt !== false) { + $deleteLinksStmt->bind_param('i', $originId); + $deleteLinksStmt->execute(); + $deleteLinksStmt->close(); + } + $insertUrlStmt = $this->db->prepare('INSERT IGNORE INTO urls (url, crawl_id) VALUES (?, ?)'); + $selectUrlStmt = $this->db->prepare('SELECT id FROM urls WHERE url = ? AND crawl_id = ? LIMIT 1'); + $insertLinkStmt = $this->db->prepare('INSERT IGNORE INTO links (von, nach, linktext, dofollow) VALUES (?, ?, ?, ?)'); + if (!$insertUrlStmt || !$selectUrlStmt || !$insertLinkStmt) { + throw new RuntimeException('Vorbereitete Statements konnten nicht erstellt werden: ' . $this->db->error); + } + + foreach ($links as $link) { + $absoluteUrl = (string)$link['absolute_url']; + + $insertUrlStmt->bind_param('si', $absoluteUrl, $crawlID); + $insertUrlStmt->execute(); + + $targetId = $this->db->insert_id; + if ($targetId === 0) { + $selectUrlStmt->bind_param('si', $absoluteUrl, $crawlID); + $selectUrlStmt->execute(); + $result = $selectUrlStmt->get_result(); + $targetId = $result ? (int)($result->fetch_assoc()['id'] ?? 0) : 0; + } + + if ($targetId === 0) { + continue; + } + + $linkText = $this->normaliseText((string)($link['text'] ?? '')); + $isFollow = (int)(strpos((string)($link['rel'] ?? ''), 'nofollow') !== false ? 0 : 1); + + $insertLinkStmt->bind_param('iisi', $originId, $targetId, $linkText, $isFollow); + $insertLinkStmt->execute(); + } + + $insertUrlStmt->close(); + $selectUrlStmt->close(); + $insertLinkStmt->close(); } - /** * Startet einen Crawl-Durchlauf fuer unbehandelte URLs. * * @param int $crawlID Identifier der Crawl-Session. - * @return void */ - function doCrawl(int $crawlID) + public function doCrawl(int $crawlID): void { + $statement = $this->db->prepare( + 'SELECT url FROM urls WHERE crawl_id = ? AND date IS NULL LIMIT 50' + ); - $urls2toCrawl = $this->db->query("select * from urls where crawl_id = " . $crawlID . " and date is null LIMIT 2")->fetch_all(MYSQLI_ASSOC); // and date is not null - - - $urls = []; - foreach ($urls2toCrawl as $u) { - $urls[] = $u['url']; + if ($statement === false) { + return; } - $multipleResults = $this->getMultipleWebsites($urls); + $statement->bind_param('i', $crawlID); + $statement->execute(); + $result = $statement->get_result(); - // print_r($multipleResults); - foreach ($multipleResults as $url => $data) { + if (!$result instanceof mysqli_result) { + $statement->close(); + return; + } + $urls = []; + while ($row = $result->fetch_assoc()) { + $urls[] = $row['url']; + } + + $result->free(); + $statement->close(); + + if ($urls === []) { + return; + } + + foreach ($this->getMultipleWebsites($urls) as $url => $data) { $this->processResults($crawlID, $url, $data); } } - - - - - - - - - - - - - - /** * Parst HTML-Inhalt und liefert eine strukturierte Liste gefundener Links. * @@ -324,49 +259,40 @@ class webanalyse * @param string $baseUrl Basis-URL fuer die Aufloesung relativer Pfade. * @return array> Gesammelte Linkdaten. */ - function extractLinks($html, $baseUrl = '') + public function extractLinks(string $html, string $baseUrl = ''): array { $links = []; - // DOMDocument erstellen und HTML laden $dom = new DOMDocument(); - - // Fehlerbehandlung für ungültiges HTML - libxml_use_internal_errors(true); + $previous = libxml_use_internal_errors(true); $dom->loadHTML('' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); libxml_clear_errors(); + libxml_use_internal_errors($previous); - // Alle Tags finden - $aTags = $dom->getElementsByTagName('a'); + foreach ($dom->getElementsByTagName('a') as $index => $aTag) { + $href = trim($aTag->getAttribute('href')); + if ($href === '') { + continue; + } - foreach ($aTags as $index => $aTag) { - $href = $aTag->getAttribute('href'); - $text = trim($aTag->textContent); + $absoluteUrl = $this->resolveUrl($href, $baseUrl); + $text = $this->normaliseText(trim($aTag->textContent)); $rel = $aTag->getAttribute('rel'); $title = $aTag->getAttribute('title'); $target = $aTag->getAttribute('target'); - // Nur Links mit href-Attribut - if (!empty($href)) { - // Relative URLs zu absoluten URLs konvertieren - $absoluteUrl = $href; - if (!empty($baseUrl) && !preg_match('/^https?:\/\//', $href)) { - $absoluteUrl = rtrim($baseUrl, '/') . '/' . ltrim($href, '/'); - } - - $links[] = [ - 'index' => $index + 1, - 'href' => $href, - 'absolute_url' => $absoluteUrl, - 'text' => $text, - 'rel' => $rel ?: null, - 'title' => $title ?: null, - 'target' => $target ?: null, - 'is_external' => $this->isExternalLink($href, $baseUrl), - 'link_type' => $this->getLinkType($href), - 'is_internal' => $this->isInternalLink($href, $baseUrl)?1:0 - ]; - } + $links[] = [ + 'index' => $index + 1, + 'href' => $href, + 'absolute_url' => $absoluteUrl, + 'text' => $text, + 'rel' => $rel !== '' ? $rel : null, + 'title' => $title !== '' ? $title : null, + 'target' => $target !== '' ? $target : null, + 'is_external' => $this->isExternalLink($absoluteUrl, $baseUrl), + 'link_type' => $this->getLinkType($href), + 'is_internal' => $this->isInternalLink($absoluteUrl, $baseUrl) ? 1 : 0, + ]; } return $links; @@ -379,19 +305,20 @@ class webanalyse * @param string $baseUrl Ausgangsadresse zur Domainabgleichung. * @return bool|null True fuer extern, false fuer intern, null falls undefiniert. */ - private function isExternalLink($href, $baseUrl) + private function isExternalLink(string $href, string $baseUrl): ?bool { - if (empty($baseUrl)) return null; - - // Relative Links sind intern - if (!preg_match('/^https?:\/\//', $href)) { - return false; + if ($baseUrl === '') { + return null; } $baseDomain = parse_url($baseUrl, PHP_URL_HOST); $linkDomain = parse_url($href, PHP_URL_HOST); - return $baseDomain !== $linkDomain; + if ($baseDomain === null || $linkDomain === null) { + return null; + } + + return !hash_equals($baseDomain, $linkDomain); } /** @@ -401,19 +328,20 @@ class webanalyse * @param string $baseUrl Ausgangsadresse zur Domainabgleichung. * @return bool|null True fuer intern, false fuer extern, null falls undefiniert. */ - private function isInternalLink($href, $baseUrl) + private function isInternalLink(string $href, string $baseUrl): ?bool { - if (empty($baseUrl)) return null; - - // Relative Links sind intern - if (!preg_match('/^https?:\/\//', $href)) { - return true; + if ($baseUrl === '') { + return null; } $baseDomain = parse_url($baseUrl, PHP_URL_HOST); $linkDomain = parse_url($href, PHP_URL_HOST); - return $baseDomain === $linkDomain; + if ($baseDomain === null || $linkDomain === null) { + return null; + } + + return hash_equals($baseDomain, $linkDomain); } /** @@ -422,36 +350,181 @@ class webanalyse * @param string $href Ziel des Links. * @return string Beschreibender Typ wie "absolute" oder "email". */ - private function getLinkType($href) + private function getLinkType(string $href): string { - if (empty($href)) return 'empty'; - if (strpos($href, 'mailto:') === 0) return 'email'; - if (strpos($href, 'tel:') === 0) return 'phone'; - if (strpos($href, '#') === 0) return 'anchor'; - if (strpos($href, 'javascript:') === 0) return 'javascript'; - if (preg_match('/^https?:\/\//', $href)) return 'absolute'; + if ($href === '') { + return 'empty'; + } + + $lower = strtolower($href); + if (strpos($lower, 'mailto:') === 0) { + return 'email'; + } + if (strpos($lower, 'tel:') === 0) { + return 'phone'; + } + if (strpos($lower, '#') === 0) { + return 'anchor'; + } + if (strpos($lower, 'javascript:') === 0) { + return 'javascript'; + } + if (filter_var($href, FILTER_VALIDATE_URL)) { + return 'absolute'; + } + return 'relative'; } - /** * Gruppiert Links anhand ihres vorab bestimmten Typs. * * @param array> $links Liste der extrahierten Links. * @return array>> Links nach Typ gruppiert. */ - function groupLinksByType($links) + public function groupLinksByType(array $links): array { $grouped = []; foreach ($links as $link) { - $type = $link['link_type']; - if (!isset($grouped[$type])) { - $grouped[$type] = []; - } + $type = (string)($link['link_type'] ?? 'unknown'); $grouped[$type][] = $link; } return $grouped; } + + /** + * Erstellt ein konfiguriertes Curl-Handle fuer einen Request. + * + * @return CurlHandle + */ + private function createCurlHandle(string $url) + { + $handle = curl_init($url); + if ($handle === false) { + throw new RuntimeException('Konnte Curl-Handle nicht initialisieren: ' . $url); + } + + curl_setopt_array($handle, [ + CURLOPT_URL => $url, + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HEADER => true, + CURLOPT_FOLLOWLOCATION => true, + CURLOPT_TIMEOUT => self::CURL_TIMEOUT, + CURLOPT_USERAGENT => self::USER_AGENT, + CURLOPT_SSL_VERIFYPEER => false, + ]); + + return $handle; + } + + /** + * Splittet Header und Body und bereitet das Antwort-Array auf. + * + * @param string $response Vollstaendige Response inkl. Header. + * @param array $info curl_getinfo Ergebnis. + * @return array + */ + private function buildResponsePayload(string $response, array $info): array + { + $headerSize = (int)($info['header_size'] ?? 0); + $headers = substr($response, 0, $headerSize); + $body = substr($response, $headerSize); + + return [ + 'url' => $info['url'] ?? ($info['redirect_url'] ?? ''), + 'status_code' => (int)($info['http_code'] ?? 0), + 'headers_parsed' => $this->parseHeaders($headers), + 'body' => $body, + 'response_time' => (float)($info['total_time'] ?? 0.0), + 'body_size' => strlen($body), + ]; + } + + /** + * Wandelt Header-String in ein assoziatives Array um. + * + * @param string $headers Roh-Header. + * @return array + */ + private function parseHeaders(string $headers): array + { + $parsed = []; + foreach (preg_split('/\r?\n/', trim($headers)) as $line) { + if ($line === '' || strpos($line, ':') === false) { + continue; + } + + [$key, $value] = explode(':', $line, 2); + $parsed[trim($key)] = trim($value); + } + + return $parsed; + } + + /** + * Normalisiert relativen Pfad gegenueber einer Basis-URL zu einer absoluten Adresse. + */ + private function resolveUrl(string $href, string $baseUrl): string + { + if ($href === '' || filter_var($href, FILTER_VALIDATE_URL)) { + return $href; + } + + if ($baseUrl === '') { + return $href; + } + + $baseParts = parse_url($baseUrl); + if ($baseParts === false || !isset($baseParts['scheme'], $baseParts['host'])) { + return $href; + } + + $scheme = $baseParts['scheme']; + $host = $baseParts['host']; + $port = isset($baseParts['port']) ? ':' . $baseParts['port'] : ''; + $basePath = $baseParts['path'] ?? '/'; + + if (strpos($href, '/') === 0) { + $path = $href; + } else { + if (substr($basePath, -1) !== '/') { + $basePath = preg_replace('#/[^/]*$#', '/', $basePath) ?: '/'; + } + $path = $basePath . $href; + } + + return sprintf('%s://%s%s%s', $scheme, $host, $port, '/' . ltrim($path, '/')); + } + + /** + * Sorgt fuer sauberen UTF-8 Text ohne Steuerzeichen. + */ + private function normaliseText(string $text): string + { + $normalized = preg_replace('/\s+/u', ' ', $text) ?? ''; + $encoding = mb_detect_encoding($normalized, ['UTF-8', 'ISO-8859-1', 'Windows-1252'], true) ?: 'UTF-8'; + + return trim(mb_convert_encoding($normalized, 'UTF-8', $encoding)); + } + + /** + * Ermittelt die ID einer URL innerhalb eines Crawl-Durchlaufs. + */ + private function resolveUrlId(int $crawlID, string $url): ?int + { + $statement = $this->db->prepare('SELECT id FROM urls WHERE url = ? AND crawl_id = ? LIMIT 1'); + if ($statement === false) { + return null; + } + + $statement->bind_param('si', $url, $crawlID); + $statement->execute(); + $result = $statement->get_result(); + $id = $result ? $result->fetch_assoc()['id'] ?? null : null; + $statement->close(); + + return $id !== null ? (int)$id : null; + } }