Reorganize Docker-related files into docker/ directory

Moved Docker infrastructure files to dedicated docker/ folder:
- config/nginx/default.conf → docker/config/nginx/default.conf
- init.sql → docker/init.sql
- start.sh → docker/start.sh (currently unused)

Updated:
- docker-compose.yml: Adjusted volume paths
- README.md: Updated project structure documentation

Benefits:
- Clear separation between infrastructure (docker/) and application (src/)
- Better project organization
- Easier to understand for new developers

Docker Compose and Dockerfile remain in root for convenience.
All services tested and working correctly.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-10-04 08:31:47 +02:00
parent daa76b2141
commit de4d2e53d9
5 changed files with 19 additions and 10 deletions

View File

@@ -0,0 +1,31 @@
# Virtual host for the crawler web app: serves /var/www/html and
# proxies *.php requests to PHP-FPM over TCP port 9000.
server {
listen 80;
server_name localhost;
root /var/www/html;
index index.php index.html;
error_log /var/log/nginx/error.log;
access_log /var/log/nginx/access.log;
# Front-controller pattern: exact file first, then directory, then
# fall back to index.php carrying the original query string.
location / {
try_files $uri $uri/ /index.php?$query_string;
}
# Hand .php scripts to PHP-FPM; =404 stops requests for PHP files
# that do not exist on disk from reaching the FPM backend.
location ~ \.php$ {
try_files $uri =404;
fastcgi_split_path_info ^(.+\.php)(/.+)$;
fastcgi_pass localhost:9000;
fastcgi_index index.php;
include fastcgi_params;
fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
fastcgi_param PATH_INFO $fastcgi_path_info;
}
# Deny Apache-style dotfiles (.htaccess, .htpasswd).
location ~ /\.ht {
deny all;
}
# Deny the .git directory so repository history cannot be downloaded.
location ~ /\.git {
deny all;
}
}

66
docker/init.sql Normal file
View File

@@ -0,0 +1,66 @@
-- Database initialization script for Web Crawler
-- Dialect: MySQL/MariaDB (ENUM, AUTO_INCREMENT, InnoDB, utf8mb4).
-- Idempotent: every statement uses CREATE TABLE IF NOT EXISTS.

-- Crawl Jobs Table
-- One row per crawl run of a single domain; the pages, links and
-- crawl_queue tables below all reference it via crawl_job_id.
CREATE TABLE IF NOT EXISTS crawl_jobs (
id INT AUTO_INCREMENT PRIMARY KEY,
domain VARCHAR(255) NOT NULL, -- domain being crawled (exact format set by the writer -- confirm whether scheme/path are included)
status ENUM('pending', 'running', 'completed', 'failed') DEFAULT 'pending', -- job lifecycle state
total_pages INT DEFAULT 0, -- aggregate counters; presumably maintained by the crawler -- confirm against writer code
total_links INT DEFAULT 0,
started_at TIMESTAMP NULL, -- NULL until the job starts / finishes
completed_at TIMESTAMP NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_domain (domain), -- lookup of jobs by domain
INDEX idx_status (status) -- filtering jobs by lifecycle state
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Pages Table
-- One row per crawled page; owned by a crawl job and removed
-- automatically when that job is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS pages (
    id INT AUTO_INCREMENT PRIMARY KEY,
    crawl_job_id INT NOT NULL,
    url VARCHAR(2048) NOT NULL,
    title VARCHAR(500),             -- presumably the HTML <title>; NULL when not extracted -- confirm against writer
    status_code INT,                -- HTTP response status; NULL when no response was recorded
    content_type VARCHAR(100),
    crawled_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- Explicitly named constraint so FK errors and migrations are greppable.
    CONSTRAINT pages_crawl_job_id_fk
        FOREIGN KEY (crawl_job_id) REFERENCES crawl_jobs(id) ON DELETE CASCADE,
    INDEX idx_crawl_job (crawl_job_id),
    -- url(255): only a 255-character prefix is indexed, because the full
    -- VARCHAR(2048) in utf8mb4 exceeds InnoDB's index key-size limit.
    INDEX idx_url (url(255)),
    -- NOTE(review): uniqueness is enforced on the 255-char URL prefix only;
    -- two URLs in the same job sharing a 255-char prefix will be rejected
    -- as duplicates. Known, documented trade-off.
    UNIQUE KEY unique_job_url (crawl_job_id, url(255))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Links Table
-- One row per hyperlink discovered on a crawled page. Rows cascade away
-- when either the owning page or the whole crawl job is deleted.
CREATE TABLE IF NOT EXISTS links (
    id INT AUTO_INCREMENT PRIMARY KEY,
    page_id INT NOT NULL,
    crawl_job_id INT NOT NULL,          -- denormalized from pages to allow direct per-job queries
    source_url VARCHAR(2048) NOT NULL,
    target_url VARCHAR(2048) NOT NULL,
    link_text VARCHAR(1000),            -- presumably the anchor text; NULL when absent -- confirm against writer
    is_nofollow BOOLEAN DEFAULT FALSE,  -- presumably rel="nofollow" on the anchor -- confirm against crawler
    is_internal BOOLEAN DEFAULT TRUE,   -- presumably target is on the crawled domain -- confirm against crawler
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    -- Explicitly named constraints so FK errors and migrations are greppable.
    CONSTRAINT links_page_id_fk
        FOREIGN KEY (page_id) REFERENCES pages(id) ON DELETE CASCADE,
    CONSTRAINT links_crawl_job_id_fk
        FOREIGN KEY (crawl_job_id) REFERENCES crawl_jobs(id) ON DELETE CASCADE,
    INDEX idx_page (page_id),
    INDEX idx_crawl_job (crawl_job_id),
    -- 255-char prefixes: the full VARCHAR(2048) in utf8mb4 exceeds
    -- InnoDB's index key-size limit.
    INDEX idx_source_url (source_url(255)),
    INDEX idx_target_url (target_url(255)),
    INDEX idx_nofollow (is_nofollow)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Queue Table for parallel processing
-- Work queue of URLs still to fetch for a job. The status values suggest
-- workers claim rows by moving them pending -> processing -> completed/
-- failed -- confirm the exact protocol against the worker code.
CREATE TABLE IF NOT EXISTS crawl_queue (
    id INT AUTO_INCREMENT PRIMARY KEY,
    crawl_job_id INT NOT NULL,
    url VARCHAR(2048) NOT NULL,
    depth INT DEFAULT 0,                -- presumably link depth from the seed URL (0 = seed) -- confirm against crawler
    status ENUM('pending', 'processing', 'completed', 'failed') DEFAULT 'pending',
    retry_count INT DEFAULT 0,          -- number of re-attempts so far; cap enforced by the worker, not the schema
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    processed_at TIMESTAMP NULL,        -- NULL until a worker finishes the row
    -- Explicitly named constraint so FK errors and migrations are greppable.
    CONSTRAINT crawl_queue_crawl_job_id_fk
        FOREIGN KEY (crawl_job_id) REFERENCES crawl_jobs(id) ON DELETE CASCADE,
    INDEX idx_status (status),
    INDEX idx_crawl_job (crawl_job_id),
    -- Prevents the same URL being enqueued twice for one job; as in pages,
    -- uniqueness applies to the 255-char URL prefix only (InnoDB key-size limit).
    UNIQUE KEY unique_job_url (crawl_job_id, url(255))
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

7
docker/start.sh Normal file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Container entrypoint: start PHP-FPM in the background, then keep the
# container alive by running Nginx in the foreground.

# Abort immediately if php-fpm fails to start; previously nginx would
# start anyway and every .php request would 502.
set -e

# Start PHP-FPM daemonized (-D); nginx forwards PHP requests to it
# (the nginx config expects localhost:9000 -- the actual listen address
# comes from the FPM pool config, confirm they match).
php-fpm -D

# exec replaces this shell with nginx so nginx becomes PID 1 and
# receives container stop signals (SIGTERM) directly instead of them
# dying with the shell; 'daemon off;' keeps nginx in the foreground.
exec nginx -g 'daemon off;'