<?php
require_once __DIR__ . '/../helpers/logger.php';
require_once __DIR__ . '/../models/AuditModel.php';
require_once __DIR__ . '/../models/UrlModel.php';
require_once __DIR__ . '/../models/PageModel.php';
require_once __DIR__ . '/Discover.php';
require_once __DIR__ . '/Fetcher.php';
require_once __DIR__ . '/Parser.php';

/**
 * Drives an audit forward one batch at a time.
 *
 * Each call to runOneBatch() picks the next runnable audit, runs the
 * discovery stage if it has not completed yet, then fetches and parses
 * one batch of queued URLs and updates the audit counters.
 */
class Runner {
  /**
   * Process a single batch of URLs for the next runnable audit.
   *
   * @param mysqli $db Database handle passed to the models and to Discover::run().
   * @return array Always contains 'ok' and 'message'; 'audit_id' is present
   *               once an audit was picked, and 'counts' after a batch ran.
   */
  public static function runOneBatch(mysqli $db): array {
    $auditModel = new AuditModel($db);
    $row = $auditModel->nextRunnableAudit();
    if (!$row) {
      return ['ok' => true, 'message' => 'No audits to process'];
    }

    $auditId = (int)$row['id'];
    // json_decode() returns scalars for scalar JSON ("5", "true", ...);
    // anything that is not an array is treated as "no settings".
    $settings = json_decode($row['settings'] ?? '{}', true);
    if (!is_array($settings)) {
      $settings = [];
    }
    $startUrl = (string)$row['start_url'];
    $status = (string)$row['status'];

    // Discovery stage (once). 'discovering' is re-entered as well, which
    // covers a previous run that stopped before reaching 'running'.
    if ($status === 'queued' || $status === 'discovering') {
      $auditModel->setStatus($auditId, 'discovering');
      Discover::run($db, $auditId, $startUrl, $settings);
      $auditModel->touchStarted($auditId);
      $auditModel->setStatus($auditId, 'running');
    }

    $urlModel = new UrlModel($db);
    $pageModel = new PageModel($db);

    // Clamp to at least 1: a zero/negative batch size would presumably make
    // nextBatch() return nothing while URLs are still queued, stalling the
    // audit forever. NOTE(review): assumes nextBatch() uses this as a row limit.
    $batchSize = max(1, (int)($settings['batch_size'] ?? 15));
    $items = $urlModel->nextBatch($auditId, $batchSize);

    if (empty($items)) {
      // No more queued URLs
      $counts = $urlModel->counts($auditId);
      $queued = (int)($counts['queued'] ?? 0);
      $fetching = (int)($counts['fetching'] ?? 0);
      if ($queued === 0 && $fetching === 0) {
        $auditModel->setStatus($auditId, 'evaluating');
        $auditModel->touchFinished($auditId);
        $auditModel->setStatus($auditId, 'done');
        return ['ok' => true, 'message' => 'Audit finished (Phase 1: crawl complete)', 'audit_id' => $auditId];
      }
      return ['ok' => true, 'message' => 'Nothing to crawl right now', 'audit_id' => $auditId];
    }

    // Loop-invariant: read the crawl delay once instead of per URL.
    $delayMs = (int)($settings['crawl_delay_ms'] ?? 300);

    foreach ($items as $it) {
      $urlId = (int)$it['id'];
      $url = (string)$it['url'];
      $depth = (int)($it['depth'] ?? 0);

      $urlModel->setState($urlId, 'fetching', ['updated_at' => date('Y-m-d H:i:s')]);

      // Respect crawl delay
      if ($delayMs > 0) {
        usleep($delayMs * 1000);
      }

      try {
        self::processUrl($urlModel, $pageModel, $auditId, $urlId, $url, $depth, $settings);
      } catch (\Throwable $e) {
        // Without this, a throwing fetch/parse/store would leave the URL in
        // 'fetching', and the completion check above (fetching === 0) could
        // never succeed. Record the failure and continue with the batch.
        $urlModel->setState($urlId, 'failed', [
          'last_error' => 'Exception: ' . $e->getMessage(),
          'updated_at' => date('Y-m-d H:i:s'),
        ]);
      }
    }

    // Update counters
    $counts = $urlModel->counts($auditId);
    $auditModel->updateCounters($auditId, (int)($counts['total'] ?? 0), (int)($counts['crawled'] ?? 0));

    return ['ok' => true, 'message' => 'Batch processed', 'audit_id' => $auditId, 'counts' => $counts];
  }

  /**
   * Fetch one URL, store its parsed page, enqueue discovered links and
   * record the resulting state ('failed', 'skipped' or 'fetched').
   *
   * @param UrlModel  $urlModel  Model used for state changes and enqueueing.
   * @param PageModel $pageModel Model used to store parsed page data.
   * @param int       $auditId   Audit the URL belongs to.
   * @param int       $urlId     Id of the URL row being processed.
   * @param string    $url       URL to fetch.
   * @param int       $depth     Depth of this URL as stored on its row.
   * @param array     $settings  Decoded audit settings.
   */
  private static function processUrl(UrlModel $urlModel, PageModel $pageModel, int $auditId, int $urlId, string $url, int $depth, array $settings) {
    $res = Fetcher::fetch($url, $settings);
    if (!is_array($res)) {
      // Treated the same as a response without an http_code.
      $res = [];
    }

    $httpCode = (int)($res['http_code'] ?? 0);
    if ($httpCode <= 0) {
      // No usable HTTP response: clear the fetch metadata and keep the error.
      $urlModel->setState($urlId, 'failed', [
        'http_code' => null,
        'final_url' => null,
        'redirect_hops' => null,
        'content_type' => null,
        'load_time_ms' => null,
        'bytes' => null,
        'last_error' => (string)($res['error'] ?? 'Fetch error'),
        'updated_at' => date('Y-m-d H:i:s'),
      ]);
      return;
    }

    $meta = self::fetchMeta($res, $url);
    $finalUrl = $meta['final_url'];
    $ctype = $meta['content_type'];
    // One timestamp so fetched_at and updated_at cannot differ by a second.
    $now = date('Y-m-d H:i:s');

    // If only_html enabled and content-type not html, skip
    $onlyHtml = (bool)($settings['only_html'] ?? true);
    if ($onlyHtml && $ctype !== '' && stripos($ctype, 'text/html') === false) {
      $urlModel->setState($urlId, 'skipped', $meta + [
        'last_error' => 'Non-HTML skipped',
        'fetched_at' => $now,
        'updated_at' => $now,
      ]);
      return;
    }

    $body = (string)($res['body'] ?? '');
    $statusOk = $httpCode >= 200 && $httpCode < 400;
    if (!$statusOk || $body === '') {
      $error = 'HTTP ' . $httpCode;
      if ($statusOk) {
        // A 2xx/3xx code alone does not explain the failure; say why.
        $error .= ' (empty body)';
      }
      $urlModel->setState($urlId, 'failed', $meta + [
        'last_error' => $error,
        'fetched_at' => $now,
        'updated_at' => $now,
      ]);
      return;
    }

    $parsed = Parser::parse($body);
    if (!is_array($parsed)) {
      $parsed = [];
    }
    if (($parsed['ok'] ?? false) === true) {
      $pageModel->upsert($auditId, $urlId, $parsed['data']);
    }

    // Discover additional internal links (simple)
    self::enqueueInternalLinks($urlModel, $auditId, $parsed, $depth, $finalUrl, $settings);

    $urlModel->setState($urlId, 'fetched', $meta + [
      'last_error' => null,
      'fetched_at' => $now,
      'updated_at' => $now,
    ]);
  }

  /**
   * Normalise the fetch result into the metadata columns written with
   * every post-fetch state change.
   *
   * @param array  $res Result array returned by Fetcher::fetch().
   * @param string $url Requested URL, used when no final URL was reported.
   * @return array Keys: http_code, final_url, redirect_hops, content_type,
   *               load_time_ms, bytes (in that order).
   */
  private static function fetchMeta(array $res, string $url): array {
    $finalUrl = (string)($res['final_url'] ?? '');
    if ($finalUrl === '') {
      // An empty final URL is useless both as stored data and as the base
      // for resolving links, so fall back to the requested URL.
      $finalUrl = $url;
    }

    return [
      'http_code' => (int)($res['http_code'] ?? 0),
      'final_url' => $finalUrl,
      'redirect_hops' => (int)($res['redirect_hops'] ?? 0),
      'content_type' => (string)($res['content_type'] ?? ''),
      'load_time_ms' => (int)($res['load_time_ms'] ?? 0),
      'bytes' => (int)($res['bytes'] ?? 0),
    ];
  }

  /**
   * Enqueue internal links found on a parsed page, honouring the
   * max_depth and max_urls settings. At most the first 50 links of a
   * page are considered.
   *
   * @param UrlModel $urlModel Model used for counting and enqueueing.
   * @param int      $auditId  Audit the links belong to.
   * @param array    $parsed   Result of Parser::parse(); links are read from
   *                           $parsed['data']['links_json'] when present.
   * @param int      $depth    Depth of the page the links were found on.
   * @param string   $finalUrl Final URL of that page; used as resolution base
   *                           and passed to enqueue() as the fifth argument.
   * @param array    $settings Decoded audit settings.
   */
  private static function enqueueInternalLinks(UrlModel $urlModel, int $auditId, array $parsed, int $depth, string $finalUrl, array $settings) {
    $linksJson = $parsed['data']['links_json'] ?? null;
    if (!is_string($linksJson) || $linksJson === '') {
      return;
    }

    $maxDepth = (int)($settings['max_depth'] ?? 4);
    if ($depth >= $maxDepth) {
      // Checked before counts() so pages at the depth limit cost no query.
      return;
    }

    $links = json_decode($linksJson, true);
    $internals = is_array($links) ? ($links['internal_urls'] ?? []) : [];
    if (!is_array($internals) || empty($internals)) {
      return;
    }

    $maxUrls = (int)($settings['max_urls'] ?? 300);
    $counts = $urlModel->counts($auditId);
    $totalKnown = (int)($counts['total'] ?? 0);
    $enqueueOptions = [
      'allow_query_params' => (bool)($settings['allow_query_params'] ?? false)
    ];

    foreach (array_slice($internals, 0, 50) as $lnk) {
      if ($totalKnown >= $maxUrls) {
        break;
      }
      if (!is_string($lnk) || $lnk === '') {
        continue;
      }
      $abs = seom_absolutize_url($lnk, $finalUrl);
      if (!$abs) {
        continue;
      }
      $ok = $urlModel->enqueue($auditId, $abs, $depth + 1, 'internal', $finalUrl, $enqueueOptions);
      if ($ok) {
        // Only count links that enqueue() reported as accepted.
        $totalKnown++;
      }
    }
  }
}
