Initial commit: MangaReader iOS App

✨ Features: - App iOS completa para leer manga sin publicidad - Scraper con WKWebView para manhwaweb.com - Sistema de descargas offline - Lector con zoom y navegación - Favoritos y progreso de lectura - Compatible con iOS 15+ y Sideloadly/3uTools 📦 Contenido: - Backend Node.js con Puppeteer (opcional) - App iOS con SwiftUI - Scraper de capítulos e imágenes - Sistema de almacenamiento local - Testing completo - Documentación exhaustiva 🧪 Prueba: Capítulo 789 de One Piece descargado exitosamente - 21 páginas descargadas - 4.68 MB total - URLs verificadas y funcionales 🎉 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-04 15:34:18 +01:00
commit b474182dd9
6394 changed files with 1063909 additions and 0 deletions
--- a/ios-app/Sources/Services/ManhwaWebScraper.swift
+++ b/ios-app/Sources/Services/ManhwaWebScraper.swift
@@ -0,0 +1,440 @@
+import Foundation
+import Combine
+import WebKit
+
+/// Scraper que utiliza WKWebView para extraer contenido de manhwaweb.com.
+///
+/// `ManhwaWebScraper` implementa la extracción de datos de sitios web que usan
+/// JavaScript dinámico para renderizar contenido. Esta estrategia es necesaria
+/// porque manhwaweb.com carga su contenido mediante JavaScript después de la
+/// carga inicial de la página, lo que impide el uso de HTTP requests simples.
+///
+/// El scraper utiliza un `WKWebView` invisible para cargar páginas, esperar a que
+/// JavaScript termine de ejecutarse, y luego extraer la información mediante
+/// inyección de JavaScript.
+///
+/// # Example
+/// ```swift
+/// let scraper = ManhwaWebScraper.shared
+/// do {
+///     let manga = try await scraper.scrapeMangaInfo(mangaSlug: "one-piece_1695365223767")
+///     print("Manga: \(manga.title)")
+///
+///     let chapters = try await scraper.scrapeChapters(mangaSlug: manga.slug)
+///     print("Capítulos: \(chapters.count)")
+/// } catch {
+///     print("Error: \(error.localizedDescription)")
+/// }
+/// ```
+@MainActor
+class ManhwaWebScraper: NSObject, ObservableObject {
+    // MARK: - Properties
+
+    /// WebView instance para cargar y ejecutar JavaScript
+    private var webView: WKWebView?
+
+    /// Continuation intended for async wait operations; no code in this file assigns or reads it.
+    private var continuation: CheckedContinuation<Void, Never>?
+
+    // MARK: - Singleton
+
+    /// Instancia compartida del scraper (Singleton pattern)
+    static let shared = ManhwaWebScraper()
+
+    // MARK: - Initialization
+
+    /// Private initializer: instances are created only through `shared`; it also sets up the web view.
+    private override init() {
+        super.init()
+        setupWebView()
+    }
+
+    // MARK: - Setup
+
+    /// Builds the hidden `WKWebView` that every scraping method loads pages into.
+    ///
+    /// - Sets `applicationNameForUserAgent`; WebKit appends this value to its default
+    ///   user agent string rather than replacing it.
+    /// - Enables content JavaScript through `defaultWebpagePreferences`, the replacement
+    ///   for the `WKPreferences.javaScriptEnabled` property deprecated in iOS 14.
+    /// - Registers `self` as navigation delegate so navigation failures get logged.
+    private func setupWebView() {
+        let configuration = WKWebViewConfiguration()
+        configuration.applicationNameForUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15"
+
+        // The scraped pages render their content with JavaScript, so it must stay enabled.
+        configuration.defaultWebpagePreferences.allowsContentJavaScript = true
+
+        let view = WKWebView(frame: .zero, configuration: configuration)
+        view.navigationDelegate = self
+        webView = view
+    }
+
+    // MARK: - Scraper Functions
+
+    /// Scrapes the chapter list of a manga from manhwaweb.com.
+    ///
+    /// Loads the manga page, waits the fixed delay applied by `loadURLAndWait`, and runs a
+    /// script that collects every `/leer/` link whose URL contains a chapter number.
+    /// The script keeps the first link per chapter number and sorts newest first.
+    ///
+    /// The chapter slug is the part of the link after `/leer/`, for relative and absolute
+    /// links alike, so it can be passed straight to `scrapeChapterImages(chapterSlug:)`.
+    ///
+    /// - Parameter mangaSlug: Unique manga slug (e.g. `"one-piece_1695365223767"`).
+    /// - Returns: Chapters sorted by number, descending. Rows missing a field are skipped.
+    /// - Throws: `ScrapingError.webViewNotInitialized` when there is no web view,
+    ///   `URLError(.badURL)` when the slug does not form a valid URL,
+    ///   `ScrapingError.parsingError` when the script result has an unexpected shape,
+    ///   or any error raised while loading the page or evaluating the script.
+    ///
+    /// # Example
+    /// ```swift
+    /// do {
+    ///     let chapters = try await scraper.scrapeChapters(mangaSlug: "one-piece_1695365223767")
+    ///     print("Found \(chapters.count) chapters")
+    /// } catch {
+    ///     print("Failed to scrape chapters: \(error)")
+    /// }
+    /// ```
+    func scrapeChapters(mangaSlug: String) async throws -> [Chapter] {
+        guard let webView = webView else {
+            throw ScrapingError.webViewNotInitialized
+        }
+        // `URL(string:)` yields nil for a malformed slug; throw instead of crashing.
+        guard let url = URL(string: "https://manhwaweb.com/manga/\(mangaSlug)") else {
+            throw URLError(.badURL)
+        }
+
+        try await loadURLAndWait(url)
+
+        let rawResult = try await webView.evaluateJavaScript("""
+            (function() {
+                const chapters = [];
+                const links = document.querySelectorAll('a[href*="/leer/"]');
+
+                links.forEach(link => {
+                    const href = link.getAttribute('href');
+                    const text = link.textContent?.trim();
+
+                    if (href && text && href.includes('/leer/')) {
+                        // Extraer número de capítulo
+                        const match = href.match(/(\\d+)(?:\\/|\\?|\\s*$)/);
+                        const chapterNumber = match ? parseInt(match[1]) : null;
+
+                        if (chapterNumber && !isNaN(chapterNumber)) {
+                            chapters.push({
+                                number: chapterNumber,
+                                title: text,
+                                url: href.startsWith('http') ? href : 'https://manhwaweb.com' + href,
+                                slug: href.substring(href.indexOf('/leer/') + '/leer/'.length)
+                            });
+                        }
+                    }
+                });
+
+                // Eliminar duplicados
+                const unique = chapters.filter((chapter, index, self) =>
+                    index === self.findIndex((c) => c.number === chapter.number)
+                );
+
+                // Ordenar descendente
+                return unique.sort((a, b) => b.number - a.number);
+            })();
+        """)
+
+        // The script always returns an array of objects; anything else is a parse failure.
+        guard let rows = rawResult as? [[String: Any]] else {
+            throw ScrapingError.parsingError
+        }
+
+        return rows.compactMap { row -> Chapter? in
+            guard let number = row["number"] as? Int,
+                  let title = row["title"] as? String,
+                  let link = row["url"] as? String,
+                  let slug = row["slug"] as? String else {
+                return nil
+            }
+            return Chapter(number: number, title: title, url: link, slug: slug)
+        }
+    }
+
+    /// Scrapes the page image URLs of a chapter.
+    ///
+    /// Loads the reader page, waits the longer fixed delay of `loadURLAndWait` so that
+    /// images have time to appear, and runs a script over every `<img>` element.
+    /// The script reads `src` (falling back to `data-src`), drops entries that look like
+    /// UI assets (avatar, icon, logo, button), keeps only values containing `http`,
+    /// and removes duplicates while preserving document order.
+    ///
+    /// - Parameter chapterSlug: Chapter slug (e.g. `"one-piece/capitulo-1"`).
+    /// - Returns: Image URLs in the order they appear on the page.
+    /// - Throws: `ScrapingError.webViewNotInitialized` when there is no web view,
+    ///   `URLError(.badURL)` when the slug does not form a valid URL,
+    ///   `ScrapingError.parsingError` when the script result is not an array of strings,
+    ///   or any error raised while loading the page or evaluating the script.
+    ///
+    /// # Example
+    /// ```swift
+    /// do {
+    ///     let images = try await scraper.scrapeChapterImages(chapterSlug: "one-piece/1")
+    ///     print("Found \(images.count) pages")
+    /// } catch {
+    ///     print("Failed to scrape images: \(error)")
+    /// }
+    /// ```
+    func scrapeChapterImages(chapterSlug: String) async throws -> [String] {
+        guard let webView = webView else {
+            throw ScrapingError.webViewNotInitialized
+        }
+        // `URL(string:)` yields nil for a malformed slug; throw instead of crashing.
+        guard let url = URL(string: "https://manhwaweb.com/leer/\(chapterSlug)") else {
+            throw URLError(.badURL)
+        }
+
+        try await loadURLAndWait(url, waitForImages: true)
+
+        let rawResult = try await webView.evaluateJavaScript("""
+            (function() {
+                const imageUrls = [];
+                const imgs = document.querySelectorAll('img');
+
+                imgs.forEach(img => {
+                    let src = img.src || img.getAttribute('data-src');
+
+                    if (src) {
+                        // Filtrar UI elements
+                        const alt = (img.alt || '').toLowerCase();
+                        const className = (img.className || '').toLowerCase();
+
+                        const isUIElement =
+                            src.includes('avatar') ||
+                            src.includes('icon') ||
+                            src.includes('logo') ||
+                            src.includes('button') ||
+                            alt.includes('avatar') ||
+                            className.includes('avatar') ||
+                            className.includes('icon');
+
+                        if (!isUIElement && src.includes('http')) {
+                            imageUrls.push(src);
+                        }
+                    }
+                });
+
+                // Eliminar duplicados preservando orden
+                return [...new Set(imageUrls)];
+            })();
+        """)
+
+        // The script always returns an array of strings; anything else is a parse failure.
+        guard let images = rawResult as? [String] else {
+            throw ScrapingError.parsingError
+        }
+
+        return images
+    }
+
+    /// Scrapes the metadata of a manga: title, description, genres, status and cover.
+    ///
+    /// Loads the manga page, waits the fixed delay applied by `loadURLAndWait`, and runs a
+    /// script that extracts:
+    /// - Title: first `h1`, `.title` or `[class*="title"]` element, else the document
+    ///   title with the site suffix removed.
+    /// - Description: first `<p>` longer than 100 characters that does not contain `©`.
+    /// - Genres: text of every link whose `href` contains `/genero/`.
+    /// - Status: matched by regex against the body text; `UNKNOWN` when absent.
+    /// - Cover: `src` of the first image matching one of the cover selectors.
+    ///
+    /// - Parameter mangaSlug: Unique manga slug (e.g. `"one-piece_1695365223767"`).
+    /// - Returns: A `Manga`; fields missing from the script result fall back to defaults
+    ///   and an empty cover URL becomes `nil`.
+    /// - Throws: `ScrapingError.webViewNotInitialized` when there is no web view,
+    ///   `URLError(.badURL)` when the slug does not form a valid URL,
+    ///   `ScrapingError.parsingError` when the script result is not an object,
+    ///   or any error raised while loading the page or evaluating the script.
+    ///
+    /// # Example
+    /// ```swift
+    /// let manga = try await scraper.scrapeMangaInfo(mangaSlug: "one-piece_1695365223767")
+    /// print("Title: \(manga.title)")
+    /// print("Genres: \(manga.genres.joined(separator: ", "))")
+    /// ```
+    func scrapeMangaInfo(mangaSlug: String) async throws -> Manga {
+        guard let webView = webView else {
+            throw ScrapingError.webViewNotInitialized
+        }
+        // `URL(string:)` yields nil for a malformed slug; throw instead of crashing.
+        guard let url = URL(string: "https://manhwaweb.com/manga/\(mangaSlug)") else {
+            throw URLError(.badURL)
+        }
+
+        try await loadURLAndWait(url)
+
+        let rawResult = try await webView.evaluateJavaScript("""
+            (function() {
+                // Title
+                let title = '';
+                const titleEl = document.querySelector('h1') ||
+                               document.querySelector('.title') ||
+                               document.querySelector('[class*="title"]');
+                if (titleEl) {
+                    title = titleEl.textContent?.trim() || '';
+                }
+
+                if (!title) {
+                    title = document.title.replace(' - ManhwaWeb', '').replace(' - Manhwa Web', '').trim();
+                }
+
+                // Description
+                let description = '';
+                const paragraphs = document.querySelectorAll('p');
+                for (const p of paragraphs) {
+                    const text = p.textContent?.trim() || '';
+                    if (text.length > 100 && !text.includes('©')) {
+                        description = text;
+                        break;
+                    }
+                }
+
+                // Genres
+                const genres = [];
+                const genreLinks = document.querySelectorAll('a[href*="/genero/"]');
+                genreLinks.forEach(link => {
+                    const genre = link.textContent?.trim();
+                    if (genre) genres.push(genre);
+                });
+
+                // Status
+                let status = 'UNKNOWN';
+                const bodyText = document.body.textContent || '';
+                const statusMatch = bodyText.match(/Estado\\s*:?\\s*(PUBLICANDOSE|FINALIZADO|EN PAUSA|EN_ESPERA)/i);
+                if (statusMatch) {
+                    status = statusMatch[1].toUpperCase().replace(' ', '_');
+                }
+
+                // Cover image
+                let coverImage = '';
+                const coverImg = document.querySelector('.cover img') ||
+                                document.querySelector('[class*="cover"] img') ||
+                                document.querySelector('img[alt*="cover"]');
+                if (coverImg) {
+                    coverImage = coverImg.src || '';
+                }
+
+                return {
+                    title: title,
+                    description: description,
+                    genres: genres,
+                    status: status,
+                    coverImage: coverImage
+                };
+            })();
+        """)
+
+        // The script always returns one object; anything else is a parse failure.
+        guard let mangaInfo = rawResult as? [String: Any] else {
+            throw ScrapingError.parsingError
+        }
+
+        let title = mangaInfo["title"] as? String ?? "Unknown"
+        let description = mangaInfo["description"] as? String ?? ""
+        let genres = mangaInfo["genres"] as? [String] ?? []
+        let status = mangaInfo["status"] as? String ?? "UNKNOWN"
+        let coverImage = mangaInfo["coverImage"] as? String
+
+        return Manga(
+            slug: mangaSlug,
+            title: title,
+            description: description,
+            genres: genres,
+            status: status,
+            url: url.absoluteString,
+            coverImage: coverImage?.isEmpty == false ? coverImage : nil
+        )
+    }
+
+    // MARK: - Helper Methods
+
+    /// Starts loading `url` in the web view and suspends for a fixed delay.
+    ///
+    /// The delay gives the page's JavaScript time to render. Navigation completion is
+    /// not observed here, so the method returns once the delay elapses whether or not
+    /// the load succeeded.
+    ///
+    /// - Parameters:
+    ///   - url: URL to load in the web view.
+    ///   - waitForImages: `true` waits 5 seconds (image-heavy pages); `false` waits 3.
+    /// - Throws: `ScrapingError.webViewNotInitialized` when there is no web view, or
+    ///   `CancellationError` when the calling task is cancelled during the wait.
+    private func loadURLAndWait(_ url: URL, waitForImages: Bool = false) async throws {
+        guard let webView = webView else {
+            throw ScrapingError.webViewNotInitialized
+        }
+
+        webView.load(URLRequest(url: url))
+
+        // `Task.sleep` suspends without blocking the main actor and throws
+        // `CancellationError` if the calling task is cancelled, so an abandoned
+        // scrape stops waiting instead of always sitting out the full delay.
+        let delaySeconds: UInt64 = waitForImages ? 5 : 3
+        try await Task.sleep(nanoseconds: delaySeconds * 1_000_000_000)
+    }
+}
+
+// MARK: - WKNavigationDelegate
+
+/// `WKNavigationDelegate` conformance for the scraper.
+///
+/// Receives navigation events from the web view (finished, failed, provisional failure).
+/// The handlers only log failures; they do not change any scraper state.
+extension ManhwaWebScraper: WKNavigationDelegate {
+    /// Called when a navigation finishes successfully. Intentionally a no-op.
+    nonisolated func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) {
+        // Navigation completed
+    }
+
+    /// Called when navigation fails; logs the localized error description.
+    nonisolated func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) {
+        print("Navigation failed: \(error.localizedDescription)")
+    }
+
+    /// Called when provisional navigation (before commit) fails; logs the localized error description.
+    nonisolated func webView(_ webView: WKWebView, didFailProvisionalNavigation navigation: WKNavigation!, withError error: Error) {
+        print("Provisional navigation failed: \(error.localizedDescription)")
+    }
+}
+
+// MARK: - Errors
+
+/// Failures that scraping manhwaweb.com can report.
+///
+/// Conforms to `LocalizedError`; `errorDescription` supplies the user-facing text
+/// for each case.
+enum ScrapingError: LocalizedError {
+    /// The `WKWebView` has not been created or is `nil`.
+    case webViewNotInitialized
+
+    /// The page could not be loaded (timeout, network error, etc.).
+    case pageLoadFailed
+
+    /// The page loaded but the expected content was not found.
+    case noContentFound
+
+    /// The extracted content could not be processed or parsed.
+    case parsingError
+
+    /// Human-readable description of the error, suitable for showing to the user.
+    var errorDescription: String? {
+        let message: String
+
+        switch self {
+        case .webViewNotInitialized: message = "WebView no está inicializado"
+        case .pageLoadFailed: message = "Error al cargar la página"
+        case .noContentFound: message = "No se encontró contenido"
+        case .parsingError: message = "Error al procesar el contenido"
+        }
+
+        return message
+    }
+}