import puppeteer from 'puppeteer';

const BASE_URL = 'https://manhwaweb.com';

// Puppeteer launch configuration.
const PUPPETEER_OPTIONS = {
  headless: 'new',
  args: [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--disable-gpu'
  ]
};

// User agent sent with every page load.
const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';

// Maximum time (ms) allowed for the initial navigation.
const NAVIGATION_TIMEOUT_MS = 45000;

/**
 * Resolves after the given number of milliseconds.
 * Used instead of `page.waitForTimeout`, which is deprecated in Puppeteer
 * (and, per its changelog, removed in newer major versions).
 * @param {number} ms - Time to wait in milliseconds
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Closes the browser without letting a close failure escape.
 * A failure here is logged instead of thrown so that it can never replace the
 * scraping result or hide the error that actually caused the scrape to fail.
 * @param {object} browser - Browser instance returned by `puppeteer.launch`
 * @returns {Promise<void>}
 */
async function closeBrowserQuietly(browser) {
  try {
    await browser.close();
  } catch (closeError) {
    console.error('Error closing browser:', closeError.message);
  }
}

/**
 * Launches a browser, opens `url`, waits `settleMs` for client-side rendering,
 * runs `task` against the page and always closes the browser afterwards.
 * @param {string} url - Page to open
 * @param {number} settleMs - Extra wait (ms) after `domcontentloaded`
 * @param {(page: object) => Promise<*>} task - Work to perform on the loaded page
 * @returns {Promise<*>} Whatever `task` resolves to
 * @throws Rethrows any launch, navigation or task error unchanged
 */
async function withPage(url, settleMs, task) {
  const browser = await puppeteer.launch(PUPPETEER_OPTIONS);
  try {
    const page = await browser.newPage();
    await page.setUserAgent(USER_AGENT);

    // Only the DOM is awaited here; the delay below gives scripts time to render.
    await page.goto(url, { waitUntil: 'domcontentloaded', timeout: NAVIGATION_TIMEOUT_MS });
    await delay(settleMs);

    return await task(page);
  } finally {
    await closeBrowserQuietly(browser);
  }
}

/**
 * Gets the rendered HTML of a URL using Puppeteer.
 * @param {string} url - Page to render
 * @param {number} [waitFor=3000] - Extra wait (ms) after `domcontentloaded`
 * @returns {Promise<string>} Serialized HTML of the page
 */
async function getRenderedHTML(url, waitFor = 3000) {
  return withPage(url, waitFor, (page) => page.content());
}

/**
 * Keeps the first chapter seen for each chapter number and sorts the result
 * by chapter number, highest first.
 * @param {Array<{number: number}>} chapters - Chapters in page order
 * @returns {Array<{number: number}>} De-duplicated, descending list
 */
function dedupeAndSortChapters(chapters) {
  const byNumber = new Map();
  for (const chapter of chapters) {
    if (!byNumber.has(chapter.number)) {
      byNumber.set(chapter.number, chapter);
    }
  }
  return [...byNumber.values()].sort((a, b) => b.number - a.number);
}

/**
 * Gets the chapter list of a manga from its page.
 * @param {string} mangaSlug - Manga slug (e.g. "one-piece_1695365223767")
 * @returns {Promise<Array<{number: number, title: string, url: string, slug: string}>>}
 *   Unique chapters sorted by number, descending
 * @throws Rethrows any scraping error after logging it
 */
export async function getMangaChapters(mangaSlug) {
  try {
    const url = `${BASE_URL}/manga/${mangaSlug}`;

    // The callback runs inside the browser, so the origin is passed in explicitly.
    const chapters = await withPage(url, 3000, (page) =>
      page.evaluate((baseUrl) => {
        const found = [];

        // NOTE(review): this picks up every "/leer/" link on the page; if the
        // page also links to chapters of other titles they are included too.
        for (const link of document.querySelectorAll('a[href*="/leer/"]')) {
          const href = link.getAttribute('href');
          const text = link.textContent?.trim();
          if (!href || !text) {
            continue;
          }

          // The chapter number is the trailing digit run of the href,
          // e.g. /leer/one-piece_1695365223767-1172
          const match = href.match(/(\d+)(?:\/|\?|\s*$)/);
          if (!match) {
            continue;
          }

          // Explicit NaN check (not truthiness) so that chapter 0 is kept.
          const number = Number.parseInt(match[1], 10);
          if (Number.isNaN(number)) {
            continue;
          }

          // Drop the origin first so absolute hrefs yield the same slug as
          // relative ones instead of "https://hostslug".
          const path = href.replace(/^https?:\/\/[^/]+/i, '');

          found.push({
            number,
            title: text,
            url: href.startsWith('http') ? href : `${baseUrl}${href}`,
            slug: path.replace('/leer/', '').replace(/^\//, '')
          });
        }

        return found;
      }, BASE_URL)
    );

    const uniqueChapters = dedupeAndSortChapters(chapters);

    console.log(`✓ Found ${uniqueChapters.length} chapters for ${mangaSlug}`);
    return uniqueChapters;
  } catch (error) {
    console.error('Error scraping manga chapters:', error.message);
    throw error;
  }
}

/**
 * Gets the images of a specific chapter.
 * @param {string} chapterSlug - Chapter slug (e.g. "one-piece_1695365223767-1172")
 * @returns {Promise<string[]>} Unique image URLs in page order
 * @throws Rethrows any scraping error after logging it
 */
export async function getChapterImages(chapterSlug) {
  try {
    const url = `${BASE_URL}/leer/${chapterSlug}`;

    const images = await withPage(url, 3000, (page) =>
      page.evaluate((baseUrl) => {
        const imageUrls = [];

        for (const img of document.querySelectorAll('img')) {
          let src = img.src || img.getAttribute('data-src');
          if (!src) {
            continue;
          }

          // Convert protocol-relative and root-relative URLs to absolute ones.
          if (!src.startsWith('http')) {
            if (src.startsWith('//')) {
              src = `https:${src}`;
            } else if (src.startsWith('/')) {
              src = `${baseUrl}${src}`;
            }
          }

          // Skip images whose URL, alt text or class marks them as UI elements.
          const alt = img.alt?.toLowerCase() || '';
          const className = img.className?.toLowerCase() || '';
          const isUIElement =
            src.includes('avatar') ||
            src.includes('icon') ||
            src.includes('logo') ||
            src.includes('button') ||
            alt.includes('avatar') ||
            className.includes('avatar') ||
            className.includes('icon');

          if (!isUIElement && src.includes('http')) {
            imageUrls.push(src);
          }
        }

        return imageUrls;
      }, BASE_URL)
    );

    // A Set removes duplicates while preserving first-seen order.
    const uniqueImages = [...new Set(images)];

    console.log(`✓ Found ${uniqueImages.length} images for chapter ${chapterSlug}`);
    return uniqueImages;
  } catch (error) {
    console.error('Error scraping chapter images:', error.message);
    throw error;
  }
}

/**
 * Looks up the information of a specific manga.
 * @param {string} mangaSlug - Manga slug
 * @returns {Promise<{slug: string, title: string, description: string, genres: string[], status: string, url: string, coverImage: string}>}
 *   Manga information; `title` falls back to 'Unknown' and `status` to 'UNKNOWN'
 * @throws Rethrows any scraping error after logging it
 */
export async function getMangaInfo(mangaSlug) {
  try {
    const url = `${BASE_URL}/manga/${mangaSlug}`;

    const mangaInfo = await withPage(url, 2000, (page) =>
      page.evaluate(() => {
        // Title: first matching heading/title element, else the document title.
        const titleElement =
          document.querySelector('h1') ||
          document.querySelector('.title') ||
          document.querySelector('[class*="title"]');
        let title = titleElement ? titleElement.textContent?.trim() || '' : '';
        if (!title) {
          title = document.title.replace(' - ManhwaWeb', '').replace(' - Manhwa Web', '').trim();
        }

        // Description: first paragraph longer than 100 characters that is not
        // a copyright notice.
        let description = '';
        for (const p of document.querySelectorAll('p')) {
          const text = p.textContent?.trim() || '';
          if (text.length > 100 && !text.includes('©')) {
            description = text;
            break;
          }
        }

        // Genres: text of every link pointing at a genre page.
        const genres = [];
        for (const link of document.querySelectorAll('a[href*="/genero/"]')) {
          const genre = link.textContent?.trim();
          if (genre) {
            genres.push(genre);
          }
        }

        // Status: searched in the full page text, normalised to UPPER_SNAKE.
        let status = 'UNKNOWN';
        const bodyText = document.body.textContent || '';
        const statusMatch = bodyText.match(/Estado\s*:?\s*(PUBLICANDOSE|FINALIZADO|EN PAUSA|EN_ESPERA)/i);
        if (statusMatch) {
          status = statusMatch[1].toUpperCase().replace(' ', '_');
        }

        // Cover image: first image matching one of the cover selectors.
        const coverImg =
          document.querySelector('.cover img') ||
          document.querySelector('[class*="cover"] img') ||
          document.querySelector('img[alt*="cover"]');
        const coverImage = coverImg ? coverImg.src || '' : '';

        return { title, description, genres, status, coverImage };
      })
    );

    return {
      slug: mangaSlug,
      title: mangaInfo.title || 'Unknown',
      description: mangaInfo.description || '',
      genres: mangaInfo.genres || [],
      status: mangaInfo.status,
      url: url,
      coverImage: mangaInfo.coverImage
    };
  } catch (error) {
    console.error('Error scraping manga info:', error.message);
    throw error;
  }
}

/**
 * Gets a list of mangas linked from the home page (for discovery).
 * @returns {Promise<Array<{slug: string, title: string, url: string}>>}
 *   At most 50 mangas, unique by slug, in page order
 * @throws Rethrows any scraping error after logging it
 */
export async function getPopularMangas() {
  try {
    const url = `${BASE_URL}/`;

    const mangas = await withPage(url, 3000, (page) =>
      page.evaluate((baseUrl) => {
        const mangaList = [];
        const seenSlugs = new Set();

        for (const link of document.querySelectorAll('a[href*="/manga/"]')) {
          const href = link.getAttribute('href');
          if (!href) {
            continue;
          }

          // The slug is the path segment right after /manga/.
          const slugMatch = href.match(/\/manga\/([^\/\?#]+)/);
          const slug = slugMatch ? slugMatch[1] : null;

          // Title: link text, else the image alt, else a name derived from the slug.
          const title =
            link.textContent?.trim() ||
            link.querySelector('img')?.getAttribute('alt') ||
            slug?.split('_')[0]?.replace(/-/g, ' ');

          // A slug is only marked as seen once it has been added, so a later
          // link with a usable title can still supply the entry.
          if (slug && title && !seenSlugs.has(slug)) {
            seenSlugs.add(slug);
            mangaList.push({
              slug,
              title,
              url: href.startsWith('http') ? href : `${baseUrl}${href}`
            });
          }
        }

        return mangaList;
      }, BASE_URL)
    );

    const limitedMangas = mangas.slice(0, 50);

    console.log(`✓ Found ${limitedMangas.length} popular mangas`);
    return limitedMangas;
  } catch (error) {
    console.error('Error scraping popular mangas:', error.message);
    throw error;
  }
}