Files
MangaReader/backend/scraper.js
renato97 b474182dd9 Initial commit: MangaReader iOS App
 Features:
- App iOS completa para leer manga sin publicidad
- Scraper con WKWebView para manhwaweb.com
- Sistema de descargas offline
- Lector con zoom y navegación
- Favoritos y progreso de lectura
- Compatible con iOS 15+ y Sideloadly/3uTools

📦 Contenido:
- Backend Node.js con Puppeteer (opcional)
- App iOS con SwiftUI
- Scraper de capítulos e imágenes
- Sistema de almacenamiento local
- Testing completo
- Documentación exhaustiva

🧪 Prueba: Capítulo 789 de One Piece descargado exitosamente
  - 21 páginas descargadas
  - 4.68 MB total
  - URLs verificadas y funcionales

🎉 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-04 15:34:18 +01:00

367 lines
10 KiB
JavaScript

import puppeteer from 'puppeteer';
// Origin of the site that every scraper function in this file targets.
const BASE_URL = 'https://manhwaweb.com';

// Launch options shared by every puppeteer.launch() call in this file.
const PUPPETEER_OPTIONS = {
  args: [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--disable-gpu',
  ],
  headless: 'new',
};
/**
 * Loads a URL in a freshly launched headless browser and returns the page's
 * HTML after client-side JavaScript has had time to run.
 *
 * @param {string} url - Page to load.
 * @param {number} [waitFor=3000] - Extra milliseconds to wait after the
 *   network has gone idle, before the HTML is read.
 * @returns {Promise<string>} Serialized HTML of the rendered page.
 * @throws Rethrows any error raised while launching, navigating or reading
 *   the page.
 */
async function getRenderedHTML(url, waitFor = 3000) {
  let browser;
  try {
    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

    // "networkidle0" resolves once there are no in-flight network requests.
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // page.waitForTimeout() is deprecated and has been removed from newer
    // Puppeteer releases; a plain timer behaves the same on every version.
    await new Promise((resolve) => setTimeout(resolve, waitFor));

    return await page.content();
  } finally {
    // Runs on success and on failure, so the browser is closed exactly once.
    if (browser) {
      await browser.close();
    }
  }
}
/**
 * Converts one chapter anchor (its href and visible text) into a chapter
 * record, or returns null when the anchor does not describe a chapter.
 *
 * @param {string|null} href - Raw href attribute of the anchor.
 * @param {string} text - Trimmed text content of the anchor.
 * @returns {{number: number, title: string, url: string, slug: string}|null}
 */
function toChapterRecord(href, text) {
  if (!href || !text || !href.includes('/leer/')) {
    return null;
  }

  // The chapter number is the first run of digits that is directly followed
  // by "/", "?" or the end of the href,
  // e.g. "/leer/one-piece_1695365223767-1172" -> 1172.
  const match = href.match(/(\d+)(?:\/|\?|\s*$)/);
  if (!match) {
    return null;
  }

  const marker = '/leer/';
  return {
    // A match is always digits, so this is never NaN; chapter 0 is valid.
    number: Number.parseInt(match[1], 10),
    title: text,
    url: href.startsWith('http') ? href : `${BASE_URL}${href}`,
    // Take what follows "/leer/" so absolute hrefs do not leak their origin
    // into the slug.
    slug: href.slice(href.indexOf(marker) + marker.length).replace(/^\//, ''),
  };
}

/**
 * Gets the chapter list of a manga from its page.
 *
 * @param {string} mangaSlug - Manga slug (e.g. "one-piece_1695365223767").
 * @returns {Promise<Array<{number: number, title: string, url: string, slug: string}>>}
 *   Chapters, one per chapter number, sorted by number in descending order.
 * @throws Rethrows any scraping error after logging its message.
 */
export async function getMangaChapters(mangaSlug) {
  let browser;
  try {
    const url = `${BASE_URL}/manga/${mangaSlug}`;
    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Give client-side rendering time to finish. page.waitForTimeout() is
    // deprecated and has been removed from newer Puppeteer releases, so a
    // plain timer is used.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // The callback runs inside the page; it only collects raw strings and
    // leaves the parsing to Node.
    const anchors = await page.evaluate(() =>
      Array.from(document.querySelectorAll('a[href*="/leer/"]'), (anchor) => ({
        href: anchor.getAttribute('href'),
        text: anchor.textContent?.trim() ?? '',
      }))
    );

    // Keep the first anchor seen for each chapter number.
    const seenNumbers = new Set();
    const chapters = [];
    for (const { href, text } of anchors) {
      const record = toChapterRecord(href, text);
      if (record && !seenNumbers.has(record.number)) {
        seenNumbers.add(record.number);
        chapters.push(record);
      }
    }
    chapters.sort((a, b) => b.number - a.number);

    console.log(`✓ Found ${chapters.length} chapters for ${mangaSlug}`);
    return chapters;
  } catch (error) {
    console.error('Error scraping manga chapters:', error.message);
    throw error;
  } finally {
    // Runs on success and on failure, so the browser is closed exactly once.
    if (browser) {
      await browser.close();
    }
  }
}
/**
 * Gets the image URLs of a specific chapter.
 *
 * @param {string} chapterSlug - Chapter slug
 *   (e.g. "one-piece_1695365223767-1172").
 * @returns {Promise<string[]>} Image URLs in document order, without
 *   duplicates.
 * @throws Rethrows any scraping error after logging its message.
 */
export async function getChapterImages(chapterSlug) {
  let browser;
  try {
    const url = `${BASE_URL}/leer/${chapterSlug}`;
    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // Give the images time to load. page.waitForTimeout() is deprecated and
    // has been removed from newer Puppeteer releases, so a plain timer is
    // used.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // The callback runs inside the page and cannot see Node-side variables,
    // so the site origin is handed in as an argument.
    const images = await page.evaluate((siteOrigin) => {
      const uiMarkersInSrc = ['avatar', 'icon', 'logo', 'button'];
      const imageUrls = [];

      for (const img of document.querySelectorAll('img')) {
        let src = img.src || img.getAttribute('data-src');
        if (!src) {
          continue;
        }

        // Make protocol-relative and root-relative URLs absolute.
        if (src.startsWith('//')) {
          src = `https:${src}`;
        } else if (src.startsWith('/')) {
          src = `${siteOrigin}${src}`;
        }

        // Skip images whose URL, alt text or class name marks them as UI.
        const alt = img.alt?.toLowerCase() || '';
        const className = img.className?.toLowerCase() || '';
        const isUIElement =
          uiMarkersInSrc.some((marker) => src.includes(marker)) ||
          alt.includes('avatar') ||
          className.includes('avatar') ||
          className.includes('icon');

        if (!isUIElement && src.includes('http')) {
          imageUrls.push(src);
        }
      }

      return imageUrls;
    }, BASE_URL);

    // A Set keeps first-seen order while dropping repeated URLs.
    const uniqueImages = [...new Set(images)];
    console.log(`✓ Found ${uniqueImages.length} images for chapter ${chapterSlug}`);
    return uniqueImages;
  } catch (error) {
    console.error('Error scraping chapter images:', error.message);
    throw error;
  } finally {
    // Runs on success and on failure, so the browser is closed exactly once.
    if (browser) {
      await browser.close();
    }
  }
}
/**
 * Looks up the details of a specific manga from its page.
 *
 * @param {string} mangaSlug - Manga slug.
 * @returns {Promise<{slug: string, title: string, description: string,
 *   genres: string[], status: string, url: string, coverImage: string}>}
 *   Manga details. `title` falls back to "Unknown" and `status` to
 *   "UNKNOWN" when the page does not provide them.
 * @throws Rethrows any scraping error after logging its message.
 */
export async function getMangaInfo(mangaSlug) {
  let browser;
  try {
    const url = `${BASE_URL}/manga/${mangaSlug}`;
    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // page.waitForTimeout() is deprecated and has been removed from newer
    // Puppeteer releases, so a plain timer is used.
    await new Promise((resolve) => setTimeout(resolve, 2000));

    // The callback runs inside the page and returns plain serializable data.
    const mangaInfo = await page.evaluate(() => {
      // Title: first matching heading-like element, else the tab title
      // with the site suffix stripped.
      const titleElement =
        document.querySelector('h1') ||
        document.querySelector('.title') ||
        document.querySelector('[class*="title"]');
      let title = titleElement?.textContent?.trim() || '';
      if (!title) {
        title = document.title.replace(' - ManhwaWeb', '').replace(' - Manhwa Web', '').trim();
      }

      // Description: first paragraph longer than 100 characters that is not
      // a copyright notice.
      let description = '';
      for (const paragraph of document.querySelectorAll('p')) {
        const text = paragraph.textContent?.trim() || '';
        if (text.length > 100 && !text.includes('©')) {
          description = text;
          break;
        }
      }

      // Genres: text of every link that points at a genre page.
      const genres = [];
      for (const link of document.querySelectorAll('a[href*="/genero/"]')) {
        const genre = link.textContent?.trim();
        if (genre) {
          genres.push(genre);
        }
      }

      // Status: searched for in the page text and normalized to
      // UPPER_SNAKE_CASE.
      const bodyText = document.body.textContent || '';
      const statusMatch = bodyText.match(/Estado\s*:?\s*(PUBLICANDOSE|FINALIZADO|EN PAUSA|EN_ESPERA)/i);
      const status = statusMatch ? statusMatch[1].toUpperCase().replace(' ', '_') : 'UNKNOWN';

      // Cover: first image matched by any of the cover selectors.
      const coverImg =
        document.querySelector('.cover img') ||
        document.querySelector('[class*="cover"] img') ||
        document.querySelector('img[alt*="cover"]');
      const coverImage = coverImg?.src || '';

      return { title, description, genres, status, coverImage };
    });

    return {
      slug: mangaSlug,
      title: mangaInfo.title || 'Unknown',
      description: mangaInfo.description || '',
      genres: mangaInfo.genres || [],
      status: mangaInfo.status,
      url,
      coverImage: mangaInfo.coverImage,
    };
  } catch (error) {
    console.error('Error scraping manga info:', error.message);
    throw error;
  } finally {
    // Runs on success and on failure, so the browser is closed exactly once.
    if (browser) {
      await browser.close();
    }
  }
}
/**
 * Gets a list of mangas linked from the site's home page (for discovery).
 *
 * @returns {Promise<Array<{slug: string, title: string, url: string}>>}
 *   At most 50 mangas, one per slug, in the order their links appear.
 * @throws Rethrows any scraping error after logging its message.
 */
export async function getPopularMangas() {
  let browser;
  try {
    const url = `${BASE_URL}/`;
    browser = await puppeteer.launch(PUPPETEER_OPTIONS);
    const page = await browser.newPage();

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');
    await page.goto(url, {
      waitUntil: 'networkidle0',
      timeout: 30000,
    });

    // page.waitForTimeout() is deprecated and has been removed from newer
    // Puppeteer releases, so a plain timer is used.
    await new Promise((resolve) => setTimeout(resolve, 3000));

    // The callback runs inside the page and cannot see Node-side variables,
    // so the site origin is handed in as an argument.
    const mangas = await page.evaluate((siteOrigin) => {
      const seenSlugs = new Set();
      const mangaList = [];

      for (const link of document.querySelectorAll('a[href*="/manga/"]')) {
        const href = link.getAttribute('href');

        // The slug is the path segment right after "/manga/".
        const slug = href?.match(/\/manga\/([^\/\?#]+)/)?.[1];
        if (!slug || seenSlugs.has(slug)) {
          continue;
        }

        // Title preference: link text, then the image's alt text, then a
        // readable form of the slug ("one-piece_123" -> "one piece").
        const title =
          link.textContent?.trim() ||
          link.querySelector('img')?.getAttribute('alt') ||
          slug.split('_')[0]?.replace(/-/g, ' ');
        if (!title) {
          // Not marked as seen: a later link for this slug may carry a title.
          continue;
        }

        seenSlugs.add(slug);
        mangaList.push({
          slug,
          title,
          url: href.startsWith('http') ? href : `${siteOrigin}${href}`,
        });
      }

      return mangaList;
    }, BASE_URL);

    const limitedMangas = mangas.slice(0, 50);
    console.log(`✓ Found ${limitedMangas.length} popular mangas`);
    return limitedMangas;
  } catch (error) {
    console.error('Error scraping popular mangas:', error.message);
    throw error;
  } finally {
    // Runs on success and on failure, so the browser is closed exactly once.
    if (browser) {
      await browser.close();
    }
  }
}