Fix regex pattern for page detection

Changed from:
  /^(\d+)\s+pages?$/i  (exact match only)
To:
  /(\d+)\s+pages?/i    (matches anywhere in text)

Added:
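- Collects and logs every div whose text matches, instead of stopping at the first one
- Logs the class name of each matching div
- Falls back to searching the full body text when no div matches
- Prints a numbered summary of all matches so it is clear what text was found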

The first div match is still used as the page count; the extra logging should help reveal why page detection is failing.
This commit is contained in:
renato97
2025-11-04 05:43:41 +00:00
parent cba176dab9
commit 423f8ecbc4

View File

@@ -809,14 +809,44 @@
// Usar la misma lógica de detección // Usar la misma lógica de detección
let detectedPages = 'UNKNOWN'; let detectedPages = 'UNKNOWN';
const allDivs = doc.querySelectorAll('div'); const allDivs = doc.querySelectorAll('div');
const allText = doc.body.textContent;
console.log('🔍 Searching in all divs...');
const textMatches = [];
for (let div of allDivs) { for (let div of allDivs) {
const text = div.textContent.trim(); const text = div.textContent.trim();
const pageMatch = text.match(/^(\d+)\s+pages?$/i); // Buscar CUALQUIER número seguido de "pages" o "page"
const pageMatch = text.match(/(\d+)\s+pages?/i);
if (pageMatch) { if (pageMatch) {
detectedPages = parseInt(pageMatch[1]); const count = parseInt(pageMatch[1]);
console.log(`✓ Found: "${text}" = ${detectedPages} pages (class: ${div.className})`); textMatches.push({
break; text: text,
count: count,
className: div.className
});
console.log(`🔍 Found text "${text}" = ${count} pages (class: ${div.className})`);
}
}
console.log(`📊 Total text matches found: ${textMatches.length}`);
if (textMatches.length > 0) {
textMatches.forEach((match, idx) => {
console.log(` ${idx + 1}. "${match.text}" -> ${match.count} pages (class: ${match.className})`);
});
// Usar el primer match
detectedPages = textMatches[0].count;
console.log(`✅ Using: ${detectedPages} pages from "${textMatches[0].text}"`);
} else {
console.log('❌ No matches found in divs');
// Buscar en todo el texto
const bodyMatch = allText.match(/(\d+)\s+pages?/i);
if (bodyMatch) {
const count = parseInt(bodyMatch[1]);
console.log(`⚠️ Found in body text: ${count} pages`);
detectedPages = count;
} else {
console.log('❌ No page count found anywhere!');
} }
} }