🎓 Initial commit: Math2 Platform - PRO Linear Algebra Platform

Features:
- 45 university-level exercises (Basic → Advanced)
- Professional LaTeX rendering
- Generative AI (Z.ai/DashScope)
- 9 Docker services
- 123/123 tests passing
- Enterprise-grade security (JWT, XSS protection, rate limiting)

🐳 Infrastructure:
- Next.js 14 + Node.js 20
- PostgreSQL 15 + Redis 7
- Full Docker Compose stack
- Nginx + SSL ready

📚 Documentation:
- 5 complete technical reports
- Professional README
- Automated deployment scripts

Status: Production-ready
Author: Renato
Date: 2026-03-31 11:27:11 -03:00
Commit: bc43c9e772
309 changed files with 84845 additions and 0 deletions


@@ -0,0 +1,88 @@
# ========================================
# ALERTMANAGER CONFIGURATION
# Enterprise Alert Routing
# ========================================

global:
  smtp_smarthost: '${SMTP_HOST:-localhost:587}'
  smtp_from: '${SMTP_FROM:-alerts@mathplatform.com}'
  smtp_auth_username: '${SMTP_USER:-}'
  smtp_auth_password: '${SMTP_PASSWORD:-}'
  slack_api_url: '${SLACK_WEBHOOK_URL:-}'
  telegram_api_url: 'https://api.telegram.org'

# Templates
templates:
  - '/etc/alertmanager/templates/*.tmpl'

# Inhibition rules
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'instance']

# Route tree
route:
  receiver: 'default-receiver'
  group_by: ['alertname', 'severity', 'instance']
  group_wait: 10s
  group_interval: 5m
  repeat_interval: 4h
  routes:
    # Critical alerts
    - match:
        severity: critical
      receiver: 'critical-receiver'
      continue: true
    # Database alerts
    - match:
        job: postgres
      receiver: 'database-receiver'
      group_interval: 10m
    # Backend alerts
    - match:
        job: backend
      receiver: 'backend-receiver'
      group_interval: 5m

# Receivers
receivers:
  - name: 'default-receiver'
    slack_configs:
      - channel: '#alerts'
        title: 'Math Platform Alert'
        text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
        send_resolved: true

  - name: 'critical-receiver'
    slack_configs:
      - channel: '#critical-alerts'
        title: 'CRITICAL: Math Platform'
        text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'
        send_resolved: true
    email_configs:
      - to: '${CRITICAL_EMAIL:-admin@mathplatform.com}'
        subject: 'CRITICAL Alert: {{ .GroupLabels.alertname }}'
        html: '{{ template "email.default.html" . }}'
        send_resolved: true
    telegram_configs:
      - bot_token: '${TELEGRAM_BOT_TOKEN}'
        chat_id: '${TELEGRAM_ADMIN_CHAT_ID}'
        message: '🔴 CRITICAL: {{ .GroupLabels.alertname }} - {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
        send_resolved: true

  - name: 'database-receiver'
    slack_configs:
      - channel: '#database-alerts'
        title: 'Database Alert'
        text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'

  - name: 'backend-receiver'
    slack_configs:
      - channel: '#backend-alerts'
        title: 'Backend Alert'
        text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
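One detail worth noting: the ${VAR:-default} placeholders above use shell parameter-expansion syntax, which Alertmanager does not evaluate itself — the file has to be rendered (for example by one of the repository's deployment scripts) before the container starts. A minimal sketch of how the rendered config and the template directory could be wired into the Compose stack; the image tag and host paths are assumptions, not taken from this commit:

alertmanager:
  image: prom/alertmanager:v0.27.0
  command:
    - '--config.file=/etc/alertmanager/alertmanager.yml'
  volumes:
    # the mounted alertmanager.yml is assumed to be the rendered file,
    # with all ${...} placeholders already expanded
    - ./monitoring/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    - ./monitoring/alertmanager/templates:/etc/alertmanager/templates:ro
  ports:
    - "9093:9093"
  restart: unless-stopped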


@@ -0,0 +1,12 @@
apiVersion: 1

providers:
  - name: 'Math Platform Dashboards'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    editable: true
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards


@@ -0,0 +1,22 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: false
    jsonData:
      timeInterval: "15s"
      httpMethod: POST
      manageAlerts: true
      alertmanagerUid: alertmanager
  - name: Alertmanager
    # explicit uid so the alertmanagerUid reference above resolves;
    # without it Grafana assigns a random UID and the link breaks
    uid: alertmanager
    type: alertmanager
    access: proxy
    url: http://alertmanager:9093
    editable: false
    jsonData:
      implementation: prometheus
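Grafana only applies these files when they sit under /etc/grafana/provisioning inside the container (dashboard providers under dashboards/, datasources under datasources/). A minimal sketch of the corresponding Compose service; the image tag, host paths, and host port are assumptions:

grafana:
  image: grafana/grafana:10.4.2
  volumes:
    # provider YAMLs plus the dashboard JSON files the file provider loads
    - ./monitoring/grafana/provisioning:/etc/grafana/provisioning:ro
  environment:
    - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
  ports:
    # host port 3030 assumed, since the frontend already claims 3000
    - "3030:3000"
  restart: unless-stopped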


@@ -0,0 +1,88 @@
# ========================================
# PROMETHEUS CONFIGURATION
# Enterprise Monitoring Setup
# ========================================

global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    cluster: 'math-platform'
    replica: '{{.ExternalURL}}'

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['alertmanager:9093']

# Load rules once and periodically evaluate them
rule_files:
  - /etc/prometheus/rules/*.yml

# Scrape configurations
scrape_configs:
  # Prometheus itself
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Backend API
  - job_name: 'backend'
    static_configs:
      - targets: ['backend:3001']
    metrics_path: '/metrics'
    scrape_interval: 10s
    scrape_timeout: 5s

  # Frontend
  - job_name: 'frontend'
    static_configs:
      - targets: ['frontend:3000']
    scrape_interval: 30s

  # PostgreSQL (via postgres_exporter)
  - job_name: 'postgres'
    static_configs:
      - targets: ['postgres-exporter:9187']
    scrape_interval: 15s

  # Redis (via redis_exporter)
  - job_name: 'redis'
    static_configs:
      - targets: ['redis-exporter:9121']
    scrape_interval: 15s

  # Workers
  - job_name: 'pdf-worker'
    static_configs:
      - targets: ['pdf-worker:3002']
    scrape_interval: 30s

  - job_name: 'exercise-worker'
    static_configs:
      - targets: ['exercise-worker:3003']
    scrape_interval: 30s

  - job_name: 'notification-worker'
    static_configs:
      - targets: ['notification-worker:3004']
    scrape_interval: 30s

  # Nginx
  - job_name: 'nginx'
    static_configs:
      - targets: ['nginx:9113']
    scrape_interval: 30s

  # Node Exporter (host metrics)
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['node-exporter:9100']
    scrape_interval: 15s

  # Docker Daemon
  - job_name: 'docker'
    static_configs:
      - targets: ['docker-exporter:9323']
    scrape_interval: 30s
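The postgres, redis, node-exporter, and docker jobs scrape exporter sidecars rather than the services themselves, so each exporter must run as its own container on the Compose network (the docker job additionally expects the Docker daemon's metrics endpoint to be enabled). A sketch of the three standard exporters; image tags and credentials are assumptions:

postgres-exporter:
  image: prometheuscommunity/postgres-exporter:v0.15.0
  environment:
    # connection string is an assumption - reuse the stack's real credentials
    DATA_SOURCE_NAME: 'postgresql://postgres:${POSTGRES_PASSWORD}@postgres:5432/postgres?sslmode=disable'

redis-exporter:
  image: oliver006/redis_exporter:v1.58.0
  environment:
    REDIS_ADDR: 'redis://redis:6379'

node-exporter:
  image: prom/node-exporter:v1.7.0
  pid: host
  volumes:
    # mount the host filesystem read-only so host metrics are visible
    - /:/host:ro,rslave
  command:
    - '--path.rootfs=/host'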


@@ -0,0 +1,203 @@
# ========================================
# PROMETHEUS ALERTING RULES
# Enterprise Grade Monitoring
# ========================================

groups:
  # ========================================
  # Backend API Alerts
  # ========================================
  - name: backend_alerts
    interval: 30s
    rules:
      - alert: BackendDown
        expr: up{job="backend"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Backend API is down"
          description: "Backend API has been down for more than 1 minute"

      - alert: BackendHighErrorRate
        expr: rate(http_requests_total{job="backend", status=~"5.."}[5m]) > 0.05
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Backend high error rate"
          description: "Backend error rate is {{ $value | humanizePercentage }}"

      - alert: BackendHighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="backend"}[5m])) > 2
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: "Backend high response time"
          description: "95th percentile response time is {{ $value }}s"

      - alert: BackendLowSuccessRate
        expr: rate(http_requests_total{job="backend", status=~"2.."}[5m]) / rate(http_requests_total{job="backend"}[5m]) < 0.95
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Backend low success rate"
          description: "Success rate is {{ $value | humanizePercentage }}"

  # ========================================
  # Database Alerts
  # ========================================
  - name: database_alerts
    interval: 30s
    rules:
      - alert: PostgreSQLDown
        expr: up{job="postgres"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "PostgreSQL is down"
          description: "PostgreSQL database has been down for more than 1 minute"

      - alert: PostgreSQLHighConnections
        expr: pg_stat_activity_count > 150
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "PostgreSQL high connection count"
          description: "PostgreSQL has {{ $value }} connections (> 150)"

      - alert: PostgreSQLReplicationLag
        expr: pg_replication_lag_seconds > 30
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "PostgreSQL replication lag"
          description: "Replication lag is {{ $value }}s"

      - alert: PostgreSQLSlowQueries
        expr: rate(pg_stat_statements_seconds_total[5m]) > 1
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "PostgreSQL slow queries detected"
          description: "Slow query rate is {{ $value }}s/s"

  # ========================================
  # Redis Alerts
  # ========================================
  - name: redis_alerts
    interval: 30s
    rules:
      - alert: RedisDown
        expr: up{job="redis"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Redis is down"
          description: "Redis has been down for more than 1 minute"

      - alert: RedisHighMemoryUsage
        expr: redis_memory_used_bytes / redis_memory_max_bytes > 0.9
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis high memory usage"
          description: "Redis memory usage is {{ $value | humanizePercentage }}"

      - alert: RedisRejectedConnections
        expr: rate(redis_rejected_connections_total[5m]) > 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Redis rejected connections"
          description: "Redis is rejecting connections"

  # ========================================
  # Worker Alerts
  # ========================================
  - name: worker_alerts
    interval: 30s
    rules:
      - alert: PDFWorkerDown
        expr: up{job="pdf-worker"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "PDF Worker is down"

      - alert: ExerciseWorkerDown
        expr: up{job="exercise-worker"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Exercise Worker is down"

      - alert: NotificationWorkerDown
        expr: up{job="notification-worker"} == 0
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "Notification Worker is down"

      - alert: WorkerHighCPUUsage
        expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Worker high CPU usage"
          description: "CPU usage is {{ $value }}%"

  # ========================================
  # Infrastructure Alerts
  # ========================================
  - name: infrastructure_alerts
    interval: 30s
    rules:
      - alert: NodeHighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Node high memory usage"
          description: "Memory usage is {{ $value | humanizePercentage }}"

      - alert: NodeDiskFull
        expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) < 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Node disk is filling up"
          description: "Disk has {{ $value | humanizePercentage }} available"

      - alert: NodeHighLoad
        expr: node_load1 > 4
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Node high load average"
          description: "Load average is {{ $value }}"

      - alert: ContainerHighRestartRate
        expr: rate(container_start_count_total[15m]) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Container restarting frequently"
          description: "Container {{ $labels.name }} is restarting"