// Commit: "chore: initial project import"
// CI: some checks failed; "CI - Production Readiness / Verify (push)" was cancelled.
// File: app/api/health/route.ts (new normal file, 160 lines; hunk @@ -0,0 +1,160 @@)
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
|
||||
import { prisma } from "@/lib/prisma";
|
||||
|
||||
/**
 * Health result for a single component of the service.
 *
 * `status` and `message` are always included in the health response;
 * `meta` carries optional diagnostic details and is only returned to
 * callers that present a valid health-check token.
 */
type ComponentHealth = {
  /** "ok" = healthy, "degraded" = working with problems, "down" = unavailable. */
  status: "ok" | "degraded" | "down";
  /** Short human-readable summary of the component state. */
  message: string;
  /** Optional structured diagnostics (counts, thresholds, timestamps, ...). */
  meta?: unknown;
};
|
||||
|
||||
/**
 * Parses a (typically environment-provided) string as a strictly positive,
 * finite number.
 *
 * @param value - Raw string to parse; may be `undefined` when the setting is absent.
 * @param fallback - Value returned when `value` is missing, not numeric,
 *   non-finite, zero, or negative.
 * @returns The parsed number, or `fallback` when the input is unusable.
 */
function normalizePositiveNumber(value: string | undefined, fallback: number): number {
  // Number(undefined) is NaN and Number("") is 0, so both fall through to the fallback.
  const parsed = Number(value);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }

  return parsed;
}
|
||||
|
||||
/**
 * Decides whether the caller is allowed to see detailed component metadata.
 *
 * Details are exposed only when `HEALTHCHECK_TOKEN` is configured and the
 * request presents it through one of:
 *   - the `Authorization` header (raw token or `Bearer <token>`),
 *   - the `x-health-token` header,
 *   - the `token` query parameter.
 *
 * Every credential location is checked independently, so an unrelated
 * `Authorization` header (e.g. added by a proxy) does not mask a valid
 * `x-health-token` header or `token` query parameter.
 *
 * NOTE(review): tokens passed in the query string tend to end up in access
 * logs and browser history; prefer one of the header forms.
 *
 * @param req - Incoming request whose headers and URL are inspected.
 * @returns `true` when a presented credential matches the configured token.
 */
function maybeExposeDetails(req: NextRequest): boolean {
  const expected = process.env.HEALTHCHECK_TOKEN?.trim();
  if (!expected) {
    // No token configured: details are never exposed.
    return false;
  }

  const candidates = [
    req.headers.get("authorization"),
    req.headers.get("x-health-token"),
    new URL(req.url).searchParams.get("token")
  ];

  return candidates.some((raw) => {
    const token = raw?.trim();
    // `expected` is non-empty, so missing or blank candidates can never match.
    return token === expected || token === `Bearer ${expected}`;
  });
}
|
||||
|
||||
/**
 * Reports whether every given component is fully healthy.
 *
 * @param components - Component results to inspect (not mutated).
 * @returns `true` when all entries have status "ok"; an empty list yields `true`.
 */
function isUp(components: readonly ComponentHealth[]): boolean {
  return components.every((item) => item.status === "ok");
}
|
||||
|
||||
/**
 * Health-check endpoint.
 *
 * Probes the database, then (only if the database answers) inspects the
 * campaign retry worker state, the number of failed webhook events in the
 * last hour, and the number of disconnected channels.
 *
 * Response body: `{ ok, status, components, timestamp }`.
 *   - `status` is "ok" when every component is ok, "down" when the database
 *     probe failed, and "degraded" otherwise.
 *   - `components` contains only `status` and `message` per component unless
 *     the caller presents a valid health-check token, in which case `meta`
 *     is included as well.
 *
 * HTTP status is 503 when the overall status is "down" and 200 otherwise.
 * Raw error text from failed queries is placed in `meta` only, so it is
 * never returned to unauthenticated callers.
 *
 * @param req - Incoming request; used only to decide whether details are exposed.
 */
export async function GET(req: NextRequest) {
  // --- Database probe -----------------------------------------------------
  let database: ComponentHealth = { status: "ok", message: "connected" };
  try {
    await prisma.$queryRaw`SELECT 1`;
  } catch (error) {
    database = {
      status: "down",
      message: "Database query failed",
      // Driver errors can contain connection details; keep them out of `message`.
      meta: { error: error instanceof Error ? error.message : "unknown error" }
    };
  }

  // --- Dependent checks ---------------------------------------------------
  let retries: ComponentHealth = { status: "ok", message: "campaign retry worker state unavailable" };
  let webhook: ComponentHealth = { status: "ok", message: "webhook events healthy" };

  if (database.status !== "ok") {
    retries = { status: "down", message: "skipped due to database not available" };
    webhook = { status: "down", message: "skipped due to database not available" };
  } else {
    const failureThreshold = normalizePositiveNumber(process.env.WEBHOOK_FAILURE_RATE_THRESHOLD_PER_HOUR, 10);
    const staleThresholdMinutes = normalizePositiveNumber(process.env.RETRY_WORKER_STALE_MINUTES, 30);

    try {
      const [retryState, webhookFailureCount, disconnectedChannels] = await Promise.all([
        prisma.backgroundJobState.findUnique({
          where: { jobName: "campaign-retry-worker" },
          select: {
            lockedUntil: true,
            lastRunCompletedAt: true,
            lastRunStatus: true,
            lastError: true,
            consecutiveFailures: true
          }
        }),
        prisma.webhookEvent.count({
          where: {
            processStatus: "failed",
            // Failed events created within the last 60 minutes.
            createdAt: {
              gte: new Date(Date.now() - 60 * 60 * 1000)
            }
          }
        }),
        prisma.channel.count({ where: { status: "DISCONNECTED" } })
      ]);

      if (!retryState) {
        retries = {
          status: "degraded",
          message: "retry worker state not initialized"
        };
      } else {
        const staleSince = new Date(Date.now() - staleThresholdMinutes * 60 * 1000);
        // A worker that has never completed a run (null timestamp) is not treated as stale.
        const isStaleLastRun = retryState.lastRunCompletedAt != null && retryState.lastRunCompletedAt < staleSince;
        const shouldBeDown = retryState.lastRunStatus === "failed" && (retryState.consecutiveFailures ?? 0) >= 3;

        if (shouldBeDown) {
          // Repeated failure takes precedence over staleness.
          retries = {
            status: "down",
            message: "retry worker in repeated failure state",
            meta: {
              status: retryState.lastRunStatus,
              consecutiveFailures: retryState.consecutiveFailures
            }
          };
        } else if (isStaleLastRun) {
          retries = {
            status: "degraded",
            message: "retry worker hasn't completed a run recently",
            meta: {
              lastRunCompletedAt: retryState.lastRunCompletedAt?.toISOString() ?? null,
              staleMinutes: staleThresholdMinutes
            }
          };
        } else {
          retries = {
            status: "ok",
            message: `retry worker status: ${retryState.lastRunStatus ?? "unknown"}`,
            meta: {
              consecutiveFailures: retryState.consecutiveFailures ?? 0
            }
          };
        }
      }

      // A high failure volume is reported in preference to disconnected channels.
      if (webhookFailureCount > failureThreshold) {
        webhook = {
          status: "degraded",
          message: `high webhook failure volume: ${webhookFailureCount} in 60m`,
          meta: { count: webhookFailureCount, threshold: failureThreshold }
        };
      } else if (disconnectedChannels > 0) {
        webhook = {
          status: "degraded",
          message: `disconnected channels: ${disconnectedChannels}`,
          meta: { disconnectedChannels }
        };
      }
    } catch (error) {
      // The probe succeeded but a follow-up query failed; report it as a
      // structured result instead of letting the handler throw.
      const detail = error instanceof Error ? error.message : "unknown error";
      retries = {
        status: "down",
        message: "retry worker state query failed",
        meta: { error: detail }
      };
      webhook = {
        status: "down",
        message: "webhook health query failed",
        meta: { error: detail }
      };
    }
  }

  // --- Aggregate and respond ----------------------------------------------
  const checks: Record<string, ComponentHealth> = { database, retries, webhook };

  let overall: "ok" | "degraded" | "down";
  if (isUp([database, retries, webhook])) {
    overall = "ok";
  } else if (database.status === "down") {
    overall = "down";
  } else {
    overall = "degraded";
  }

  const exposeDetails = maybeExposeDetails(req);
  const payload = {
    ok: overall !== "down",
    status: overall,
    components: exposeDetails
      ? checks
      : Object.fromEntries(
          Object.entries(checks).map(([name, item]) => [name, { status: item.status, message: item.message }])
        ),
    timestamp: new Date().toISOString()
  };

  return NextResponse.json(payload, {
    status: overall === "down" ? 503 : 200,
    // Health results must reflect the current state; never serve them from a cache.
    headers: { "Cache-Control": "no-store" }
  });
}
|
||||
Reference in New Issue
Block a user