feat: model rewrite
This commit is contained in:
parent
ac2006c206
commit
1dd5a38aa8
30 changed files with 1702 additions and 58 deletions
|
|
@ -76,8 +76,10 @@ pnpm run bench # 벤치마크 실행
|
|||
| `OIDC_REDIRECT_URI` | empty | OIDC callback URL |
|
||||
| `OIDC_ALLOWED_EMAILS` | empty | 관리자 접근을 허용할 이메일 목록 |
|
||||
| `OIDC_SCOPES` | `openid profile email` | OIDC authorization scope |
|
||||
| `MODEL_CATALOG_REFRESH_MIN_MS` | `300000` 예시 | 모델 카탈로그 refresh 최소 간격(ms) |
|
||||
|
||||
## Detailed Docs
|
||||
관련 기능을 수정하기 전에 해당 문서를 반드시 먼저 읽으세요.
|
||||
|
||||
클라이언트 중심
|
||||
- [docs/client.md](docs/client.md) — 클라이언트 구조, `/dashboard` 라우팅, 관리자 UI 동작
|
||||
|
|
@ -87,6 +89,7 @@ pnpm run bench # 벤치마크 실행
|
|||
|
||||
서버 중심
|
||||
- [docs/server.md](docs/server.md) — 서버 구조, 서비스, 모델, 의존성
|
||||
- [docs/model-routing.md](docs/model-routing.md) — 모델 카탈로그, force/fallback rewrite, 캐시 갱신 규칙
|
||||
- [docs/database.md](docs/database.md) — DB 테이블 스키마 전체
|
||||
- [docs/api.md](docs/api.md) — API 엔드포인트 레퍼런스
|
||||
- [docs/k8s-traefik.md](docs/k8s-traefik.md) — Traefik path 기반 내부망 제어 예시
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ const Backends = lazy(() => import('./routes/Backends').then((module) => ({ defa
|
|||
const Permissions = lazy(() => import('./routes/Permissions').then((module) => ({ default: module.Permissions })));
|
||||
const Analytics = lazy(() => import('./routes/Analytics').then((module) => ({ default: module.Analytics })));
|
||||
const DetailLogs = lazy(() => import('./routes/DetailLogs').then((module) => ({ default: module.DetailLogs })));
|
||||
const Models = lazy(() => import('./routes/Models').then((module) => ({ default: module.Models })));
|
||||
const Scripts = lazy(() => import('./routes/Scripts').then((module) => ({ default: module.Scripts })));
|
||||
|
||||
function RouteLoadingFallback() {
|
||||
|
|
@ -40,6 +41,7 @@ function AuthenticatedApp() {
|
|||
<Route path="/backends" component={Backends} />
|
||||
<Route path="/permissions" component={Permissions} />
|
||||
<Route path="/analytics" component={Analytics} />
|
||||
<Route path="/models" component={Models} />
|
||||
<Route path="/detail-logs" component={DetailLogs} />
|
||||
<Route path="/scripts" component={Scripts} />
|
||||
</Router>
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
import type {
|
||||
User,
|
||||
Backend,
|
||||
BackendModelsResponse,
|
||||
ModelCacheOverview,
|
||||
ModelRewriteRule,
|
||||
Permission,
|
||||
RequestLogPage,
|
||||
UsageStats,
|
||||
|
|
@ -100,6 +103,9 @@ export const api = {
|
|||
backends: {
|
||||
getAll: (): Promise<Backend[]> => fetchJson<Backend[]>(`${API_BASE}/admin/backends`),
|
||||
getById: (id: number): Promise<Backend> => fetchJson<Backend>(`${API_BASE}/admin/backends/${id}`),
|
||||
getModels: (id: number): Promise<BackendModelsResponse> => fetchJson<BackendModelsResponse>(`${API_BASE}/admin/backends/${id}/models`),
|
||||
refreshModels: (id: number): Promise<BackendModelsResponse> =>
|
||||
fetchJson<BackendModelsResponse>(`${API_BASE}/admin/backends/${id}/models/refresh`, { method: 'POST' }),
|
||||
create: (data: { name: string; base_url: string; api_key?: string; detail_logging?: boolean }): Promise<Backend> =>
|
||||
fetchJson<Backend>(`${API_BASE}/admin/backends`, { method: 'POST', body: JSON.stringify(data) }),
|
||||
update: (id: number, data: Partial<Backend>): Promise<Backend> =>
|
||||
|
|
@ -120,6 +126,20 @@ export const api = {
|
|||
fetchJson<void>(`${API_BASE}/admin/permissions?user_id=${userId}&backend_id=${backendId}`, { method: 'DELETE' }),
|
||||
},
|
||||
|
||||
modelRewrites: {
|
||||
getAll: (): Promise<ModelRewriteRule[]> => fetchJson<ModelRewriteRule[]>(`${API_BASE}/admin/model-rewrites`),
|
||||
create: (data: { source_model: string; target_model: string; is_active?: boolean; force?: boolean; note?: string }): Promise<ModelRewriteRule> =>
|
||||
fetchJson<ModelRewriteRule>(`${API_BASE}/admin/model-rewrites`, { method: 'POST', body: JSON.stringify(data) }),
|
||||
update: (id: number, data: Partial<ModelRewriteRule>): Promise<ModelRewriteRule> =>
|
||||
fetchJson<ModelRewriteRule>(`${API_BASE}/admin/model-rewrites/${id}`, { method: 'PUT', body: JSON.stringify(data) }),
|
||||
delete: (id: number): Promise<void> =>
|
||||
fetchJson<void>(`${API_BASE}/admin/model-rewrites/${id}`, { method: 'DELETE' }),
|
||||
},
|
||||
|
||||
modelCache: {
|
||||
getOverview: (): Promise<ModelCacheOverview> => fetchJson<ModelCacheOverview>(`${API_BASE}/admin/models/cache`),
|
||||
},
|
||||
|
||||
scripts: {
|
||||
getAll: (): Promise<UserScript[]> => fetchJson<UserScript[]>(`${API_BASE}/admin/scripts`),
|
||||
getById: (id: number): Promise<UserScript> => fetchJson<UserScript>(`${API_BASE}/admin/scripts/${id}`),
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import { createResource, createSignal, Show, type Component } from 'solid-js';
|
||||
import { For, createResource, createSignal, Show, type Component } from 'solid-js';
|
||||
import Pencil from 'lucide-solid/icons/pencil';
|
||||
import Plus from 'lucide-solid/icons/plus';
|
||||
import RefreshCw from 'lucide-solid/icons/refresh-cw';
|
||||
import Trash2 from 'lucide-solid/icons/trash-2';
|
||||
import { api } from '../api/client';
|
||||
import { Layout } from '../components/Layout';
|
||||
import type { Backend } from '../types';
|
||||
import type { Backend, BackendModelsResponse } from '../types';
|
||||
import {
|
||||
Alert,
|
||||
Button,
|
||||
|
|
@ -45,6 +46,34 @@ export const Backends: Component = () => {
|
|||
const [form, setForm] = createSignal<BackendFormState>(emptyForm());
|
||||
const [submitting, setSubmitting] = createSignal(false);
|
||||
const [notice, setNotice] = createSignal<{ tone: 'success' | 'danger'; message: string } | null>(null);
|
||||
const [expandedBackendId, setExpandedBackendId] = createSignal<number | null>(null);
|
||||
const [backendModels, setBackendModels] = createSignal<Record<number, BackendModelsResponse>>({});
|
||||
|
||||
/**
 * Picks the badge tone for a backend's model-cache state.
 *
 * `ready`, `error` and `inactive` each have a dedicated tone; every other
 * value (including a missing state) is rendered with the `warning` tone.
 */
const modelStateTone = (backend: Backend): 'success' | 'warning' | 'danger' | 'neutral' => {
  const state = backend.model_cache_state;
  if (state === 'ready') return 'success';
  if (state === 'error') return 'danger';
  if (state === 'inactive') return 'neutral';
  return 'warning';
};
|
||||
|
||||
/**
 * Picks the short badge text for a backend's model-cache state.
 *
 * `ready` → "Cached", `error` → "Error", `inactive` → "Skipped"; every other
 * value (including a missing state) is shown as "Pending".
 */
const modelStateLabel = (backend: Backend): string => {
  const state = backend.model_cache_state;
  if (state === 'ready') return 'Cached';
  if (state === 'error') return 'Error';
  if (state === 'inactive') return 'Skipped';
  return 'Pending';
};
|
||||
|
||||
const openCreateDialog = () => {
|
||||
setEditingBackend(null);
|
||||
|
|
@ -127,6 +156,37 @@ export const Backends: Component = () => {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Expands or collapses the model detail panel for `backend`.
 *
 * Clicking the backend that is already expanded collapses it. Otherwise the
 * panel is expanded immediately and, if no detail has been stored for this
 * backend yet, it is fetched via `api.backends.getModels` and stored in
 * `backendModels`.
 *
 * If the fetch fails, a danger notice is shown and the panel is collapsed
 * again. Without that, the panel would keep rendering its "Loading models"
 * fallback forever, because no detail ever arrives for it.
 */
const toggleDetails = async (backend: Backend) => {
  const isClosing = expandedBackendId() === backend.id;
  setExpandedBackendId(isClosing ? null : backend.id);

  // Nothing to load when collapsing or when a detail is already stored.
  if (isClosing || backendModels()[backend.id]) {
    return;
  }

  try {
    const detail = await api.backends.getModels(backend.id);
    setBackendModels((current) => ({ ...current, [backend.id]: detail }));
  } catch (error) {
    // Collapse only if the user has not switched to another backend while
    // the request was in flight.
    if (expandedBackendId() === backend.id) {
      setExpandedBackendId(null);
    }
    setNotice({ tone: 'danger', message: error instanceof Error ? error.message : 'Failed to load backend models.' });
  }
};
|
||||
|
||||
/**
 * Asks the server to refresh the model cache of `backend` and stores the
 * returned detail in `backendModels`.
 *
 * Does nothing for inactive backends. `submitting` is raised for the whole
 * operation and always lowered again. On success a notice is shown and
 * `refetch()` is awaited; on failure a danger notice is shown instead.
 */
const refreshModels = async (backend: Backend) => {
  if (backend.is_active) {
    setSubmitting(true);
    try {
      const refreshed = await api.backends.refreshModels(backend.id);
      setBackendModels((previous) => {
        return { ...previous, [backend.id]: refreshed };
      });
      setNotice({ tone: 'success', message: `${backend.name} model cache refreshed.` });
      await refetch();
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Model refresh failed.';
      setNotice({ tone: 'danger', message });
    } finally {
      setSubmitting(false);
    }
  }
};
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
<div class="ui-app-page">
|
||||
|
|
@ -171,6 +231,16 @@ export const Backends: Component = () => {
|
|||
header: 'Detail Log',
|
||||
cell: (backend) => <StatusBadge tone={backend.detail_logging ? 'warning' : 'neutral'}>{backend.detail_logging ? 'On' : 'Off'}</StatusBadge>,
|
||||
},
|
||||
{
|
||||
id: 'model_cache',
|
||||
header: 'Model Cache',
|
||||
cell: (backend) => <StatusBadge tone={modelStateTone(backend)}>{modelStateLabel(backend)}</StatusBadge>,
|
||||
},
|
||||
{
|
||||
id: 'model_count',
|
||||
header: 'Models',
|
||||
cell: (backend) => <span>{backend.cached_model_count ?? 0}</span>,
|
||||
},
|
||||
{
|
||||
id: 'status',
|
||||
header: 'Status',
|
||||
|
|
@ -181,11 +251,47 @@ export const Backends: Component = () => {
|
|||
loading={backends.loading}
|
||||
rowActions={(backend) => (
|
||||
<div class="ui-row-actions">
|
||||
<IconButton
|
||||
icon={<RefreshCw />}
|
||||
label="Refresh Models"
|
||||
disabled={!backend.is_active || submitting()}
|
||||
onClick={() => void refreshModels(backend)}
|
||||
/>
|
||||
<IconButton icon={<Pencil />} label="Edit" onClick={() => openEditDialog(backend)} />
|
||||
<Button onClick={() => void toggleDetails(backend)}>{expandedBackendId() === backend.id ? 'Hide Models' : 'View Models'}</Button>
|
||||
<IconButton variant="danger" icon={<Trash2 />} label="Delete" onClick={() => requestDelete(backend)} />
|
||||
</div>
|
||||
)}
|
||||
/>
|
||||
<Show when={expandedBackendId()}>
|
||||
{(backendId) => {
|
||||
const detail = () => backendModels()[backendId()];
|
||||
return (
|
||||
<Panel
|
||||
title={`Backend ${backendId()} Models`}
|
||||
description={detail()?.cache.state === 'inactive' ? 'Inactive backends skip model fetches and only keep the last DB snapshot.' : 'Live cache state and last persisted model snapshot.'}
|
||||
>
|
||||
<div class="ui-stack ui-stack--tight">
|
||||
<Show when={detail()} fallback={<EmptyState title="Loading models" description="Reading cached model information for this backend." />}>
|
||||
<Alert tone={detail()!.cache.last_error ? 'danger' : 'success'}>
|
||||
{detail()!.cache.last_error
|
||||
? `Last error: ${detail()!.cache.last_error}`
|
||||
: `State: ${detail()!.cache.state}, models: ${detail()!.cache.model_count}, last sync: ${detail()!.cache.last_synced_at ?? 'never'}`}
|
||||
</Alert>
|
||||
<Show
|
||||
when={detail()!.models.length > 0}
|
||||
fallback={<EmptyState title="No cached models" description="This backend has not published any models yet or the last refresh failed." />}
|
||||
>
|
||||
<div class="ui-chip-row">
|
||||
<For each={detail()!.models}>{(modelId) => <StatusBadge tone="neutral">{modelId}</StatusBadge>}</For>
|
||||
</div>
|
||||
</Show>
|
||||
</Show>
|
||||
</div>
|
||||
</Panel>
|
||||
);
|
||||
}}
|
||||
</Show>
|
||||
</Show>
|
||||
</Show>
|
||||
</Panel>
|
||||
|
|
|
|||
263
client/src/routes/Models.tsx
Normal file
263
client/src/routes/Models.tsx
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
import { createResource, createSignal, For, Show, type Component } from 'solid-js';
|
||||
import Pencil from 'lucide-solid/icons/pencil';
|
||||
import Plus from 'lucide-solid/icons/plus';
|
||||
import Trash2 from 'lucide-solid/icons/trash-2';
|
||||
import { api } from '../api/client';
|
||||
import { Layout } from '../components/Layout';
|
||||
import type { ModelRewriteRule } from '../types';
|
||||
import {
|
||||
Alert,
|
||||
Button,
|
||||
Checkbox,
|
||||
ConfirmDialog,
|
||||
DataGrid,
|
||||
EmptyState,
|
||||
FormDialog,
|
||||
IconButton,
|
||||
PageHeader,
|
||||
Panel,
|
||||
StatusBadge,
|
||||
SummaryStrip,
|
||||
TextField,
|
||||
} from '../ui';
|
||||
|
||||
/** Editable state of the add/edit rewrite-rule dialog. */
interface RewriteFormState {
  /** Model name to rewrite from. */
  source_model: string;
  /** Model name to rewrite to. */
  target_model: string;
  /** Free-form note; an empty string means "no note". */
  note: string;
  /** `true` = always rewrite, `false` = use the target only as a fallback. */
  force: boolean;
  /** Whether the rule is active. */
  is_active: boolean;
}
|
||||
|
||||
/**
 * Builds a fresh blank form: empty text fields, rule active, fallback mode.
 * A new object is returned on every call.
 */
const emptyForm = (): RewriteFormState => {
  const blank: RewriteFormState = {
    source_model: '',
    target_model: '',
    note: '',
    force: false,
    is_active: true,
  };
  return blank;
};
|
||||
|
||||
export const Models: Component = () => {
|
||||
const [overview, { refetch: refetchOverview }] = createResource(() => api.modelCache.getOverview());
|
||||
const [rules, { refetch: refetchRules }] = createResource(() => api.modelRewrites.getAll());
|
||||
const [dialogOpen, setDialogOpen] = createSignal(false);
|
||||
const [confirmOpen, setConfirmOpen] = createSignal(false);
|
||||
const [editingRule, setEditingRule] = createSignal<ModelRewriteRule | null>(null);
|
||||
const [pendingDeleteRule, setPendingDeleteRule] = createSignal<ModelRewriteRule | null>(null);
|
||||
const [form, setForm] = createSignal<RewriteFormState>(emptyForm());
|
||||
const [submitting, setSubmitting] = createSignal(false);
|
||||
const [notice, setNotice] = createSignal<{ tone: 'success' | 'danger'; message: string } | null>(null);
|
||||
|
||||
/** Opens the rule dialog in "create" mode with a blank form. */
const openCreateDialog = () => {
  const blank = emptyForm();
  setEditingRule(null); // no rule selected => dialog renders as "Add Model Rule"
  setForm(blank);
  setDialogOpen(true);
};
|
||||
|
||||
/**
 * Opens the rule dialog in "edit" mode, pre-filled from `rule`.
 * A missing note is shown as an empty text field.
 */
const openEditDialog = (rule: ModelRewriteRule) => {
  const { source_model, target_model, is_active, force, note } = rule;
  setEditingRule(rule);
  setForm({ source_model, target_model, is_active, force, note: note ?? '' });
  setDialogOpen(true);
};
|
||||
|
||||
/**
 * Submit handler of the rule dialog: creates a new rule, or updates the rule
 * currently held in `editingRule`.
 *
 * Source and target are trimmed and must both be non-empty; otherwise a
 * danger notice is shown and nothing is sent. On success the dialog is
 * closed, the form is reset, and both the rule list and the cache overview
 * are refetched. `submitting` is raised while the request is in flight.
 */
const saveRule = async (event: Event) => {
  event.preventDefault();

  const { source_model, target_model, is_active, force, note } = form();
  const source = source_model.trim();
  const target = target_model.trim();
  if (source === '' || target === '') {
    setNotice({ tone: 'danger', message: 'Source and target model are required.' });
    return;
  }

  setSubmitting(true);
  try {
    // NOTE(review): an emptied note becomes `undefined`, which JSON.stringify
    // drops from the request body — confirm the server clears the stored note
    // on update in that case.
    const payload = {
      source_model: source,
      target_model: target,
      is_active,
      force,
      note: note.trim() || undefined,
    };

    const existing = editingRule();
    if (existing) {
      await api.modelRewrites.update(existing.id, payload);
      setNotice({ tone: 'success', message: 'Model rule updated.' });
    } else {
      await api.modelRewrites.create(payload);
      setNotice({ tone: 'success', message: 'Model rule created.' });
    }

    setDialogOpen(false);
    setEditingRule(null);
    setForm(emptyForm());
    await Promise.all([refetchRules(), refetchOverview()]);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Model rule save failed.';
    setNotice({ tone: 'danger', message });
  } finally {
    setSubmitting(false);
  }
};
|
||||
|
||||
/**
 * Deletes the rule stored in `pendingDeleteRule` (no-op when none is set).
 *
 * On success the confirm dialog is closed, the pending rule is cleared, and
 * the rule list and cache overview are refetched. On failure a danger notice
 * is shown and the dialog stays as it was. `submitting` is raised while the
 * request is in flight.
 */
const deleteRule = async () => {
  const target = pendingDeleteRule();
  if (target === null) {
    return;
  }

  setSubmitting(true);
  try {
    await api.modelRewrites.delete(target.id);
    setNotice({ tone: 'success', message: `${target.source_model} removed.` });
    setConfirmOpen(false);
    setPendingDeleteRule(null);
    await Promise.all([refetchRules(), refetchOverview()]);
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Model rule deletion failed.';
    setNotice({ tone: 'danger', message });
  } finally {
    setSubmitting(false);
  }
};
|
||||
|
||||
return (
|
||||
<Layout>
|
||||
<div class="ui-app-page">
|
||||
<PageHeader
|
||||
title="Models"
|
||||
description="Inspect cached backend model catalogs and manage global model rewrite rules."
|
||||
actions={<Button onClick={() => void Promise.all([refetchOverview(), refetchRules()])}>Refresh</Button>}
|
||||
/>
|
||||
|
||||
<SummaryStrip
|
||||
items={[
|
||||
{ label: 'Catalog Models', value: overview()?.models.length ?? 0, hint: 'Unique models across active backends' },
|
||||
{ label: 'Tracked Backends', value: overview()?.backends.length ?? 0, hint: 'Memory cache status by backend' },
|
||||
{ label: 'Rewrite Rules', value: rules()?.length ?? 0, hint: 'Global source -> target mappings' },
|
||||
]}
|
||||
/>
|
||||
|
||||
<Show when={notice()}>
|
||||
{(currentNotice) => <Alert tone={currentNotice().tone}>{currentNotice().message}</Alert>}
|
||||
</Show>
|
||||
|
||||
<Panel title="Model Cache Overview" description="Memory-backed catalog state used by request routing and `/v1/models`.">
|
||||
<Show
|
||||
when={(overview()?.backends.length ?? 0) > 0}
|
||||
fallback={<EmptyState title="No backend cache yet" description="Backend model states appear here after the server has seen active backends." />}
|
||||
>
|
||||
<DataGrid
|
||||
rows={overview()?.backends ?? []}
|
||||
columns={[
|
||||
{ id: 'backend_id', header: 'Backend', mono: true, cell: (item) => <span>{item.backend_id}</span> },
|
||||
{ id: 'state', header: 'State', cell: (item) => <StatusBadge tone={item.state === 'ready' ? 'success' : item.state === 'error' ? 'danger' : item.state === 'inactive' ? 'neutral' : 'warning'}>{item.state}</StatusBadge> },
|
||||
{ id: 'model_count', header: 'Models', cell: (item) => <span>{item.model_count}</span> },
|
||||
{ id: 'last_synced_at', header: 'Last Sync', cell: (item) => <span>{item.last_synced_at ? new Date(item.last_synced_at).toLocaleString() : '-'}</span> },
|
||||
{ id: 'last_error', header: 'Last Error', cell: (item) => <span title={item.last_error ?? '-'}>{item.last_error ?? '-'}</span> },
|
||||
]}
|
||||
getRowKey={(item) => item.backend_id}
|
||||
loading={overview.loading}
|
||||
/>
|
||||
<Show when={(overview()?.models.length ?? 0) > 0}>
|
||||
<div class="ui-chip-row">
|
||||
<For each={overview()?.models ?? []}>
|
||||
{(entry) => <StatusBadge tone="neutral">{`${entry.model_id} (${entry.backend_ids.length})`}</StatusBadge>}
|
||||
</For>
|
||||
</div>
|
||||
</Show>
|
||||
</Show>
|
||||
</Panel>
|
||||
|
||||
<Panel
|
||||
title="Model Rewrite Rules"
|
||||
description="Force rules always rewrite. Fallback rules rewrite only when the original model has no usable backend."
|
||||
>
|
||||
<div class="ui-stack ui-stack--tight">
|
||||
<div class="ui-row-actions">
|
||||
<IconButton variant="primary" icon={<Plus />} label="Add Rule" onClick={openCreateDialog} />
|
||||
</div>
|
||||
<Show
|
||||
when={(rules()?.length ?? 0) > 0}
|
||||
fallback={<EmptyState title="No rewrite rules" description="Requests currently route using the original model name." />}
|
||||
>
|
||||
<DataGrid
|
||||
rows={rules() ?? []}
|
||||
columns={[
|
||||
{ id: 'source_model', header: 'Source', cell: (rule) => <span>{rule.source_model}</span> },
|
||||
{ id: 'target_model', header: 'Target', cell: (rule) => <span>{rule.target_model}</span> },
|
||||
{ id: 'mode', header: 'Mode', cell: (rule) => <StatusBadge tone={rule.force ? 'warning' : 'neutral'}>{rule.force ? 'Force' : 'Fallback'}</StatusBadge> },
|
||||
{ id: 'is_active', header: 'Status', cell: (rule) => <StatusBadge tone={rule.is_active ? 'success' : 'warning'}>{rule.is_active ? 'Active' : 'Inactive'}</StatusBadge> },
|
||||
{ id: 'note', header: 'Note', cell: (rule) => <span title={rule.note ?? '-'}>{rule.note ?? '-'}</span> },
|
||||
]}
|
||||
getRowKey={(rule) => rule.id}
|
||||
loading={rules.loading}
|
||||
rowActions={(rule) => (
|
||||
<div class="ui-row-actions">
|
||||
<IconButton icon={<Pencil />} label="Edit" onClick={() => openEditDialog(rule)} />
|
||||
<IconButton
|
||||
variant="danger"
|
||||
icon={<Trash2 />}
|
||||
label="Delete"
|
||||
onClick={() => {
|
||||
setPendingDeleteRule(rule);
|
||||
setConfirmOpen(true);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
/>
|
||||
</Show>
|
||||
</div>
|
||||
</Panel>
|
||||
|
||||
<FormDialog
|
||||
open={dialogOpen()}
|
||||
onOpenChange={setDialogOpen}
|
||||
title={editingRule() ? 'Edit Model Rule' : 'Add Model Rule'}
|
||||
description="Choose whether the target model should always replace the source, or only act as a fallback when the source is unavailable."
|
||||
footer={
|
||||
<>
|
||||
<Button onClick={() => setDialogOpen(false)} disabled={submitting()}>Cancel</Button>
|
||||
<Button type="submit" form="model-rule-form" variant="primary" disabled={submitting()}>
|
||||
{editingRule() ? 'Save Changes' : 'Create Rule'}
|
||||
</Button>
|
||||
</>
|
||||
}
|
||||
>
|
||||
<form id="model-rule-form" class="ui-form" onSubmit={(event) => void saveRule(event)}>
|
||||
<TextField label="Source Model" value={form().source_model} onInput={(event) => setForm((current) => ({ ...current, source_model: event.currentTarget.value }))} />
|
||||
<TextField label="Target Model" value={form().target_model} onInput={(event) => setForm((current) => ({ ...current, target_model: event.currentTarget.value }))} />
|
||||
<TextField label="Note" value={form().note} onInput={(event) => setForm((current) => ({ ...current, note: event.currentTarget.value }))} />
|
||||
<Checkbox
|
||||
label="Always force rewrite"
|
||||
description="When enabled, requests always route to the target model. When disabled, the target model is only used as a fallback."
|
||||
checked={form().force}
|
||||
onChange={(checked) => setForm((current) => ({ ...current, force: checked }))}
|
||||
/>
|
||||
<Checkbox
|
||||
label="Rule is active"
|
||||
description="Inactive rules stay stored but do not affect request routing."
|
||||
checked={form().is_active}
|
||||
onChange={(checked) => setForm((current) => ({ ...current, is_active: checked }))}
|
||||
/>
|
||||
</form>
|
||||
</FormDialog>
|
||||
|
||||
<ConfirmDialog
|
||||
open={confirmOpen()}
|
||||
onOpenChange={setConfirmOpen}
|
||||
title="Delete rewrite rule"
|
||||
description="Removing the rule stops rewriting requests that target this source model."
|
||||
confirmLabel="Delete Rule"
|
||||
tone="danger"
|
||||
busy={submitting()}
|
||||
onConfirm={() => void deleteRule()}
|
||||
/>
|
||||
</div>
|
||||
</Layout>
|
||||
);
|
||||
};
|
||||
|
|
@ -18,6 +18,58 @@ export type Backend = {
|
|||
detail_logging: boolean;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
cached_model_count?: number;
|
||||
last_model_sync_at?: string;
|
||||
model_cache_initialized?: boolean;
|
||||
model_cache_state?: 'ready' | 'uninitialized' | 'error' | 'inactive';
|
||||
};
|
||||
|
||||
/**
 * One persisted model snapshot of a backend. The fields mirror the columns
 * of the `backend_models` table.
 */
export type BackendModelSnapshot = {
  id: number;
  backend_id: number;
  model_id: string;
  /** Optional raw payload stored with the model (presumably the upstream JSON entry — verify against the server). */
  raw_json?: string;

  // Timestamps, transported as strings.
  fetched_at: string;
  created_at: string;
  updated_at: string;
};
|
||||
|
||||
/** In-memory model cache status of a single backend, as reported by the admin API. */
export type BackendModelCacheStatus = {
  backend_id: number;
  /** One of the four cache states the admin API reports. */
  state: 'ready' | 'uninitialized' | 'error' | 'inactive';
  initialized: boolean;
  model_count: number;

  // Optional diagnostics; absent when not available.
  last_synced_at?: string;
  last_attempted_at?: string;
  last_error?: string;
};
|
||||
|
||||
/** Response shape of the per-backend model endpoints (`/admin/backends/:id/models` and its `/refresh` variant). */
export type BackendModelsResponse = {
  /** The backend the response belongs to. */
  backend: Backend;
  /** In-memory cache status of that backend. */
  cache: BackendModelCacheStatus;
  /** Model ids currently held in the in-memory cache. */
  models: string[];
  /** Last model snapshot persisted in the database. */
  snapshots: BackendModelSnapshot[];
};
|
||||
|
||||
/** One model of the aggregated catalog, with the ids of the backends listed for it. */
export type BackendModelCatalogEntry = {
  backend_ids: number[];
  model_id: string;
};
|
||||
|
||||
/** Response shape of `GET /admin/models/cache`: per-backend cache status plus the aggregated model catalog. */
export type ModelCacheOverview = {
  /** Aggregated catalog entries, one per model id. */
  models: BackendModelCatalogEntry[];
  /** Cache status of each tracked backend. */
  backends: BackendModelCacheStatus[];
};
|
||||
|
||||
/**
 * A global model rewrite rule. The fields mirror the columns of the
 * `model_rewrites` table.
 */
export type ModelRewriteRule = {
  id: number;
  /** Model name the rule rewrites from. */
  source_model: string;
  /** Model name the rule rewrites to. */
  target_model: string;
  /** `true` = always rewrite; `false` = rewrite only as a fallback. */
  force: boolean;
  is_active: boolean;
  note?: string;

  // Timestamps, transported as strings.
  created_at: string;
  updated_at: string;
};
|
||||
|
||||
export type Permission = {
|
||||
|
|
@ -33,6 +85,7 @@ export type RequestLog = {
|
|||
backend_id: number;
|
||||
endpoint: string;
|
||||
request_model?: string;
|
||||
routed_model?: string;
|
||||
response_model?: string;
|
||||
prompt_tokens?: number;
|
||||
completion_tokens?: number;
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import FileCode from 'lucide-solid/icons/file-code';
|
|||
import LayoutDashboard from 'lucide-solid/icons/layout-dashboard';
|
||||
import LogOut from 'lucide-solid/icons/log-out';
|
||||
import Logs from 'lucide-solid/icons/logs';
|
||||
import Network from 'lucide-solid/icons/network';
|
||||
import Moon from 'lucide-solid/icons/moon';
|
||||
import Server from 'lucide-solid/icons/server';
|
||||
import ShieldCheck from 'lucide-solid/icons/shield-check';
|
||||
|
|
@ -22,6 +23,7 @@ const navItems = [
|
|||
{ path: '/backends', label: 'Backends', icon: Server },
|
||||
{ path: '/permissions', label: 'Permissions', icon: ShieldCheck },
|
||||
{ path: '/analytics', label: 'Analytics', icon: ChartColumn },
|
||||
{ path: '/models', label: 'Models', icon: Network },
|
||||
{ path: '/detail-logs', label: 'Detail Logs', icon: Logs },
|
||||
{ path: '/scripts', label: 'Scripts', icon: FileCode },
|
||||
];
|
||||
|
|
|
|||
|
|
@ -150,6 +150,12 @@
|
|||
gap: var(--space-2);
|
||||
}
|
||||
|
||||
/* Row of chips/badges that wraps onto new lines, spaced by --space-2. */
.ui-chip-row {
  gap: var(--space-2);
  flex-wrap: wrap;
  display: flex;
}
|
||||
|
||||
.ui-section-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ CREATE TABLE IF NOT EXISTS request_logs (
|
|||
backend_id INTEGER NOT NULL,
|
||||
endpoint TEXT NOT NULL,
|
||||
request_model TEXT,
|
||||
routed_model TEXT,
|
||||
response_model TEXT,
|
||||
prompt_tokens INTEGER,
|
||||
completion_tokens INTEGER,
|
||||
|
|
@ -26,4 +27,3 @@ CREATE INDEX IF NOT EXISTS idx_request_logs_user ON request_logs(user_id);
|
|||
CREATE INDEX IF NOT EXISTS idx_request_logs_backend ON request_logs(backend_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_request_logs_endpoint ON request_logs(endpoint);
|
||||
CREATE INDEX IF NOT EXISTS idx_request_logs_detail_logged ON request_logs(detail_logged);
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,34 @@ CREATE TABLE IF NOT EXISTS backends (
|
|||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
-- Backend model snapshots (offline/admin visibility only)
|
||||
-- One row per (backend, model); rows are removed together with their backend.
CREATE TABLE IF NOT EXISTS backend_models (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    backend_id  INTEGER NOT NULL,
    model_id    TEXT    NOT NULL,
    raw_json    TEXT,
    fetched_at  TEXT    NOT NULL,
    created_at  TEXT    NOT NULL,
    updated_at  TEXT    NOT NULL,
    FOREIGN KEY (backend_id) REFERENCES backends(id) ON DELETE CASCADE,
    UNIQUE (backend_id, model_id)
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_backend_models_backend ON backend_models(backend_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_backend_models_model ON backend_models(model_id);
|
||||
|
||||
-- Global model rewrite rules
|
||||
-- At most one rule per source model.
-- force = 1: always rewrite; force = 0: rewrite only as a fallback.
CREATE TABLE IF NOT EXISTS model_rewrites (
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    source_model  TEXT    UNIQUE NOT NULL,
    target_model  TEXT    NOT NULL,
    is_active     BOOLEAN DEFAULT 1,
    force         BOOLEAN DEFAULT 0,
    note          TEXT,
    created_at    TEXT    NOT NULL,
    updated_at    TEXT    NOT NULL
);
|
||||
|
||||
-- Permissions table (many-to-many: users ↔ backends)
|
||||
CREATE TABLE IF NOT EXISTS permissions (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
|
|
|
|||
28
docs/api.md
28
docs/api.md
|
|
@ -17,11 +17,17 @@
|
|||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| POST | `/v1/chat/completions` | Chat completions 프록시 (스크립트 적용, 분석 로깅) |
|
||||
| GET | `/v1/models` | 사용 가능한 모델 목록 |
|
||||
| POST | `/v1/chat/completions` | Chat completions 프록시 (모델 카탈로그 기반 라우팅, 스크립트 적용, 분석 로깅) |
|
||||
| GET | `/v1/models` | 허용 가능한 활성 백엔드들의 캐시된 모델 목록 합집합 |
|
||||
|
||||
`/v1/**`는 기존 사용자 API 키 인증을 유지하며 관리자 인증과 분리된다.
|
||||
|
||||
추가 동작:
|
||||
- `/v1/chat/completions` 는 요청 모델명을 먼저 전역 rewrite 규칙으로 해석한 뒤, 최종 모델을 서빙하는 허용 가능한 활성 백엔드만 후보로 사용한다
|
||||
- `force=true` rewrite 는 항상 적용된다
|
||||
- `force=false` rewrite 는 원본 모델을 서빙하는 허용 가능한 활성 백엔드가 없을 때만 fallback 으로 적용된다
|
||||
- 최종 후보가 없으면 모델 미지원 오류를 반환하고 `request_model`, `routed_model` 을 함께 내려준다
|
||||
|
||||
## Admin API
|
||||
|
||||
`/admin/**`는 기본적으로 관리자 인증이 필요하다. 브라우저는 세션 쿠키, 자동화는 `Authorization: Bearer <admin_api_token>` 방식으로 접근한다.
|
||||
|
|
@ -61,6 +67,24 @@
|
|||
| GET | `/admin/backends/:id` | 백엔드 조회 |
|
||||
| PUT | `/admin/backends/:id` | 백엔드 수정 |
|
||||
| DELETE | `/admin/backends/:id` | 백엔드 삭제 |
|
||||
| GET | `/admin/backends/:id/models` | 백엔드별 모델 스냅샷 + 메모리 캐시 상태 조회 |
|
||||
| POST | `/admin/backends/:id/models/refresh` | 활성 백엔드 모델 캐시 강제 갱신 |
|
||||
|
||||
### Models
|
||||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| GET | `/admin/models/cache` | 전체 메모리 모델 캐시 상태와 모델 집계 조회 |
|
||||
| GET | `/admin/model-rewrites` | 전역 모델 rewrite 규칙 목록 |
|
||||
| POST | `/admin/model-rewrites` | 전역 모델 rewrite 규칙 생성 (`force=true` 면 항상 rewrite, 아니면 fallback) |
|
||||
| PUT | `/admin/model-rewrites/:id` | 전역 모델 rewrite 규칙 수정 |
|
||||
| DELETE | `/admin/model-rewrites/:id` | 전역 모델 rewrite 규칙 삭제 |
|
||||
|
||||
`GET /admin/backends/:id/models` 응답에는 아래가 함께 포함된다.
|
||||
- `backend`: 백엔드 기본 정보 + 캐시 요약
|
||||
- `cache`: 메모리 캐시 상태 (`ready`, `uninitialized`, `error`, `inactive`)
|
||||
- `snapshots`: DB에 저장된 마지막 모델 스냅샷
|
||||
- `models`: 현재 메모리 캐시에 올라와 있는 모델 ID 목록
|
||||
|
||||
### Permissions
|
||||
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ client/src/
|
|||
Dashboard.tsx # 운영 요약, 최근 요청, 관리자 토큰 관리
|
||||
Users.tsx # 사용자 CRUD
|
||||
Backends.tsx # 백엔드 CRUD
|
||||
Models.tsx # 모델 캐시/리라이트 규칙 관리
|
||||
Permissions.tsx # 권한 매핑 관리
|
||||
Analytics.tsx # 분석 화면
|
||||
DetailLogs.tsx # 상세 요청 로그 탐색
|
||||
|
|
@ -38,6 +39,7 @@ client/src/
|
|||
| `/dashboard` | Dashboard | 시스템 개요 |
|
||||
| `/dashboard/users` | Users | 사용자 관리 |
|
||||
| `/dashboard/backends` | Backends | 백엔드 관리 |
|
||||
| `/dashboard/models` | Models | 모델 캐시/리라이트 관리 |
|
||||
| `/dashboard/permissions` | Permissions | 권한 관리 |
|
||||
| `/dashboard/analytics` | Analytics | 분석 대시보드 |
|
||||
| `/dashboard/detail-logs` | DetailLogs | 상세 요청 로그 탐색 |
|
||||
|
|
@ -63,3 +65,13 @@ SPA는 `/dashboard`를 라우터 base로 사용하고, 관리자 API는 계속 `
|
|||
- ENV 로그인과 OIDC 로그인을 함께 사용할 수 있다
|
||||
- 세션 기반 쓰기 요청에는 `X-CSRF-Token`이 자동 포함된다
|
||||
- 401 응답이 오면 UI는 로그인 상태로 되돌아간다
|
||||
|
||||
## Model Management UI
|
||||
|
||||
- `Backends` 화면은 백엔드별 모델 캐시 상태, 모델 수, 마지막 sync 상태를 표시한다
|
||||
- `Backends` 화면에서 활성 백엔드는 수동 refresh 와 캐시된 모델 목록 확인이 가능하다
|
||||
- 비활성 백엔드는 모델 조회를 시도하지 않으며 UI에서도 `Skipped` 상태로 표시된다
|
||||
- `Models` 화면은 전체 메모리 모델 카탈로그와 전역 모델 rewrite 규칙을 관리한다
|
||||
- rewrite 규칙은 2가지 모드를 가진다
|
||||
- `Force`: 원본 모델 사용 가능 여부와 관계없이 항상 target model 로 rewrite
|
||||
- `Fallback`: 원본 모델을 서빙하는 허용 가능한 활성 백엔드가 없을 때만 target model 로 rewrite
|
||||
|
|
|
|||
|
|
@ -36,6 +36,43 @@ Indexes: `idx_users_api_key(api_key)`
|
|||
| created_at | TIMESTAMP | DEFAULT CURRENT_TIMESTAMP |
|
||||
| updated_at | TIMESTAMP | DEFAULT CURRENT_TIMESTAMP |
|
||||
|
||||
### backend_models
|
||||
|
||||
백엔드가 마지막으로 광고한 모델 스냅샷. 요청 라우팅에는 사용하지 않고 관리자 조회와 오프라인 확인용으로만 사용한다.
|
||||
|
||||
| Column | Type | Constraints |
|
||||
|--------|------|-------------|
|
||||
| id | INTEGER | PRIMARY KEY AUTOINCREMENT |
|
||||
| backend_id | INTEGER | NOT NULL, FK → `backends(id)` ON DELETE CASCADE |
|
||||
| model_id | TEXT | NOT NULL |
|
||||
| raw_json | TEXT | |
|
||||
| fetched_at | TEXT | NOT NULL |
|
||||
| created_at | TEXT | NOT NULL |
|
||||
| updated_at | TEXT | NOT NULL |
|
||||
|
||||
Unique: `(backend_id, model_id)`
|
||||
|
||||
Indexes: `idx_backend_models_backend(backend_id)`, `idx_backend_models_model(model_id)`
|
||||
|
||||
### model_rewrites
|
||||
|
||||
요청 모델명을 실제 라우팅 모델명으로 변환하는 전역 규칙.
|
||||
|
||||
| Column | Type | Constraints |
|
||||
|--------|------|-------------|
|
||||
| id | INTEGER | PRIMARY KEY AUTOINCREMENT |
|
||||
| source_model | TEXT | UNIQUE NOT NULL |
|
||||
| target_model | TEXT | NOT NULL |
|
||||
| is_active | BOOLEAN | DEFAULT 1 |
|
||||
| force | BOOLEAN | DEFAULT 0 |
|
||||
| note | TEXT | |
|
||||
| created_at | TEXT | NOT NULL |
|
||||
| updated_at | TEXT | NOT NULL |
|
||||
|
||||
의미:
|
||||
- `force = 1`: 항상 `target_model` 로 rewrite
|
||||
- `force = 0`: 원본 모델을 서빙하는 허용 가능한 활성 백엔드가 없을 때만 fallback rewrite
|
||||
|
||||
### permissions
|
||||
|
||||
`users`와 `backends`의 many-to-many 관계.
|
||||
|
|
@ -165,6 +202,7 @@ Indexes: `idx_backend_metrics_backend`, `idx_backend_metrics_date`
|
|||
| backend_id | INTEGER | NOT NULL |
|
||||
| endpoint | TEXT | NOT NULL |
|
||||
| request_model | TEXT | |
|
||||
| routed_model | TEXT | rewrite 후 실제 라우팅에 사용된 모델 |
|
||||
| response_model | TEXT | |
|
||||
| prompt_tokens | INTEGER | |
|
||||
| completion_tokens | INTEGER | |
|
||||
|
|
|
|||
57
docs/model-routing.md
Normal file
57
docs/model-routing.md
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Model Routing
|
||||
|
||||
모델 카탈로그와 rewrite 규칙을 이용해 `/v1/chat/completions` 와 `/v1/models` 의 동작을 결정하는 방식.
|
||||
|
||||
## Runtime Flow
|
||||
|
||||
`POST /v1/chat/completions`
|
||||
|
||||
1. 사용자 API 키 인증
|
||||
2. 사용자가 접근 가능한 backend id 목록 로드
|
||||
3. 접근 가능한 활성 백엔드 중 아직 메모리 카탈로그가 초기화되지 않은 백엔드만 `/v1/models` 로 lazy fetch
|
||||
4. 요청 `model` 에 대해 전역 `model_rewrites` 규칙 평가
|
||||
5. 최종 모델을 서빙하는 허용 가능한 활성 백엔드만 후보로 선택
|
||||
6. 후보 중 1개를 랜덤 선택 후 업스트림으로 포워딩
|
||||
|
||||
`GET /v1/models`
|
||||
|
||||
1. 사용자 API 키 인증
|
||||
2. 접근 가능한 활성 백엔드 중 아직 초기화되지 않은 백엔드를 lazy fetch 한 뒤 메모리 카탈로그를 확인
|
||||
3. 모델 ID 합집합을 반환
|
||||
|
||||
## Caching Rules
|
||||
|
||||
- 메모리 캐시는 요청 라우팅의 단일 소스다
|
||||
- `backend_models` 테이블은 관리자 조회와 오프라인 확인용 스냅샷이다
|
||||
- 비활성 백엔드는 어떤 트리거에서도 `/v1/models` 조회를 시도하지 않는다
|
||||
- 캐시 갱신 트리거는 아래와 같다
|
||||
- 서버 시작 시 활성 백엔드 초기화
|
||||
- 첫 요청 시 아직 초기화되지 않은 활성 백엔드 lazy fetch
|
||||
- 요청 실패 후 사용된 활성 백엔드 재동기화 시도
|
||||
- 관리자 백엔드 수정 후 강제 refresh
|
||||
- 관리자 수정 기반 refresh 를 제외한 나머지 갱신은 최소 refresh 간격을 따른다
|
||||
|
||||
## Rewrite Rules
|
||||
|
||||
전역 rewrite 규칙은 `model_rewrites` 테이블에 저장된다.
|
||||
|
||||
| Mode | Condition | Result |
|
||||
|------|-----------|--------|
|
||||
| `force=true` | 항상 | `source_model` 을 즉시 `target_model` 로 치환 |
|
||||
| `force=false` | 원본 모델 후보가 없을 때만 | `target_model` 을 fallback 으로 사용 |
|
||||
|
||||
해석 기준:
|
||||
- “원본 모델 후보가 있다”는 것은 사용자가 접근 가능하고 활성 상태이며, 메모리 카탈로그상 해당 모델을 서빙하는 백엔드가 하나 이상 있다는 뜻이다
|
||||
- 원본 모델 후보가 있으면 fallback 규칙은 무시된다
|
||||
- 최종 모델 후보가 없으면 라우터는 포워딩하지 않고 모델 미지원 오류를 반환한다
|
||||
|
||||
## Admin Surface
|
||||
|
||||
- `/admin/backends/:id/models`: 백엔드별 DB 스냅샷 + 메모리 캐시 상태
|
||||
- `/admin/backends/:id/models/refresh`: 활성 백엔드 강제 refresh
|
||||
- `/admin/models/cache`: 전체 메모리 카탈로그 상태
|
||||
- `/admin/model-rewrites`: 전역 rewrite 규칙 CRUD
|
||||
|
||||
관리자 UI:
|
||||
- `/dashboard/backends`: 백엔드별 캐시 상태, 모델 수, 수동 refresh, 모델 목록 확인
|
||||
- `/dashboard/models`: 전체 카탈로그와 rewrite 규칙 관리
|
||||
|
|
@ -19,6 +19,7 @@ server/src/
|
|||
analytics.ts # /admin/analytics 핸들러
|
||||
services/
|
||||
RouterService.ts # 백엔드 선택 및 포워딩
|
||||
ModelCatalogService.ts # 백엔드 모델 캐시, 모델 인덱스, rewrite/fallback 규칙
|
||||
AnalyticsService.ts # 사용량 집계 및 요청 로그 조회
|
||||
ScriptEngine.ts # 스크립트 오케스트레이션
|
||||
utils/
|
||||
|
|
@ -34,6 +35,8 @@ server/src/
|
|||
- `/admin/**` 는 관리자 API 표면을 유지한다
|
||||
- `/dashboard` 와 `/dashboard/**` 는 빌드된 관리자 SPA를 서빙한다
|
||||
- 빌드된 `client/dist` 가 있으면 관리자 UI를 함께 제공하고, 없으면 API 전용 모드처럼 동작한다
|
||||
- 서버 시작 시 활성 백엔드의 `/v1/models` 를 조회해 메모리 모델 카탈로그를 초기화한다
|
||||
- 라우팅은 DB가 아니라 메모리 모델 카탈로그를 사용하고, 비활성 백엔드는 모델 조회와 후보 선택에서 항상 제외된다
|
||||
|
||||
라우트 우선순위는 다음과 같다.
|
||||
|
||||
|
|
@ -49,8 +52,20 @@ server/src/
|
|||
|
||||
- `DB_DIR` 에 `core.db`, `analytics.db`, `request_logs/request_logs_YYYY-MM.db` 가 저장된다
|
||||
- `core.db` 에는 `admin_sessions`, `admin_api_tokens` 도 함께 저장된다
|
||||
- `core.db` 에는 `backend_models`, `model_rewrites` 도 저장된다
|
||||
- 시간 경계 계산은 `TZ` 기준이다
|
||||
|
||||
## Model Routing
|
||||
|
||||
- 요청 모델명은 먼저 전역 `model_rewrites` 규칙을 확인한다
|
||||
- `force=1` 규칙은 항상 `source_model -> target_model` 로 변환한다
|
||||
- `force=0` 규칙은 원본 모델을 서빙하는 허용 가능한 활성 백엔드가 없을 때만 fallback 으로 적용한다
|
||||
- 최종 모델을 서빙하는 허용 가능한 활성 백엔드가 없으면 `/v1/chat/completions` 는 모델 미지원 오류를 반환한다
|
||||
- `/v1/models` 는 허용 가능한 활성 백엔드들의 캐시된 모델 목록 합집합을 반환한다
|
||||
|
||||
참고:
|
||||
- 세부 라우팅 규칙과 캐시 트리거는 [docs/model-routing.md](./model-routing.md) 참고
|
||||
|
||||
## Deployment Notes
|
||||
|
||||
- 권장 런타임은 단일 OCI 이미지다
|
||||
|
|
|
|||
|
|
@ -5,6 +5,17 @@ import { ensureDir, getCoreDbPath } from './db-paths';
|
|||
|
||||
let db: Database.Database;
|
||||
|
||||
/**
 * Reports whether `tableName` currently has a column named `columnName`.
 *
 * The table name is interpolated directly into a `PRAGMA table_info(...)`
 * statement, so callers must pass a trusted identifier.
 *
 * @param database - Open database handle to inspect.
 * @param tableName - Table whose columns are listed.
 * @param columnName - Column name to look for (exact match).
 * @returns `true` when a column with that exact name is listed.
 */
function hasColumn(database: Database.Database, tableName: string, columnName: string): boolean {
  const pragma = database.prepare(`PRAGMA table_info(${tableName})`);
  const rows = pragma.all() as Array<{ name: string }>;

  for (const row of rows) {
    if (row.name === columnName) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Applies in-place migrations to the core database.
 *
 * Currently adds the `force` column to `model_rewrites` when the table
 * does not have it yet.
 *
 * @param database - Open core database handle.
 */
function runCoreMigrations(database: Database.Database): void {
  const forceColumnExists = hasColumn(database, 'model_rewrites', 'force');
  if (forceColumnExists) {
    return;
  }

  database.exec('ALTER TABLE model_rewrites ADD COLUMN force BOOLEAN DEFAULT 0');
}
|
||||
|
||||
export function getDb(): Database.Database {
|
||||
if (!db) {
|
||||
const coreDbPath = getCoreDbPath();
|
||||
|
|
@ -16,6 +27,7 @@ export function getDb(): Database.Database {
|
|||
const schemaPath = path.join(__dirname, '..', '..', '..', 'database', 'schema.sql');
|
||||
const schema = fs.readFileSync(schemaPath, 'utf-8');
|
||||
db.exec(schema);
|
||||
runCoreMigrations(db);
|
||||
}
|
||||
return db;
|
||||
}
|
||||
|
|
@ -35,6 +47,7 @@ export function initDb(): Database.Database {
|
|||
const schemaPath = path.join(__dirname, '..', '..', '..', 'database', 'schema.sql');
|
||||
const schema = fs.readFileSync(schemaPath, 'utf-8');
|
||||
db.exec(schema);
|
||||
runCoreMigrations(db);
|
||||
|
||||
return db;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,10 +6,18 @@ import { getLocalMonthKey } from '../utils/time';
|
|||
|
||||
const connections = new Map<string, Database.Database>();
|
||||
|
||||
/**
 * Checks whether a table exposes a column with the given name.
 *
 * Runs `PRAGMA table_info(<tableName>)`; the table name is spliced into the
 * statement text, so it must be a trusted identifier.
 *
 * @param database - Open database handle to query.
 * @param tableName - Table to inspect.
 * @param columnName - Exact column name to search for.
 * @returns `true` if the column is present, otherwise `false`.
 */
function hasColumn(database: Database.Database, tableName: string, columnName: string): boolean {
  const rows = database.prepare(`PRAGMA table_info(${tableName})`).all() as Array<{ name: string }>;
  const columnNames = rows.map((row) => row.name);
  return columnNames.includes(columnName);
}
|
||||
|
||||
/**
 * Prepares a request-logs database for use.
 *
 * Executes `database/request-logs-schema.sql` (resolved relative to this
 * module) and then adds the `routed_model` column to `request_logs` when the
 * table does not have it yet.
 *
 * @param db - Open request-logs database handle.
 */
function initRequestLogsSchema(db: Database.Database): void {
  const schemaFile = path.join(__dirname, '..', '..', '..', 'database', 'request-logs-schema.sql');
  const schemaSql = fs.readFileSync(schemaFile, 'utf-8');
  db.exec(schemaSql);

  const routedModelPresent = hasColumn(db, 'request_logs', 'routed_model');
  if (!routedModelPresent) {
    db.exec('ALTER TABLE request_logs ADD COLUMN routed_model TEXT');
  }
}
|
||||
|
||||
export function getRequestLogsDb(monthKey: string = getLocalMonthKey()): Database.Database {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import analyticsRoutes from './routes/analytics';
|
|||
import { requireAdminAccess, requireSessionCsrf } from './utils/adminAuth';
|
||||
import { logger } from './utils/logger';
|
||||
import { getUtcTimestamp } from './utils/time';
|
||||
import { ModelCatalogService } from './services/ModelCatalogService';
|
||||
|
||||
const envPathCandidates = [
|
||||
path.resolve(__dirname, '..', '..', '.env'),
|
||||
|
|
@ -26,6 +27,7 @@ dotenv.config({
|
|||
});
|
||||
|
||||
export function createServer(): Application {
|
||||
void ModelCatalogService.initialize();
|
||||
const app = express();
|
||||
const adminDistCandidates = [
|
||||
path.resolve(__dirname, '..', '..', '..', 'client', 'dist'),
|
||||
|
|
|
|||
42
server/src/models/BackendModelSnapshot.ts
Normal file
42
server/src/models/BackendModelSnapshot.ts
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import { BackendModelSnapshot } from '../../../shared/types';
|
||||
import { getDb } from '../config/database';
|
||||
import { getUtcTimestamp } from '../utils/time';
|
||||
|
||||
/**
 * Views a raw `backend_models` row as a `BackendModelSnapshot`.
 *
 * No conversion or copying is performed; the same object is returned.
 *
 * @param row - Row object as returned by the database driver.
 * @returns The same row, typed as a snapshot.
 */
function asSnapshot(row: any): BackendModelSnapshot {
  const snapshot: BackendModelSnapshot = row;
  return snapshot;
}
|
||||
|
||||
/**
 * Data access for the `backend_models` table, which stores the model list
 * snapshot recorded for each backend.
 */
export class BackendModelSnapshotModel {
  /**
   * Lists the stored snapshots for one backend, ordered by `model_id`.
   *
   * @param backendId - Backend whose snapshots are returned.
   */
  static findByBackendId(backendId: number): BackendModelSnapshot[] {
    const rows = getDb()
      .prepare('SELECT * FROM backend_models WHERE backend_id = ? ORDER BY model_id')
      .all(backendId);

    return rows.map((row) => asSnapshot(row));
  }

  /**
   * Replaces every snapshot row of a backend with the supplied model list.
   *
   * The delete and all inserts run inside a single transaction. An empty or
   * missing `raw_json` is stored as NULL; `created_at` and `updated_at` are
   * both set to the current UTC timestamp.
   *
   * @param backendId - Backend whose rows are replaced.
   * @param models - New model list to store.
   * @param fetchedAt - Value written to `fetched_at` for every row.
   */
  static replaceForBackend(backendId: number, models: Array<{ model_id: string; raw_json?: string }>, fetchedAt: string): void {
    const db = getDb();
    const timestamp = getUtcTimestamp();

    const replaceAll = db.transaction(() => {
      db.prepare('DELETE FROM backend_models WHERE backend_id = ?').run(backendId);

      const insert = db.prepare(`
INSERT INTO backend_models (backend_id, model_id, raw_json, fetched_at, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?)
`);

      models.forEach(({ model_id, raw_json }) => {
        insert.run(backendId, model_id, raw_json || null, fetchedAt, timestamp, timestamp);
      });
    });

    replaceAll();
  }
}
|
||||
86
server/src/models/ModelRewrite.ts
Normal file
86
server/src/models/ModelRewrite.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import {
|
||||
CreateModelRewriteData,
|
||||
ModelRewriteRule,
|
||||
UpdateModelRewriteData,
|
||||
} from '../../../shared/types';
|
||||
import { getDb } from '../config/database';
|
||||
import { getUtcTimestamp } from '../utils/time';
|
||||
|
||||
/**
 * Normalizes a raw `model_rewrites` row into a `ModelRewriteRule`.
 *
 * The `is_active` and `force` fields are coerced to real booleans in place;
 * the same (mutated) row object is returned.
 *
 * @param row - Row object as returned by the database driver.
 * @returns The same row with boolean `is_active` and `force`.
 */
function asRule(row: any): ModelRewriteRule {
  row.is_active = Boolean(row.is_active);
  row.force = Boolean(row.force);
  return row;
}
|
||||
|
||||
/**
 * Data access for the `model_rewrites` table (global model rewrite rules).
 */
export class ModelRewriteModel {
  /** Returns every rewrite rule, ordered by `source_model`. */
  static findAll(): ModelRewriteRule[] {
    const rows = getDb().prepare('SELECT * FROM model_rewrites ORDER BY source_model').all();
    return rows.map((row) => asRule(row));
  }

  /**
   * Looks up a single rule by primary key.
   *
   * @returns The rule, or `undefined` when no row has that id.
   */
  static findById(id: number): ModelRewriteRule | undefined {
    const row = getDb().prepare('SELECT * FROM model_rewrites WHERE id = ?').get(id);
    if (!row) {
      return undefined;
    }
    return asRule(row);
  }

  /**
   * Inserts a new rule and returns the stored row.
   *
   * `is_active` defaults to active unless explicitly `false`; `force` defaults
   * to off; an empty or missing `note` is stored as NULL.
   */
  static create(data: CreateModelRewriteData): ModelRewriteRule {
    const timestamp = getUtcTimestamp();
    const activeFlag = data.is_active === false ? 0 : 1;
    const forceFlag = data.force ? 1 : 0;

    const insert = getDb().prepare(`
INSERT INTO model_rewrites (source_model, target_model, is_active, force, note, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
`);
    const result = insert.run(
      data.source_model,
      data.target_model,
      activeFlag,
      forceFlag,
      data.note || null,
      timestamp,
      timestamp
    );

    return this.findById(result.lastInsertRowid as number)!;
  }

  /**
   * Updates only the fields present on `data` and bumps `updated_at`.
   *
   * When `data` carries no known field, nothing is written and the current
   * row is returned as-is.
   *
   * @returns The row after the update, or `undefined` if the id is unknown.
   */
  static update(id: number, data: UpdateModelRewriteData): ModelRewriteRule | undefined {
    // Collected as [column, bound value] pairs so the SET clause and the
    // parameter list always stay in the same order.
    const assignments: Array<[string, unknown]> = [];

    if (data.source_model !== undefined) {
      assignments.push(['source_model', data.source_model]);
    }
    if (data.target_model !== undefined) {
      assignments.push(['target_model', data.target_model]);
    }
    if (data.is_active !== undefined) {
      assignments.push(['is_active', data.is_active ? 1 : 0]);
    }
    if (data.force !== undefined) {
      assignments.push(['force', data.force ? 1 : 0]);
    }
    if (data.note !== undefined) {
      assignments.push(['note', data.note || null]);
    }

    if (assignments.length === 0) {
      return this.findById(id);
    }

    assignments.push(['updated_at', getUtcTimestamp()]);

    const setClause = assignments.map(([column]) => `${column} = ?`).join(', ');
    const params = assignments.map(([, value]) => value);
    getDb().prepare(`UPDATE model_rewrites SET ${setClause} WHERE id = ?`).run(...params, id);

    return this.findById(id);
  }

  /**
   * Deletes a rule by id.
   *
   * @returns `true` when a row was removed.
   */
  static delete(id: number): boolean {
    const outcome = getDb().prepare('DELETE FROM model_rewrites WHERE id = ?').run(id);
    return outcome.changes > 0;
  }
}
|
||||
|
|
@ -1,10 +1,20 @@
|
|||
import { Router, Request, Response } from 'express';
|
||||
import { UserModel } from '../models/User';
|
||||
import { BackendModel } from '../models/Backend';
|
||||
import { ModelRewriteModel } from '../models/ModelRewrite';
|
||||
import { PermissionModel } from '../models/Permission';
|
||||
import scriptRoutes from './scripts';
|
||||
import { CreateUserData, CreateBackendData, CreatePermissionData, UpdateUserData, UpdateBackendData } from '../../../shared/types';
|
||||
import {
|
||||
CreateBackendData,
|
||||
CreateModelRewriteData,
|
||||
CreatePermissionData,
|
||||
CreateUserData,
|
||||
UpdateBackendData,
|
||||
UpdateModelRewriteData,
|
||||
UpdateUserData,
|
||||
} from '../../../shared/types';
|
||||
import { getUtcTimestamp } from '../utils/time';
|
||||
import { ModelCatalogService } from '../services/ModelCatalogService';
|
||||
|
||||
const router: Router = Router();
|
||||
|
||||
|
|
@ -99,7 +109,7 @@ router.post('/users/:id/regenerate-api-key', (req: Request, res: Response) => {
|
|||
// ============ Backend Management ============
|
||||
|
||||
router.get('/backends', (req: Request, res: Response) => {
|
||||
const backends = BackendModel.findAll();
|
||||
const backends = ModelCatalogService.getBackendsWithSummary();
|
||||
res.json(backends);
|
||||
});
|
||||
|
||||
|
|
@ -117,7 +127,7 @@ router.post('/backends', (req: Request, res: Response) => {
|
|||
|
||||
router.get('/backends/:id', (req: Request, res: Response) => {
|
||||
const id = Number(req.params.id);
|
||||
const backend = BackendModel.findById(id);
|
||||
const backend = ModelCatalogService.getBackendsWithSummary().find((item) => item.id === id);
|
||||
|
||||
if (!backend) {
|
||||
res.status(404).json({ error: 'Backend not found' });
|
||||
|
|
@ -127,7 +137,7 @@ router.get('/backends/:id', (req: Request, res: Response) => {
|
|||
res.json(backend);
|
||||
});
|
||||
|
||||
router.put('/backends/:id', (req: Request, res: Response) => {
|
||||
router.put('/backends/:id', async (req: Request, res: Response) => {
|
||||
const id = Number(req.params.id);
|
||||
const backend = BackendModel.findById(id);
|
||||
|
||||
|
|
@ -138,11 +148,11 @@ router.put('/backends/:id', (req: Request, res: Response) => {
|
|||
|
||||
const { name, base_url, api_key, is_active, detail_logging } = req.body as UpdateBackendData;
|
||||
const updatedBackend = BackendModel.update(id, { name, base_url, api_key, is_active, detail_logging });
|
||||
|
||||
res.json(updatedBackend);
|
||||
await ModelCatalogService.handleBackendUpdated(id);
|
||||
res.json(ModelCatalogService.getBackendsWithSummary().find((item) => item.id === id) || updatedBackend);
|
||||
});
|
||||
|
||||
router.delete('/backends/:id', (req: Request, res: Response) => {
|
||||
router.delete('/backends/:id', async (req: Request, res: Response) => {
|
||||
const id = Number(req.params.id);
|
||||
const success = BackendModel.delete(id);
|
||||
|
||||
|
|
@ -151,9 +161,49 @@ router.delete('/backends/:id', (req: Request, res: Response) => {
|
|||
return;
|
||||
}
|
||||
|
||||
await ModelCatalogService.handleBackendUpdated(id);
|
||||
res.status(204).send();
|
||||
});
|
||||
|
||||
// GET /backends/:id/models — returns the model payload the catalog service
// builds for one backend, or 404 when the service has none for that id.
router.get('/backends/:id/models', (req: Request, res: Response) => {
  const backendId = Number(req.params.id);
  const payload = ModelCatalogService.getBackendModelsResponse(backendId);

  if (payload) {
    res.json(payload);
    return;
  }

  res.status(404).json({ error: 'Backend not found' });
});
|
||||
|
||||
// POST /backends/:id/models/refresh — forces a model-catalog refresh for one
// active backend.
//
// Responds 404 when the backend does not exist and 409 when it is inactive.
// On success the body contains the backend summary (falling back to the plain
// backend row), the cache status returned by the refresh, and the snapshot and
// model lists for that backend.
router.post('/backends/:id/models/refresh', async (req: Request, res: Response) => {
  const id = Number(req.params.id);
  const backend = BackendModel.findById(id);

  if (!backend) {
    res.status(404).json({ error: 'Backend not found' });
    return;
  }

  if (!backend.is_active) {
    res.status(409).json({ error: 'Inactive backends cannot refresh model cache' });
    return;
  }

  // NOTE(review): this await has no try/catch; if refreshBackendModels can
  // reject, confirm the Express version in use forwards async rejections.
  const cache = await ModelCatalogService.refreshBackendModels(id, { force: true, reason: 'admin-manual' });

  // Build each view once after the refresh instead of recomputing the models
  // response separately for `snapshots` and `models`.
  const summary = ModelCatalogService.getBackendsWithSummary().find((item) => item.id === id);
  const modelsResponse = ModelCatalogService.getBackendModelsResponse(id);

  res.json({
    backend: summary || backend,
    cache,
    snapshots: modelsResponse?.snapshots || [],
    models: modelsResponse?.models || [],
  });
});
|
||||
|
||||
// GET /models/cache — returns the catalog service's cache overview as JSON.
router.get('/models/cache', (_req: Request, res: Response) => {
  const overview = ModelCatalogService.getCacheOverview();
  res.json(overview);
});
|
||||
|
||||
// ============ Permission Management ============
|
||||
|
||||
router.get('/permissions', (req: Request, res: Response) => {
|
||||
|
|
@ -211,6 +261,71 @@ router.delete('/permissions', (req: Request, res: Response) => {
|
|||
res.status(204).send();
|
||||
});
|
||||
|
||||
// GET /model-rewrites — lists every stored rewrite rule.
router.get('/model-rewrites', (_req: Request, res: Response) => {
  const rules = ModelRewriteModel.findAll();
  res.json(rules);
});
|
||||
|
||||
// POST /model-rewrites — creates a rewrite rule.
//
// Responds 400 when `source_model` or `target_model` is missing, not a string,
// or blank; 409 when a rule for the same `source_model` already exists; 500 on
// any other storage failure. On success the in-memory rewrite map is reloaded
// and the created rule is returned with status 201.
router.post('/model-rewrites', (req: Request, res: Response) => {
  // Tolerate a missing body so the validation below answers with 400 instead
  // of throwing during destructuring.
  const { source_model, target_model, is_active, force, note } = (req.body ?? {}) as Partial<CreateModelRewriteData>;

  // Non-string values (for example a JSON number) are treated as missing
  // rather than crashing on `.trim()`.
  const sourceModel = typeof source_model === 'string' ? source_model.trim() : '';
  const targetModel = typeof target_model === 'string' ? target_model.trim() : '';

  if (!sourceModel || !targetModel) {
    res.status(400).json({ error: 'source_model and target_model are required' });
    return;
  }

  try {
    const rule = ModelRewriteModel.create({
      source_model: sourceModel,
      target_model: targetModel,
      is_active,
      force,
      note,
    });
    ModelCatalogService.loadRewriteMap();
    res.status(201).json(rule);
  } catch (error) {
    // The storage layer reports a duplicate source_model via a UNIQUE error.
    if (error instanceof Error && error.message.includes('UNIQUE')) {
      res.status(409).json({ error: 'Rewrite rule already exists for this source_model' });
      return;
    }
    res.status(500).json({ error: 'Failed to create model rewrite rule' });
  }
});
|
||||
|
||||
// PUT /model-rewrites/:id — partially updates a rewrite rule.
//
// Responds 404 when the rule does not exist; 400 when `source_model` or
// `target_model` is supplied but is not a non-blank string; 409 when the new
// `source_model` collides with another rule; 500 on any other storage failure.
// Model names are trimmed before being stored, matching the create handler.
router.put('/model-rewrites/:id', (req: Request, res: Response) => {
  const id = Number(req.params.id);
  const existing = ModelRewriteModel.findById(id);
  if (!existing) {
    res.status(404).json({ error: 'Model rewrite rule not found' });
    return;
  }

  const body = (req.body ?? {}) as UpdateModelRewriteData;
  const patch: UpdateModelRewriteData = { ...body };

  // Only validate the model names the caller actually supplied; omitted
  // fields stay untouched by the update.
  for (const field of ['source_model', 'target_model'] as const) {
    const value: unknown = body[field];
    if (value === undefined) {
      continue;
    }
    if (typeof value !== 'string' || value.trim() === '') {
      res.status(400).json({ error: `${field} must be a non-empty string` });
      return;
    }
    patch[field] = value.trim();
  }

  try {
    const updated = ModelRewriteModel.update(id, patch);
    ModelCatalogService.loadRewriteMap();
    res.json(updated);
  } catch (error) {
    // The storage layer reports a duplicate source_model via a UNIQUE error.
    if (error instanceof Error && error.message.includes('UNIQUE')) {
      res.status(409).json({ error: 'Rewrite rule already exists for this source_model' });
      return;
    }
    res.status(500).json({ error: 'Failed to update model rewrite rule' });
  }
});
|
||||
|
||||
// DELETE /model-rewrites/:id — removes a rewrite rule.
// Responds 204 after reloading the in-memory rewrite map, or 404 when no
// rule with that id was deleted.
router.delete('/model-rewrites/:id', (req: Request, res: Response) => {
  const ruleId = Number(req.params.id);
  const removed = ModelRewriteModel.delete(ruleId);

  if (removed) {
    ModelCatalogService.loadRewriteMap();
    res.status(204).send();
    return;
  }

  res.status(404).json({ error: 'Model rewrite rule not found' });
});
|
||||
|
||||
// ============ Health Check ============
|
||||
|
||||
router.get('/health', (req: Request, res: Response) => {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
import { Router, Request, Response } from 'express';
|
||||
import { authenticate, AuthenticatedRequest } from './auth';
|
||||
import { BackendModel } from '../models/Backend';
|
||||
import { RouterService } from '../services/RouterService';
|
||||
import { AnalyticsService } from '../services/AnalyticsService';
|
||||
import { ScriptEngine } from '../services/ScriptEngine';
|
||||
import { logger } from '../utils/logger';
|
||||
import { ModelCatalogService } from '../services/ModelCatalogService';
|
||||
|
||||
const router: Router = Router();
|
||||
|
||||
|
|
@ -30,15 +32,55 @@ router.post('/chat/completions', async (req: AuthenticatedRequest, res: Response
|
|||
return;
|
||||
}
|
||||
|
||||
const backend = RouterService.selectBackend(allowedBackendIds);
|
||||
if (!backend) {
|
||||
const requestedModel = typeof req.body?.model === 'string' ? req.body.model : '';
|
||||
await ModelCatalogService.ensureInitializedForBackends(allowedBackendIds);
|
||||
const resolution = ModelCatalogService.resolveRequestedModel(requestedModel, allowedBackendIds);
|
||||
const activeAllowedBackendIds = BackendModel.findActive()
|
||||
.map((item) => item.id)
|
||||
.filter((backendId) => allowedBackendIds.includes(backendId));
|
||||
if (activeAllowedBackendIds.length === 0) {
|
||||
AnalyticsService.logRequest({
|
||||
user_id: user.id,
|
||||
backend_id: 0,
|
||||
endpoint: '/v1/chat/completions',
|
||||
request_model: requestedModel,
|
||||
routed_model: resolution.routedModel,
|
||||
status_code: 403,
|
||||
error_message: 'No active backends available',
|
||||
detail_logged: user.detail_logging,
|
||||
request_headers: user.detail_logging ? normalizeHeaders(req.headers) : undefined,
|
||||
request_body: user.detail_logging ? req.body : undefined,
|
||||
});
|
||||
res.status(403).json({ error: 'No active backends available' });
|
||||
return;
|
||||
}
|
||||
const candidateBackendIds = ModelCatalogService.getCandidateBackendIds(resolution.routedModel, allowedBackendIds);
|
||||
const backend = RouterService.selectBackend(candidateBackendIds);
|
||||
if (!backend) {
|
||||
AnalyticsService.logRequest({
|
||||
user_id: user.id,
|
||||
backend_id: 0,
|
||||
endpoint: '/v1/chat/completions',
|
||||
request_model: resolution.requestedModel,
|
||||
routed_model: resolution.routedModel,
|
||||
status_code: 404,
|
||||
error_message: 'Requested model is not available for your account',
|
||||
detail_logged: user.detail_logging,
|
||||
request_headers: user.detail_logging ? normalizeHeaders(req.headers) : undefined,
|
||||
request_body: user.detail_logging ? req.body : undefined,
|
||||
});
|
||||
res.status(404).json({
|
||||
error: 'Requested model is not available for your account',
|
||||
request_model: resolution.requestedModel,
|
||||
routed_model: resolution.routedModel,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const { model, messages, ...rest } = req.body;
|
||||
const detailLoggingEnabled = user.detail_logging || backend.detail_logging;
|
||||
const rewrittenBody = { model: resolution.routedModel, messages, ...rest };
|
||||
|
||||
const execContext = {
|
||||
user: { id: user.id, name: user.name, email: user.email },
|
||||
|
|
@ -50,7 +92,7 @@ router.post('/chat/completions', async (req: AuthenticatedRequest, res: Response
|
|||
...normalizeHeaders(req.headers),
|
||||
'content-type': req.get('content-type') || 'application/json',
|
||||
},
|
||||
body: req.body,
|
||||
body: rewrittenBody,
|
||||
isStream: req.body.stream === true,
|
||||
},
|
||||
};
|
||||
|
|
@ -94,6 +136,7 @@ router.post('/chat/completions', async (req: AuthenticatedRequest, res: Response
|
|||
backend_id: backend.id,
|
||||
endpoint: '/v1/chat/completions',
|
||||
request_model: model,
|
||||
routed_model: resolution.routedModel,
|
||||
response_model: response.data && typeof response.data === 'object' && 'model' in response.data ? String(response.data.model) : undefined,
|
||||
prompt_tokens: response.data && typeof response.data === 'object' && 'usage' in response.data && typeof (response.data as { usage?: { prompt_tokens?: number } }).usage === 'object' ? (response.data as { usage: { prompt_tokens: number } }).usage?.prompt_tokens : undefined,
|
||||
completion_tokens: response.data && typeof response.data === 'object' && 'usage' in response.data && typeof (response.data as { usage?: { completion_tokens?: number } }).usage === 'object' ? (response.data as { usage: { completion_tokens: number } }).usage?.completion_tokens : undefined,
|
||||
|
|
@ -115,6 +158,7 @@ router.post('/chat/completions', async (req: AuthenticatedRequest, res: Response
|
|||
const causeInfo = errorDetails.cause ? ` (Cause: ${errorDetails.cause})` : '';
|
||||
const backendInfo = errorDetails.backend ? ` [Backend: ${errorDetails.backend}]` : '';
|
||||
logger.error(`Backend error for user ${user.id}: ${errorInfo}${causeInfo}${backendInfo}`);
|
||||
void ModelCatalogService.refreshBackendAfterFailure(backend.id);
|
||||
}
|
||||
|
||||
res.status(response.status).json(response.data);
|
||||
|
|
@ -128,6 +172,7 @@ router.post('/chat/completions', async (req: AuthenticatedRequest, res: Response
|
|||
backend_id: backend.id,
|
||||
endpoint: '/v1/chat/completions',
|
||||
request_model: req.body.model,
|
||||
routed_model: resolution.routedModel,
|
||||
status_code: 502,
|
||||
response_time_ms: responseTime,
|
||||
error_message: errorMsg,
|
||||
|
|
@ -140,6 +185,7 @@ router.post('/chat/completions', async (req: AuthenticatedRequest, res: Response
|
|||
});
|
||||
|
||||
logger.error(`Request failed for user ${user.id}: ${errorMsg}`);
|
||||
void ModelCatalogService.refreshBackendAfterFailure(backend.id);
|
||||
res.status(502).json({ error: 'Backend request failed', details: errorMsg });
|
||||
}
|
||||
});
|
||||
|
|
@ -152,26 +198,19 @@ router.get('/models', async (req: AuthenticatedRequest, res: Response) => {
|
|||
return;
|
||||
}
|
||||
|
||||
const backend = RouterService.selectBackend(allowedBackendIds);
|
||||
if (!backend) {
|
||||
await ModelCatalogService.ensureInitializedForBackends(allowedBackendIds);
|
||||
const activeAllowedBackendIds = BackendModel.findActive()
|
||||
.map((item) => item.id)
|
||||
.filter((backendId) => allowedBackendIds.includes(backendId));
|
||||
if (activeAllowedBackendIds.length === 0) {
|
||||
res.status(403).json({ error: 'No active backends available' });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await RouterService.forwardRequest(
|
||||
backend,
|
||||
'/v1/models',
|
||||
'GET',
|
||||
{}
|
||||
);
|
||||
|
||||
res.status(response.status).json(response.data);
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : 'Unknown error';
|
||||
logger.error(`Models request failed for user ${req.user!.id}: ${errorMsg}`);
|
||||
res.status(502).json({ error: 'Failed to fetch models from backend', details: errorMsg });
|
||||
}
|
||||
const models = ModelCatalogService.getModelsForAllowedBackends(activeAllowedBackendIds).map((entry) => ({
|
||||
id: entry.model_id,
|
||||
object: 'model',
|
||||
}));
|
||||
res.json({ object: 'list', data: models });
|
||||
});
|
||||
|
||||
export default router;
|
||||
|
|
|
|||
|
|
@ -10,8 +10,10 @@ export class AnalyticsService {
|
|||
try {
|
||||
RequestLogService.logRequest(logData);
|
||||
|
||||
this.updateUsageStats(logData.user_id, logData.backend_id, logData.total_tokens || 0);
|
||||
this.updateBackendMetrics(logData.backend_id, logData);
|
||||
if (logData.backend_id > 0) {
|
||||
this.updateUsageStats(logData.user_id, logData.backend_id, logData.total_tokens || 0);
|
||||
this.updateBackendMetrics(logData.backend_id, logData);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Failed to log analytics:', error);
|
||||
}
|
||||
|
|
|
|||
435
server/src/services/ModelCatalogService.ts
Normal file
435
server/src/services/ModelCatalogService.ts
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
import {
|
||||
Backend,
|
||||
BackendModelCacheStatus,
|
||||
BackendModelCatalogEntry,
|
||||
BackendModelsResponse,
|
||||
ModelCacheOverview,
|
||||
ModelRewriteRule,
|
||||
} from '../../../shared/types';
|
||||
import { BackendModel } from '../models/Backend';
|
||||
import { BackendModelSnapshotModel } from '../models/BackendModelSnapshot';
|
||||
import { ModelRewriteModel } from '../models/ModelRewrite';
|
||||
import { getUtcTimestamp } from '../utils/time';
|
||||
import { logger } from '../utils/logger';
|
||||
|
||||
// In-memory cache record kept per backend by ModelCatalogService.
interface BackendCacheEntry {
|
||||
// Id of the backend this entry belongs to (also the key in the cache map).
backendId: number;
|
||||
// Starts as false when the entry is created; an active backend without an
// error is reported as 'ready' when true and 'uninitialized' otherwise.
initialized: boolean;
|
||||
// Model ids cached for this backend; emptied for inactive backends when the
// cache state is synced.
modelIds: string[];
|
||||
// Surfaced as `last_synced_at` in the cache status.
// NOTE(review): presumably the time of the last successful sync — confirm in refreshBackendModels.
lastSyncedAt?: string;
|
||||
// Surfaced as `last_attempted_at` in the cache status.
lastAttemptedAt?: string;
|
||||
// When set, an active backend is reported with state 'error'.
lastError?: string;
|
||||
}
|
||||
|
||||
// Options accepted by refreshBackendModels.
interface RefreshOptions {
|
||||
// Passed as true by the admin manual-refresh route.
// NOTE(review): presumably bypasses the minimum refresh interval — confirm in refreshBackendModels.
force?: boolean;
|
||||
// Free-form label for the trigger (callers pass e.g. 'startup', 'admin-manual').
reason?: string;
|
||||
}
|
||||
|
||||
// Result of fetching a backend's model list (see fetchBackendModels).
interface FetchModelsResponse {
|
||||
// Trimmed, de-duplicated model ids in the order the backend listed them.
models: string[];
|
||||
// One record per id in `models`: the id plus the upstream item serialized as JSON.
rawModels: Array<{ model_id: string; raw_json?: string }>;
|
||||
}
|
||||
|
||||
// Result type of resolveRequestedModel.
interface RewriteResolution {
|
||||
// Model name as reported back to callers for logging (`request_model`).
requestedModel: string;
|
||||
// Model name used for candidate selection and sent upstream (`routed_model`).
routedModel: string;
|
||||
// NOTE(review): presumably true when routedModel differs from requestedModel — confirm in resolveRequestedModel.
wasRewritten: boolean;
|
||||
// Which kind of rewrite rule, if any, produced the routed model.
ruleType: 'none' | 'force' | 'fallback';
|
||||
}
|
||||
|
||||
// Value stored in the in-memory rewrite map, keyed by a rule's source_model.
interface RewriteConfig {
|
||||
// Copied from the rule's target_model.
targetModel: string;
|
||||
// Copied from the rule's force flag.
force: boolean;
|
||||
}
|
||||
|
||||
// Fallback minimum refresh interval (5 minutes, in ms) used when
// MODEL_CATALOG_REFRESH_MIN_MS is unset or not a non-negative finite number.
const DEFAULT_REFRESH_MIN_MS = 5 * 60 * 1000;
|
||||
|
||||
export class ModelCatalogService {
|
||||
private static backendModelsByBackendId = new Map<number, BackendCacheEntry>();
|
||||
private static backendIdsByModel = new Map<string, Set<number>>();
|
||||
private static modelRewriteMap = new Map<string, RewriteConfig>();
|
||||
private static inFlightRefreshes = new Map<number, Promise<BackendModelCacheStatus>>();
|
||||
private static initialized = false;
|
||||
|
||||
/**
 * Reads the minimum refresh interval from `MODEL_CATALOG_REFRESH_MIN_MS`.
 *
 * @returns The configured value in milliseconds, or `DEFAULT_REFRESH_MIN_MS`
 * when the variable is unset, empty, non-finite, or negative.
 */
private static getRefreshMinMs(): number {
  const configured = process.env.MODEL_CATALOG_REFRESH_MIN_MS;
  if (!configured) {
    return DEFAULT_REFRESH_MIN_MS;
  }

  const value = Number(configured);
  if (!Number.isFinite(value) || value < 0) {
    return DEFAULT_REFRESH_MIN_MS;
  }
  return value;
}
|
||||
|
||||
/**
 * Canonical form of a model id used as a key in the catalog index:
 * the id with leading and trailing whitespace removed.
 */
private static normalizeModelId(modelId: string): string {
  const trimmed = modelId.trim();
  return trimmed;
}
|
||||
|
||||
/**
 * Returns the cache entry for a backend, creating and registering an empty,
 * uninitialized entry on first access.
 *
 * @param backendId - Backend whose entry is requested.
 */
private static getCacheEntry(backendId: number): BackendCacheEntry {
  let entry = this.backendModelsByBackendId.get(backendId);

  if (!entry) {
    entry = { backendId, initialized: false, modelIds: [] };
    this.backendModelsByBackendId.set(backendId, entry);
  }

  return entry;
}
|
||||
|
||||
/**
 * Converts an internal cache entry into the public cache status shape.
 *
 * State precedence: inactive backend → 'inactive'; otherwise a recorded
 * error → 'error'; otherwise 'ready' when initialized, else 'uninitialized'.
 * When no backend is supplied the entry is treated as belonging to an
 * active backend.
 *
 * @param entry - Cache entry to describe.
 * @param backend - Backend row used only for its `is_active` flag.
 */
private static statusFromEntry(entry: BackendCacheEntry, backend?: Backend): BackendModelCacheStatus {
  const isActive = !backend || backend.is_active;

  const resolveState = (): BackendModelCacheStatus['state'] => {
    if (!isActive) return 'inactive';
    if (entry.lastError) return 'error';
    return entry.initialized ? 'ready' : 'uninitialized';
  };

  return {
    backend_id: entry.backendId,
    initialized: entry.initialized,
    state: resolveState(),
    model_count: entry.modelIds.length,
    last_synced_at: entry.lastSyncedAt,
    last_attempted_at: entry.lastAttemptedAt,
    last_error: entry.lastError,
  };
}
|
||||
|
||||
/**
 * Rebuilds the model → backend-ids index from the per-backend cache.
 *
 * Only entries whose backend currently exists and is active contribute;
 * model ids are normalized before being used as index keys.
 */
private static rebuildModelIndex(): void {
  this.backendIdsByModel.clear();

  // Snapshot the active flag of every known backend; unknown ids read as inactive.
  const activeById = new Map<number, boolean>();
  for (const backend of BackendModel.findAll()) {
    activeById.set(backend.id, Boolean(backend.is_active));
  }

  for (const entry of this.backendModelsByBackendId.values()) {
    if (!activeById.get(entry.backendId)) {
      continue;
    }

    for (const rawId of entry.modelIds) {
      const key = this.normalizeModelId(rawId);
      let owners = this.backendIdsByModel.get(key);
      if (!owners) {
        owners = new Set<number>();
        this.backendIdsByModel.set(key, owners);
      }
      owners.add(entry.backendId);
    }
  }
}
|
||||
|
||||
/**
 * Fetches the model list a backend exposes on its models endpoint.
 *
 * The request goes to `<base_url>/models` when `base_url` already contains
 * `/v1`, otherwise to `<base_url>/v1/models`; a bearer token is sent when the
 * backend has an API key. Items without a string `id` are skipped, ids are
 * trimmed, and duplicates are dropped (first occurrence wins).
 *
 * @param backend - Backend to query.
 * @returns The normalized ids plus, per id, the upstream item serialized as JSON.
 * @throws Error when the response status is not OK, the body is not valid
 * JSON, or the payload has no `data` array. An unusable payload is reported
 * as a failure instead of being silently treated as "zero models".
 */
private static async fetchBackendModels(backend: Backend): Promise<FetchModelsResponse> {
  const backendPath = backend.base_url.includes('/v1') ? '/models' : '/v1/models';
  const url = backend.base_url.replace(/\/$/, '') + backendPath;

  const headers: Record<string, string> = {};
  if (backend.api_key) {
    headers.Authorization = `Bearer ${backend.api_key}`;
  }

  const response = await fetch(url, { method: 'GET', headers });
  if (!response.ok) {
    throw new Error(`Backend model fetch failed with HTTP ${response.status}`);
  }

  let payload: unknown;
  try {
    payload = await response.json();
  } catch {
    throw new Error('Backend model fetch returned a non-JSON body');
  }

  const data: unknown =
    typeof payload === 'object' && payload !== null ? (payload as { data?: unknown }).data : undefined;
  if (!Array.isArray(data)) {
    throw new Error('Backend model fetch returned a payload without a "data" array');
  }

  const seen = new Set<string>();
  const models: string[] = [];
  const rawModels: Array<{ model_id: string; raw_json?: string }> = [];

  for (const item of data as unknown[]) {
    if (typeof item !== 'object' || item === null) {
      continue;
    }
    const id = (item as { id?: unknown }).id;
    if (typeof id !== 'string') {
      continue;
    }

    const modelId = this.normalizeModelId(id);
    if (!modelId || seen.has(modelId)) {
      continue;
    }

    seen.add(modelId);
    models.push(modelId);
    rawModels.push({ model_id: modelId, raw_json: JSON.stringify(item) });
  }

  return { models, rawModels };
}
|
||||
|
||||
/**
 * Loads the rewrite rules and reconciles the cache with the backend table on
 * every call; on the first call only, it also refreshes the model list of
 * every active backend and waits for all of those refreshes to settle.
 */
static async initialize(): Promise<void> {
  this.loadRewriteMap();
  this.syncActiveBackendCacheState();

  if (!this.initialized) {
    // Flip the flag before awaiting so later calls skip the startup refresh.
    this.initialized = true;
    const startupRefreshes = BackendModel.findActive().map((backend) =>
      this.refreshBackendModels(backend.id, { reason: 'startup' }),
    );
    await Promise.allSettled(startupRefreshes);
  }
}
|
||||
|
||||
/**
 * Returns the service to its pristine state: empties the backend cache, the
 * model index, the rewrite map and the in-flight refresh registry, and marks
 * the service as not initialized.
 */
static reset(): void {
  this.initialized = false;

  const stores = [
    this.backendModelsByBackendId,
    this.backendIdsByModel,
    this.modelRewriteMap,
    this.inFlightRefreshes,
  ];
  for (const store of stores) {
    store.clear();
  }
}
|
||||
|
||||
/**
 * Rebuilds the in-memory rewrite map from the stored rewrite rules.
 * Only rules flagged `is_active` are loaded.
 *
 * Keys are stored in normalized form because `resolveRequestedModel` looks
 * rules up by the normalized requested model id; a key kept in any other form
 * could never be matched.
 */
static loadRewriteMap(): void {
  this.modelRewriteMap.clear();

  for (const rule of ModelRewriteModel.findAll()) {
    if (!rule.is_active) {
      continue;
    }

    this.modelRewriteMap.set(this.normalizeModelId(rule.source_model), {
      targetModel: rule.target_model,
      force: rule.force,
    });
  }
}
|
||||
|
||||
/**
 * Reconciles the cache with the backend table: touches the cache entry of
 * every known backend, empties the model list of inactive ones, drops entries
 * whose backend no longer exists, then rebuilds the model index.
 */
static syncActiveBackendCacheState(): void {
  const rows = BackendModel.findAll();
  const knownIds = new Set(rows.map((row) => row.id));

  rows.forEach((row) => {
    const cacheEntry = this.getCacheEntry(row.id);
    if (!row.is_active) {
      cacheEntry.modelIds = [];
    }
  });

  // Copy the keys first so entries can be deleted while walking them.
  Array.from(this.backendModelsByBackendId.keys())
    .filter((cachedId) => !knownIds.has(cachedId))
    .forEach((staleId) => {
      this.backendModelsByBackendId.delete(staleId);
    });

  this.rebuildModelIndex();
}
|
||||
|
||||
/**
 * Decides which model id a request should be routed to.
 *
 * - No rewrite rule for the normalized id: routed unchanged (`'none'`).
 * - Rule with `force`: always routed to the rule target (`'force'`).
 * - Rule without `force`: routed unchanged while at least one allowed backend
 *   serves the requested model, otherwise routed to the target (`'fallback'`).
 *
 * @param modelId Model id as requested by the caller.
 * @param allowedBackendIds Backends the caller may use.
 * @returns The requested and routed ids, whether they differ, and the rule kind applied.
 */
static resolveRequestedModel(modelId: string, allowedBackendIds: number[]): RewriteResolution {
  const requestedModel = this.normalizeModelId(modelId);
  const passthrough: RewriteResolution = {
    requestedModel,
    routedModel: requestedModel,
    wasRewritten: false,
    ruleType: 'none',
  };

  const rule = this.modelRewriteMap.get(requestedModel);
  if (!rule) {
    return passthrough;
  }

  // Candidates are only consulted for non-forced rules.
  const servedAsRequested =
    !rule.force && this.getCandidateBackendIds(requestedModel, allowedBackendIds).length > 0;
  if (servedAsRequested) {
    return passthrough;
  }

  return {
    requestedModel,
    routedModel: rule.targetModel,
    wasRewritten: rule.targetModel !== requestedModel,
    ruleType: rule.force ? 'force' : 'fallback',
  };
}
|
||||
|
||||
/**
 * Reports the model-cache status of one backend.
 *
 * @param backendId Id of the backend to inspect.
 * @returns Status derived from the backend's cache entry and its row (the row
 *   may be absent when the id is unknown).
 */
static getBackendCacheStatus(backendId: number): BackendModelCacheStatus {
  const backendRow = BackendModel.findById(backendId);
  return this.statusFromEntry(this.getCacheEntry(backendId), backendRow);
}
|
||||
|
||||
/**
 * Lists every backend row decorated with a summary of its model-cache status.
 *
 * @returns One object per row from `BackendModel.findAll()`, extended with
 *   `cached_model_count`, `last_model_sync_at`, `model_cache_initialized`
 *   and `model_cache_state`.
 */
static getBackendsWithSummary(): Backend[] {
  return BackendModel.findAll().map((backend) => {
    // Derive the status from the row already in hand rather than looking the
    // same backend up again by id for every element of the list.
    const status = this.statusFromEntry(this.getCacheEntry(backend.id), backend);

    return {
      ...backend,
      cached_model_count: status.model_count,
      last_model_sync_at: status.last_synced_at,
      model_cache_initialized: status.initialized,
      model_cache_state: status.state,
    } as Backend & {
      cached_model_count: number;
      last_model_sync_at?: string;
      model_cache_initialized: boolean;
      model_cache_state: BackendModelCacheStatus['state'];
    };
  });
}
|
||||
|
||||
/**
 * Triggers a lazy refresh for every listed backend that is active and whose
 * cache entry has not been initialized yet, then waits for all of them to settle.
 *
 * @param backendIds Backends the caller is about to route to.
 */
static async ensureInitializedForBackends(backendIds: number[]): Promise<void> {
  const needsInit = backendIds.filter((backendId) => {
    if (!BackendModel.findById(backendId)?.is_active) {
      return false;
    }
    return !this.getCacheEntry(backendId).initialized;
  });

  const refreshes: Promise<BackendModelCacheStatus>[] = needsInit.map((backendId) =>
    this.refreshBackendModels(backendId, { reason: 'lazy-init' }),
  );

  await Promise.allSettled(refreshes);
}
|
||||
|
||||
/**
 * Refreshes the cached model list of one backend and reports the resulting
 * cache status.
 *
 * - Unknown backend: its cache entry is dropped and the index rebuilt.
 * - Inactive backend: the entry is emptied and marked uninitialized; nothing is fetched.
 * - Refresh already running for this backend: the caller shares its promise.
 * - Otherwise, unless `options.force` is set, a refresh attempted less than
 *   `getRefreshMinMs()` milliseconds ago is skipped and the current status returned.
 *
 * A failed fetch does not reject: the entry is marked initialized with no
 * models and the error message is recorded in `lastError`.
 *
 * @param backendId Backend whose catalog should be refreshed.
 * @param options `force` bypasses the minimum-interval throttle; `reason` is
 *   appended to the success log line.
 * @returns The cache status after the refresh (or after the early exit).
 */
static async refreshBackendModels(backendId: number, options: RefreshOptions = {}): Promise<BackendModelCacheStatus> {
  const backend = BackendModel.findById(backendId);
  const entry = this.getCacheEntry(backendId);

  if (!backend) {
    this.backendModelsByBackendId.delete(backendId);
    this.rebuildModelIndex();
    return this.statusFromEntry(entry);
  }

  if (!backend.is_active) {
    entry.initialized = false;
    entry.modelIds = [];
    entry.lastError = undefined;
    this.rebuildModelIndex();
    return this.statusFromEntry(entry, backend);
  }

  // Join a running refresh BEFORE applying the throttle. A running refresh has
  // just stamped `lastAttemptedAt`, so throttling first would hand concurrent
  // callers a stale, possibly uninitialized status instead of the fresh result.
  const existing = this.inFlightRefreshes.get(backendId);
  if (existing) {
    return existing;
  }

  const now = Date.now();
  const lastAttempt = entry.lastAttemptedAt ? Date.parse(entry.lastAttemptedAt) : 0;
  if (!options.force && lastAttempt && now - lastAttempt < this.getRefreshMinMs()) {
    return this.statusFromEntry(entry, backend);
  }

  const refreshPromise = (async () => {
    entry.lastAttemptedAt = getUtcTimestamp();
    try {
      const fetchedAt = getUtcTimestamp();
      const { models, rawModels } = await this.fetchBackendModels(backend);
      entry.modelIds = models;
      entry.initialized = true;
      entry.lastSyncedAt = fetchedAt;
      entry.lastError = undefined;
      BackendModelSnapshotModel.replaceForBackend(backendId, rawModels, fetchedAt);
      this.rebuildModelIndex();
      logger.info(`Model catalog refreshed for backend ${backendId}${options.reason ? ` (${options.reason})` : ''}`);
    } catch (error) {
      // The attempt still counts as initialization; the backend simply
      // advertises no models until a later refresh succeeds.
      entry.initialized = true;
      entry.modelIds = [];
      entry.lastError = error instanceof Error ? error.message : 'Unknown model refresh error';
      this.rebuildModelIndex();
      logger.warn(`Model catalog refresh failed for backend ${backendId}: ${entry.lastError}`);
    } finally {
      this.inFlightRefreshes.delete(backendId);
    }

    return this.statusFromEntry(entry, backend);
  })();

  this.inFlightRefreshes.set(backendId, refreshPromise);
  return refreshPromise;
}
|
||||
|
||||
/**
 * Requests a (non-forced) catalog refresh for a backend after a request to it
 * failed. Does nothing when the backend is unknown or inactive.
 *
 * @param backendId Backend that just failed a request.
 */
static async refreshBackendAfterFailure(backendId: number): Promise<void> {
  if (BackendModel.findById(backendId)?.is_active) {
    await this.refreshBackendModels(backendId, { reason: 'request-failure' });
  }
}
|
||||
|
||||
/**
 * Brings the cache in line with a backend that was just created, changed or
 * removed: an active backend is force-refreshed, an inactive one has its
 * entry emptied and marked uninitialized, and a missing one loses its entry.
 *
 * @param backendId Backend that was modified.
 */
static async handleBackendUpdated(backendId: number): Promise<void> {
  const row = BackendModel.findById(backendId);

  if (row?.is_active) {
    await this.refreshBackendModels(backendId, { force: true, reason: 'admin-update' });
    return;
  }

  if (row) {
    // Backend exists but is switched off: keep the entry, forget its models.
    const cacheEntry = this.getCacheEntry(backendId);
    cacheEntry.initialized = false;
    cacheEntry.modelIds = [];
    cacheEntry.lastError = undefined;
  } else {
    this.backendModelsByBackendId.delete(backendId);
  }

  this.rebuildModelIndex();
}
|
||||
|
||||
/**
 * Lists the backends that may serve a model for a given caller.
 *
 * @param modelId Model id to look up (normalized before the lookup).
 * @param allowedBackendIds Backends the caller is permitted to use.
 * @returns Ids of indexed backends for the model that are both permitted and
 *   currently active; empty when the model is not indexed.
 */
static getCandidateBackendIds(modelId: string, allowedBackendIds: number[]): number[] {
  const owners = this.backendIdsByModel.get(this.normalizeModelId(modelId));
  if (!owners) {
    return [];
  }

  const permitted = new Set(allowedBackendIds);
  const activeIds = new Set(BackendModel.findActive().map((row) => row.id));

  const candidates: number[] = [];
  owners.forEach((ownerId) => {
    if (permitted.has(ownerId) && activeIds.has(ownerId)) {
      candidates.push(ownerId);
    }
  });
  return candidates;
}
|
||||
|
||||
/**
 * Builds the catalog visible to a caller: every indexed model served by at
 * least one permitted backend, with the permitted backend ids in ascending
 * order and the entries sorted by model id.
 *
 * @param allowedBackendIds Backends the caller is permitted to use.
 */
static getModelsForAllowedBackends(allowedBackendIds: number[]): BackendModelCatalogEntry[] {
  const permitted = new Set(allowedBackendIds);

  return Array.from(this.backendIdsByModel.entries())
    .map(([modelId, owners]) => ({
      model_id: modelId,
      backend_ids: Array.from(owners)
        .filter((ownerId) => permitted.has(ownerId))
        .sort((left, right) => left - right),
    }))
    .filter((catalogEntry) => catalogEntry.backend_ids.length > 0)
    .sort((left, right) => left.model_id.localeCompare(right.model_id));
}
|
||||
|
||||
/**
 * Assembles the detail payload for one backend: the backend row merged with
 * its cache summary, the cache status, the stored snapshots and a copy of the
 * cached model ids.
 *
 * @param backendId Backend to describe.
 * @returns The payload, or `null` when no backend has that id.
 */
static getBackendModelsResponse(backendId: number): BackendModelsResponse | null {
  const row = BackendModel.findById(backendId);
  if (!row) {
    return null;
  }

  const summary = this.getBackendsWithSummary().find((candidate) => candidate.id === backendId);
  const backend = { ...row, ...(summary || {}) };
  const cache = this.getBackendCacheStatus(backendId);
  const snapshots = BackendModelSnapshotModel.findByBackendId(backendId);
  // Hand out a copy so callers cannot mutate the cached list.
  const models = this.getCacheEntry(backendId).modelIds.slice();

  return { backend, cache, snapshots, models };
}
|
||||
|
||||
/**
 * Summarizes the whole cache: one status per backend (ascending backend id)
 * and one entry per indexed model (sorted by model id) listing the backend
 * ids that serve it in ascending order.
 */
static getCacheOverview(): ModelCacheOverview {
  const backends: BackendModelCacheStatus[] = [];
  for (const row of BackendModel.findAll()) {
    backends.push(this.statusFromEntry(this.getCacheEntry(row.id), row));
  }
  backends.sort((left, right) => left.backend_id - right.backend_id);

  const models: BackendModelCatalogEntry[] = [];
  this.backendIdsByModel.forEach((owners, modelId) => {
    models.push({
      model_id: modelId,
      backend_ids: Array.from(owners).sort((left, right) => left - right),
    });
  });
  models.sort((left, right) => left.model_id.localeCompare(right.model_id));

  return { backends, models };
}
|
||||
|
||||
/**
 * Returns every stored rewrite rule as provided by `ModelRewriteModel.findAll()`
 * (no filtering on `is_active` is applied here).
 */
static getRewriteRules(): ModelRewriteRule[] {
  const storedRules = ModelRewriteModel.findAll();
  return storedRules;
}
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@ export interface RequestLogInsert {
|
|||
backend_id: number;
|
||||
endpoint: string;
|
||||
request_model?: string;
|
||||
routed_model?: string;
|
||||
response_model?: string;
|
||||
prompt_tokens?: number;
|
||||
completion_tokens?: number;
|
||||
|
|
@ -74,6 +75,7 @@ function buildWhereClause(query: RequestLogQuery): { whereClause: string; params
|
|||
clauses.push(`(
|
||||
endpoint LIKE ?
|
||||
OR COALESCE(request_model, '') LIKE ?
|
||||
OR COALESCE(routed_model, '') LIKE ?
|
||||
OR COALESCE(response_model, '') LIKE ?
|
||||
OR COALESCE(error_message, '') LIKE ?
|
||||
OR COALESCE(request_headers, '') LIKE ?
|
||||
|
|
@ -81,7 +83,7 @@ function buildWhereClause(query: RequestLogQuery): { whereClause: string; params
|
|||
OR COALESCE(response_headers, '') LIKE ?
|
||||
OR COALESCE(response_body, '') LIKE ?
|
||||
)`);
|
||||
params.push(like, like, like, like, like, like, like, like);
|
||||
params.push(like, like, like, like, like, like, like, like, like);
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
@ -148,16 +150,17 @@ export class RequestLogService {
|
|||
|
||||
db.prepare(`
|
||||
INSERT INTO request_logs (
|
||||
user_id, backend_id, endpoint, request_model, response_model,
|
||||
user_id, backend_id, endpoint, request_model, routed_model, response_model,
|
||||
prompt_tokens, completion_tokens, total_tokens,
|
||||
status_code, response_time_ms, error_message, detail_logged,
|
||||
local_date, request_headers, request_body, response_headers, response_body, created_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`).run(
|
||||
logData.user_id,
|
||||
logData.backend_id,
|
||||
logData.endpoint,
|
||||
logData.request_model || null,
|
||||
logData.routed_model || null,
|
||||
logData.response_model || null,
|
||||
logData.prompt_tokens || null,
|
||||
logData.completion_tokens || null,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { describe, it, expect, beforeAll } from 'vitest';
|
||||
import { createTestApp } from '../utils/testApp';
|
||||
import { createAdminClient } from '../utils/adminClient';
|
||||
import { createMockBackend } from '../utils/mockBackend';
|
||||
|
||||
let app: ReturnType<typeof createTestApp>;
|
||||
let admin: Awaited<ReturnType<typeof createAdminClient>>;
|
||||
|
|
@ -210,6 +211,7 @@ describe('Admin API - Backend Management', () => {
|
|||
expect(response.status).toBe(200);
|
||||
expect(response.body.name).toBe('Updated Backend');
|
||||
expect(response.body.is_active).toBe(false);
|
||||
expect(response.body.model_cache_state).toBe('inactive');
|
||||
});
|
||||
|
||||
it('should return 404 for non-existent backend', async () => {
|
||||
|
|
@ -242,6 +244,77 @@ describe('Admin API - Backend Management', () => {
|
|||
expect(response.status).toBe(404);
|
||||
});
|
||||
});
|
||||
|
||||
// Admin endpoints exposing the per-backend model cache and the manual refresh.
describe('Backend model cache endpoints', () => {
  it('should expose backend cache details and allow manual refresh for active backends', async () => {
    const { server, port } = createMockBackend({
      modelsResponse: [{ id: 'admin-refresh-model', object: 'model' }],
    });

    // Close the mock server even when an assertion fails, so a failing run
    // does not leave a listening server behind.
    try {
      const backendResponse = await admin.post('/admin/backends').send({
        name: 'Backend Cache Admin Test',
        base_url: `http://localhost:${port}`,
      });
      const backendId = backendResponse.body.id;

      const beforeRefresh = await admin.get(`/admin/backends/${backendId}/models`);
      expect(beforeRefresh.status).toBe(200);
      expect(beforeRefresh.body.cache.state).toBe('uninitialized');

      const refreshResponse = await admin.post(`/admin/backends/${backendId}/models/refresh`);
      expect(refreshResponse.status).toBe(200);
      expect(refreshResponse.body.models).toContain('admin-refresh-model');

      const cacheOverview = await admin.get('/admin/models/cache');
      expect(cacheOverview.status).toBe(200);
      expect(Array.isArray(cacheOverview.body.models)).toBe(true);
    } finally {
      await new Promise<void>((resolve) => server.close(() => resolve()));
    }
  });

  it('should reject manual refresh for inactive backends', async () => {
    const backendResponse = await admin.post('/admin/backends').send({
      name: 'Inactive Refresh Reject',
      base_url: 'http://localhost:8041',
    });

    // Deactivate first; the refresh endpoint is expected to answer 409 afterwards.
    await admin.put(`/admin/backends/${backendResponse.body.id}`).send({ is_active: false });
    const refreshResponse = await admin.post(`/admin/backends/${backendResponse.body.id}/models/refresh`);
    expect(refreshResponse.status).toBe(409);
  });
});
|
||||
});
|
||||
|
||||
// Full lifecycle of a rewrite rule through the admin API: create, list, update, delete.
describe('Admin API - Model Rewrite Management', () => {
  it('should create, update, list, and delete model rewrite rules', async () => {
    const sourceModel = 'gpt-3.5-turbo-admin-test';

    // Create
    const created = await admin.post('/admin/model-rewrites').send({
      source_model: sourceModel,
      target_model: 'gpt-3.5-admin-test',
      force: true,
      note: 'fallback alias',
    });
    expect(created.status).toBe(201);
    expect(created.body.source_model).toBe(sourceModel);
    expect(created.body.force).toBe(true);

    // List
    const listed = await admin.get('/admin/model-rewrites');
    expect(listed.status).toBe(200);
    const containsCreatedRule = listed.body.some((rule: any) => rule.source_model === sourceModel);
    expect(containsCreatedRule).toBe(true);

    // Update
    const updated = await admin.put(`/admin/model-rewrites/${created.body.id}`).send({
      target_model: 'gpt-3.5-mini-admin-test',
      is_active: false,
      force: false,
    });
    expect(updated.status).toBe(200);
    expect(updated.body.target_model).toBe('gpt-3.5-mini-admin-test');
    expect(updated.body.is_active).toBe(false);
    expect(updated.body.force).toBe(false);

    // Delete
    const removed = await admin.delete(`/admin/model-rewrites/${created.body.id}`);
    expect(removed.status).toBe(204);
  });
});
|
||||
|
||||
describe('Admin API - Permission Management', () => {
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ describe('Auth & Proxy API', () => {
|
|||
});
|
||||
|
||||
describe('POST /v1/chat/completions with valid auth', () => {
|
||||
it('should return 502 when backend is unreachable (but auth passes)', async () => {
|
||||
it('should return 404 when model catalog cannot confirm the requested model', async () => {
|
||||
const response = await request(app)
|
||||
.post('/v1/chat/completions')
|
||||
.set('Authorization', `Bearer ${userApiKey}`)
|
||||
|
|
@ -79,9 +79,9 @@ describe('Auth & Proxy API', () => {
|
|||
messages: [{ role: 'user', content: 'Hello' }]
|
||||
});
|
||||
|
||||
// Should authenticate successfully but fail to connect to backend
|
||||
expect(response.status).toBe(502);
|
||||
expect(response.status).toBe(404);
|
||||
expect(response.body).toHaveProperty('error');
|
||||
expect(response.body).toHaveProperty('request_model', 'test-model');
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ describe('Auth & Proxy API', () => {
|
|||
|
||||
// Find our logged request
|
||||
const loggedRequest = analyticsResponse.body.rows.find((r: any) =>
|
||||
r.status_code === 502 && r.endpoint === '/v1/chat/completions'
|
||||
r.status_code === 404 && r.endpoint === '/v1/chat/completions'
|
||||
);
|
||||
|
||||
expect(loggedRequest).toBeDefined();
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ describe('Permission-based Routing', () => {
|
|||
});
|
||||
|
||||
describe('Scenario 1: Authorized backend routing', () => {
|
||||
it('should route to authorized backend (auth passes, backend may fail)', async () => {
|
||||
it('should return model-not-available when catalog refresh fails for an authorized backend', async () => {
|
||||
const userResponse = await admin.post('/admin/users').send({ name: 'Auth User 1-1' });
|
||||
const userApiKey = userResponse.body.api_key;
|
||||
const userId = userResponse.body.id;
|
||||
|
|
@ -39,7 +39,7 @@ describe('Permission-based Routing', () => {
|
|||
.set('Authorization', `Bearer ${userApiKey}`)
|
||||
.send({ model: 'test', messages: [{ role: 'user', content: 'Hello' }] });
|
||||
|
||||
expect(response.status).toBe(502);
|
||||
expect(response.status).toBe(404);
|
||||
expect(response.body).toHaveProperty('error');
|
||||
});
|
||||
});
|
||||
|
|
@ -83,13 +83,13 @@ describe('Permission-based Routing', () => {
|
|||
expect(response.body.error).toBe('No backends available for your account');
|
||||
});
|
||||
|
||||
it('should successfully route user with permissions', async () => {
|
||||
it('should return model-not-available when the permitted backend has no cached model match', async () => {
|
||||
const response = await request(app)
|
||||
.post('/v1/chat/completions')
|
||||
.set('Authorization', `Bearer ${userBApiKey}`)
|
||||
.send({ model: 'test', messages: [{ role: 'user', content: 'Hello' }] });
|
||||
|
||||
expect(response.status).toBe(502);
|
||||
expect(response.status).toBe(404);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -122,21 +122,40 @@ describe('Multi-backend Routing', () => {
|
|||
admin = await createAdminClient(app);
|
||||
});
|
||||
|
||||
describe('Scenario 4: Random selection from multiple backends', () => {
|
||||
it('should route to different backends across multiple requests', async () => {
|
||||
describe('Scenario 4: Model-aware candidate selection', () => {
|
||||
it('should use only backends that serve the requested model', async () => {
|
||||
const userResponse = await admin.post('/admin/users').send({ name: 'Multi Backend User 4-4' });
|
||||
const userApiKey = userResponse.body.api_key;
|
||||
const userId = userResponse.body.id;
|
||||
|
||||
const backendServerA = createMockBackend({
|
||||
chatResponse: {
|
||||
id: 'candidate-a',
|
||||
model: 'model-a',
|
||||
choices: [{ index: 0, message: { role: 'assistant', content: 'A' }, finish_reason: 'stop' }],
|
||||
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
|
||||
},
|
||||
modelsResponse: [{ id: 'model-a', object: 'model' }],
|
||||
});
|
||||
const backendServerB = createMockBackend({
|
||||
chatResponse: {
|
||||
id: 'candidate-b',
|
||||
model: 'model-b',
|
||||
choices: [{ index: 0, message: { role: 'assistant', content: 'B' }, finish_reason: 'stop' }],
|
||||
usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
|
||||
},
|
||||
modelsResponse: [{ id: 'model-b', object: 'model' }],
|
||||
});
|
||||
|
||||
const backend1Response = await admin.post('/admin/backends').send({
|
||||
name: 'Multi Backend 4-4-1',
|
||||
base_url: 'http://localhost:8010/v1'
|
||||
base_url: `http://localhost:${backendServerA.port}`
|
||||
});
|
||||
const backend1Id = backend1Response.body.id;
|
||||
|
||||
const backend2Response = await admin.post('/admin/backends').send({
|
||||
name: 'Multi Backend 4-4-2',
|
||||
base_url: 'http://localhost:8011/v1'
|
||||
base_url: `http://localhost:${backendServerB.port}`
|
||||
});
|
||||
const backend2Id = backend2Response.body.id;
|
||||
|
||||
|
|
@ -148,15 +167,22 @@ describe('Multi-backend Routing', () => {
|
|||
.post('/admin/permissions')
|
||||
.send({ user_id: userId, backend_id: backend2Id });
|
||||
|
||||
const responses = await Promise.all([
|
||||
request(app).post('/v1/chat/completions').set('Authorization', `Bearer ${userApiKey}`).send({ model: 'test', messages: [] }),
|
||||
request(app).post('/v1/chat/completions').set('Authorization', `Bearer ${userApiKey}`).send({ model: 'test', messages: [] }),
|
||||
request(app).post('/v1/chat/completions').set('Authorization', `Bearer ${userApiKey}`).send({ model: 'test', messages: [] })
|
||||
]);
|
||||
const responseA = await request(app)
|
||||
.post('/v1/chat/completions')
|
||||
.set('Authorization', `Bearer ${userApiKey}`)
|
||||
.send({ model: 'model-a', messages: [] });
|
||||
const responseB = await request(app)
|
||||
.post('/v1/chat/completions')
|
||||
.set('Authorization', `Bearer ${userApiKey}`)
|
||||
.send({ model: 'model-b', messages: [] });
|
||||
|
||||
responses.forEach(response => {
|
||||
expect(response.status).toBe(502);
|
||||
});
|
||||
expect(responseA.status).toBe(200);
|
||||
expect(responseA.body.id).toBe('candidate-a');
|
||||
expect(responseB.status).toBe(200);
|
||||
expect(responseB.body.id).toBe('candidate-b');
|
||||
|
||||
await new Promise<void>((resolve) => backendServerA.server.close(() => resolve()));
|
||||
await new Promise<void>((resolve) => backendServerB.server.close(() => resolve()));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -351,7 +377,7 @@ describe('OpenAI Compatible Backend Integration', () => {
|
|||
});
|
||||
|
||||
describe('Scenario 7: Models endpoint routing', () => {
|
||||
it('should proxy models request to mock backend', async () => {
|
||||
it('should return the union of cached models from allowed active backends', async () => {
|
||||
// First, deactivate all existing backends to ensure only our mock backend is selected
|
||||
const allBackendsResponse = await admin.get('/admin/backends');
|
||||
const allBackends = allBackendsResponse.body;
|
||||
|
|
@ -396,6 +422,7 @@ describe('OpenAI Compatible Backend Integration', () => {
|
|||
expect(response.body).toHaveProperty('data');
|
||||
expect(Array.isArray(response.body.data)).toBe(true);
|
||||
expect(response.body.data.length).toBe(2);
|
||||
expect(response.body.data.map((item: any) => item.id)).toEqual(['test-model-1', 'test-model-2']);
|
||||
});
|
||||
|
||||
it('should return 403 for models when user has no permissions', async () => {
|
||||
|
|
@ -446,4 +473,100 @@ describe('OpenAI Compatible Backend Integration', () => {
|
|||
expect(receivedAuthorization).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Rewrite rules must be applied before backend selection, and the rewritten
// model id must be the one forwarded upstream.
describe('Scenario 8: Rewrite-based routing', () => {
  interface RewriteScenario {
    responseId: string;
    replyText: string;
    userName: string;
    backendName: string;
    sourceModel: string;
    targetModel: string;
    force: boolean;
  }

  // Shared driver: starts a mock backend that only advertises the target
  // model, grants a fresh user access to it, registers the rewrite rule, then
  // sends a chat request for the source model and checks what was forwarded.
  const runRewriteScenario = async (scenario: RewriteScenario): Promise<void> => {
    let receivedModel: string | undefined;
    const { server, port } = createMockBackend({
      onRequest: (req) => {
        if (req.path === '/v1/chat/completions') {
          receivedModel = req.body.model;
        }
      },
      chatResponse: {
        id: scenario.responseId,
        model: scenario.targetModel,
        choices: [{ index: 0, message: { role: 'assistant', content: scenario.replyText }, finish_reason: 'stop' }],
        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
      },
      modelsResponse: [{ id: scenario.targetModel, object: 'model' }],
    });
    mockServer = server;
    mockPort = port;

    const userResponse = await admin.post('/admin/users').send({ name: scenario.userName });
    const userApiKey = userResponse.body.api_key;
    const userId = userResponse.body.id;

    const backendResponse = await admin.post('/admin/backends').send({
      name: scenario.backendName,
      base_url: `http://localhost:${port}`,
    });
    const backendId = backendResponse.body.id;

    await admin.post('/admin/permissions').send({ user_id: userId, backend_id: backendId });
    const rewriteResponse = await admin.post('/admin/model-rewrites').send({
      source_model: scenario.sourceModel,
      target_model: scenario.targetModel,
      force: scenario.force,
    });
    expect(rewriteResponse.status).toBe(201);

    const response = await request(app)
      .post('/v1/chat/completions')
      .set('Authorization', `Bearer ${userApiKey}`)
      .send({ model: scenario.sourceModel, messages: [{ role: 'user', content: 'Hello' }] });

    expect(response.status).toBe(200);
    expect(receivedModel).toBe(scenario.targetModel);
    expect(response.body.model).toBe(scenario.targetModel);
  };

  it('should rewrite the requested model before backend selection and upstream forwarding', async () => {
    await runRewriteScenario({
      responseId: 'rewrite-success',
      replyText: 'rewritten',
      userName: 'Rewrite Route User 8-8',
      backendName: 'Rewrite Backend 8-8',
      sourceModel: 'gpt-3.5-turbo',
      targetModel: 'gpt-3.5',
      force: true,
    });
  });

  it('should use fallback rewrite only when the original model is unavailable', async () => {
    await runRewriteScenario({
      responseId: 'fallback-success',
      replyText: 'fallback',
      userName: 'Fallback Route User 8-9',
      backendName: 'Fallback Backend 8-9',
      sourceModel: 'missing-model',
      targetModel: 'fallback-model',
      force: false,
    });
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,12 +9,15 @@ import analyticsRoutes from '../../src/routes/analytics';
|
|||
import { initRequestLogsDb } from '../../src/config/request-logs-db';
|
||||
import { getUtcTimestamp } from '../../src/utils/time';
|
||||
import { requireAdminAccess, requireSessionCsrf } from '../../src/utils/adminAuth';
|
||||
import { ModelCatalogService } from '../../src/services/ModelCatalogService';
|
||||
|
||||
export function createTestApp() {
|
||||
// Initialize both databases
|
||||
initDb();
|
||||
initAnalyticsDb();
|
||||
initRequestLogsDb();
|
||||
ModelCatalogService.reset();
|
||||
void ModelCatalogService.initialize();
|
||||
|
||||
const app = express();
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,76 @@ export interface Backend {
|
|||
detail_logging: boolean;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
cached_model_count?: number;
|
||||
last_model_sync_at?: string;
|
||||
model_cache_initialized?: boolean;
|
||||
model_cache_state?: ModelCacheState;
|
||||
}
|
||||
|
||||
/** One stored snapshot row describing a model reported by a backend. */
export interface BackendModelSnapshot {
  id: number;
  /** Id of the backend the snapshot belongs to. */
  backend_id: number;
  model_id: string;
  /** Serialized model object, when one was stored. */
  raw_json?: string;
  fetched_at: string;
  created_at: string;
  updated_at: string;
}
|
||||
|
||||
/** Possible states of a backend's model cache. */
export type ModelCacheState =
  | 'ready'
  | 'uninitialized'
  | 'error'
  | 'inactive';
|
||||
|
||||
/** Model-cache status reported for a single backend. */
export interface BackendModelCacheStatus {
  backend_id: number;
  initialized: boolean;
  state: ModelCacheState;
  /** Number of models counted for the backend. */
  model_count: number;
  last_synced_at?: string;
  last_attempted_at?: string;
  /** Message of the last recorded error, if any. */
  last_error?: string;
}
|
||||
|
||||
/** A model id together with the ids of the backends associated with it. */
export interface BackendModelCatalogEntry {
  model_id: string;
  backend_ids: number[];
}
|
||||
|
||||
/** Detail payload for one backend: the backend, its cache status, stored snapshots and model ids. */
export interface BackendModelsResponse {
  backend: Backend;
  cache: BackendModelCacheStatus;
  snapshots: BackendModelSnapshot[];
  models: string[];
}
|
||||
|
||||
/** Cache overview: per-backend statuses plus the model catalog entries. */
export interface ModelCacheOverview {
  backends: BackendModelCacheStatus[];
  models: BackendModelCatalogEntry[];
}
|
||||
|
||||
/** A stored model rewrite rule mapping a source model id to a target model id. */
export interface ModelRewriteRule {
  id: number;
  source_model: string;
  target_model: string;
  is_active: boolean;
  force: boolean;
  /** Optional free-text note attached to the rule. */
  note?: string;
  created_at: string;
  updated_at: string;
}
|
||||
|
||||
/** Fields accepted when creating a rewrite rule; only the two model ids are required. */
export interface CreateModelRewriteData {
  source_model: string;
  target_model: string;
  is_active?: boolean;
  force?: boolean;
  note?: string;
}
|
||||
|
||||
/** Fields accepted when updating a rewrite rule; every field is optional. */
export interface UpdateModelRewriteData {
  source_model?: string;
  target_model?: string;
  is_active?: boolean;
  force?: boolean;
  note?: string;
}
|
||||
|
||||
export interface Permission {
|
||||
|
|
@ -66,6 +136,7 @@ export interface RequestLog {
|
|||
backend_id: number;
|
||||
endpoint: string;
|
||||
request_model?: string;
|
||||
routed_model?: string;
|
||||
response_model?: string;
|
||||
prompt_tokens?: number;
|
||||
completion_tokens?: number;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue