feat(routing): add MODEL_LIST_INCLUDE_ROUTING_METADATA for enhanced model metadata in responses

This commit is contained in:
Kyush 2026-04-23 18:12:51 +09:00
commit 3fcc017c0c
9 changed files with 138 additions and 7 deletions

View file

@ -17,6 +17,9 @@ ADMIN_API_TOKEN_TTL_DAYS=30
CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
ADMIN_TRUSTED_PROXY_IPS=
# Model routing
MODEL_LIST_INCLUDE_ROUTING_METADATA=false
# OpenID Connect
OIDC_ISSUER_URL=
OIDC_CLIENT_ID=

View file

@ -78,6 +78,7 @@ pnpm run bench # 벤치마크 실행
| `OIDC_ALLOWED_EMAILS` | empty | 관리자 접근을 허용할 이메일 목록 |
| `OIDC_SCOPES` | `openid profile email` | OIDC authorization scope |
| `MODEL_CATALOG_REFRESH_MIN_MS` | `300000` 예시 | 모델 카탈로그 refresh 최소 간격(ms) |
| `MODEL_LIST_INCLUDE_ROUTING_METADATA` | `false` | `1`, `true`, `yes`, `on` 중 하나이면 `/v1/models` 의 model object에 비표준 `kyush_router` routing metadata 추가 |
| `DETAIL_STREAM_LOG_MODE` | `compact` | 상세 로그에서 stream response body 저장 방식 (`compact`, `raw`, `both`, `off`) |
## Detailed Docs

View file

@ -27,6 +27,7 @@
- `force=true` rewrite 는 항상 적용되고 target 모델의 다음 규칙까지 계속 평가한다
- `force=false` rewrite 는 현재 모델을 서빙하는 허용 가능한 활성 백엔드가 없을 때만 fallback 으로 적용되고 target 모델의 다음 규칙까지 계속 평가한다
- `/v1/models` 는 native backend 모델뿐 아니라 현재 사용자 권한에서 최종 후보가 있는 rewrite source alias도 함께 반환한다
- `MODEL_LIST_INCLUDE_ROUTING_METADATA=1|true|yes|on` 이면 `/v1/models` 의 각 model object에 비표준 `kyush_router` metadata를 추가한다. 이 metadata는 `requested_model`, `routed_model`, `was_rewritten`, `rule_type`, `rewrite_path` 를 포함한다
- 최종 후보가 없으면 모델 미지원 오류를 반환하고 `request_model`, `routed_model` 을 함께 내려준다
## Admin API

View file

@ -19,6 +19,7 @@
2. 접근 가능한 활성 백엔드의 메모리 카탈로그를 확인
3. native backend 모델과 rewrite `source_model` alias를 같은 체인 해석기로 평가
4. 최종 모델 후보가 있는 requestable 모델 ID 합집합을 반환
5. `MODEL_LIST_INCLUDE_ROUTING_METADATA` 가 켜져 있으면 각 model object에 비표준 `kyush_router` routing metadata를 추가한다
## Caching Rules
@ -49,6 +50,8 @@
- 활성 rewrite 그래프에 cycle이 생기는 관리자 생성/수정은 거부된다
- 직접 DB 조작 등으로 runtime cycle이 발견되면 라우터는 설정 오류를 반환한다
- 체인 평가는 요청별 allowed backend set과 candidate memo를 사용해 반복 DB 조회를 피한다
- `/v1/models` 의 `kyush_router` metadata는 적용된 rewrite hop만 `rewrite_path` 에 담는다. 후보가 있어 fallback이 중단된 규칙은 path에 포함하지 않는다
- `kyush_router` 는 public routing 설명용이며 backend id/name 같은 내부 라우팅 대상 정보는 포함하지 않는다
예시:
`AutoModelTranslate -(Force)-> Qwen3.5 -(Force)-> Qwen/Qwen3.5-397B-A17B-FP8 -(Fallback)-> Gemma4 -(Force)-> cyankiwi/gemma-4-26B-A4B-it-AWQ-4bit`

View file

@ -68,6 +68,7 @@ server/src/
- 활성 rewrite cycle은 관리자 생성/수정 시 거부하고, runtime에서도 방어한다
- 최종 모델을 서빙하는 허용 가능한 활성 백엔드가 없으면 `/v1/chat/completions` 는 모델 미지원 오류를 반환한다
- `/v1/models` 는 허용 가능한 활성 백엔드들의 native 모델과 실제 요청 가능한 rewrite alias 합집합을 반환한다
- `MODEL_LIST_INCLUDE_ROUTING_METADATA` 가 켜져 있으면 `/v1/models` 는 비표준 `kyush_router` metadata를 추가해 요청 모델, 최종 라우팅 모델, 적용된 rewrite path를 노출한다
참고:
- 세부 라우팅 규칙과 캐시 트리거는 [docs/model-routing.md](./model-routing.md) 참고

View file

@ -0,0 +1,4 @@
/**
 * Tells whether the `MODEL_LIST_INCLUDE_ROUTING_METADATA` environment
 * variable is switched on.
 *
 * The variable is read on every call. Its value is trimmed and compared
 * case-insensitively; only `1`, `true`, `yes` and `on` enable the flag.
 *
 * @returns `true` for one of the accepted values, `false` for anything else
 *          (including an unset or empty variable).
 */
export function shouldIncludeModelListRoutingMetadata(): boolean {
  const raw = process.env.MODEL_LIST_INCLUDE_ROUTING_METADATA;
  if (raw === undefined) {
    return false;
  }

  switch (raw.trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
      return true;
    default:
      return false;
  }
}

View file

@ -6,6 +6,7 @@ import { ScriptEngine } from '../services/ScriptEngine';
import { logger } from '../utils/logger';
import { ModelCatalogService, ModelRewriteCycleError } from '../services/ModelCatalogService';
import { getDetailStreamLogMode } from '../config/stream-logging';
import { shouldIncludeModelListRoutingMetadata } from '../config/model-list-metadata';
import { ChatStreamLogAccumulator } from '../utils/streamLog';
const router: Router = Router();
@ -364,12 +365,40 @@ router.get('/models', async (req: AuthenticatedRequest, res: Response) => {
res.status(403).json({ error: 'No active backends available' });
return;
}
let models: Array<{ id: string; object: string }>;
let models: Array<{
id: string;
object: string;
kyush_router?: {
requested_model: string;
routed_model: string;
was_rewritten: boolean;
rule_type: string;
rewrite_path: Array<{ source_model: string; target_model: string; mode: string }>;
};
}>;
try {
models = ModelCatalogService.getRequestableModelsForAllowedBackends(activeAllowedBackendIds).map((entry) => ({
id: entry.model_id,
object: 'model',
}));
const includeRoutingMetadata = shouldIncludeModelListRoutingMetadata();
models = ModelCatalogService.getRequestableModelsForAllowedBackends(activeAllowedBackendIds).map((entry) => {
const model = {
id: entry.model_id,
object: 'model',
};
if (!includeRoutingMetadata) {
return model;
}
return {
...model,
kyush_router: {
requested_model: entry.routing.requestedModel,
routed_model: entry.routing.routedModel,
was_rewritten: entry.routing.wasRewritten,
rule_type: entry.routing.ruleType,
rewrite_path: entry.routing.rewritePath,
},
};
});
} catch (error) {
const errorMsg = error instanceof Error ? error.message : 'Model rewrite resolution failed';
logger.error(`Model list resolution failed: ${errorMsg}`);

View file

@ -31,11 +31,18 @@ interface FetchModelsResponse {
rawModels: Array<{ model_id: string; raw_json?: string }>;
}
interface RewriteResolution {
/**
 * One applied rewrite rule in a resolution's path.
 *
 * Field names are snake_case because the `/v1/models` route passes these
 * objects through unchanged as `kyush_router.rewrite_path`.
 */
export interface RewritePathHop {
/** Model ID the applied rule rewrote from (the rule's `sourceModel`). */
source_model: string;
/** Model ID the applied rule rewrote to (the rule's `targetModel`). */
target_model: string;
/** `'force'` when the rule's `force` flag is set, otherwise `'fallback'`. */
mode: 'force' | 'fallback';
}
/**
 * Result of resolving a model ID through the rewrite rules.
 */
export interface RewriteResolution {
/** Model ID the resolution was asked for; exposed as `requested_model`. */
requestedModel: string;
/** Model ID the resolution ended on; exposed as `routed_model`. */
routedModel: string;
/** `true` when the routed model differs from the requested model. */
wasRewritten: boolean;
/**
 * Summary of the applied rules, derived by `getRuleTypeFromAppliedRules`.
 * NOTE(review): presumably `'none'` with no applied rule and `'chain'` with
 * several — confirm against that helper.
 */
ruleType: 'none' | 'force' | 'fallback' | 'chain';
/** Applied rules in the order they were applied; empty when none applied. */
rewritePath: RewritePathHop[];
}
interface RewriteConfig {
@ -51,6 +58,10 @@ interface ResolutionContext {
candidateMemo: Map<string, number[]>;
}
/**
 * Catalog entry returned by `getRequestableModelsForAllowedBackends`:
 * a `BackendModelCatalogEntry` with the rewrite resolution attached.
 */
export interface RequestableModelCatalogEntry extends BackendModelCatalogEntry {
/** Rewrite resolution computed for this entry's model ID. */
routing: RewriteResolution;
}
const DEFAULT_REFRESH_MIN_MS = 5 * 60 * 1000;
export class ModelRewriteCycleError extends Error {
@ -303,6 +314,11 @@ export class ModelCatalogService {
routedModel: currentModel,
wasRewritten: currentModel !== requestedModel,
ruleType: this.getRuleTypeFromAppliedRules(appliedRules),
rewritePath: appliedRules.map((rule) => ({
source_model: rule.sourceModel,
target_model: rule.targetModel,
mode: rule.force ? 'force' : 'fallback',
})),
};
}
@ -314,6 +330,11 @@ export class ModelCatalogService {
routedModel: currentModel,
wasRewritten: currentModel !== requestedModel,
ruleType: this.getRuleTypeFromAppliedRules(appliedRules),
rewritePath: appliedRules.map((rule) => ({
source_model: rule.sourceModel,
target_model: rule.targetModel,
mode: rule.force ? 'force' : 'fallback',
})),
};
}
}
@ -377,7 +398,7 @@ export class ModelCatalogService {
return null;
}
static getRequestableModelsForAllowedBackends(allowedBackendIds: number[]): BackendModelCatalogEntry[] {
static getRequestableModelsForAllowedBackends(allowedBackendIds: number[]): RequestableModelCatalogEntry[] {
const context = this.createResolutionContext(allowedBackendIds);
const requestableModelIds = new Set<string>();
const candidateModelIds = new Set<string>([
@ -400,6 +421,7 @@ export class ModelCatalogService {
return {
model_id: modelId,
backend_ids: this.getCandidateBackendIdsWithContext(resolution.routedModel, context),
routing: resolution,
};
});
}

View file

@ -280,6 +280,8 @@ describe('OpenAI Compatible Backend Integration', () => {
});
afterEach(async () => {
delete process.env.MODEL_LIST_INCLUDE_ROUTING_METADATA;
if (mockServer) {
await new Promise<void>(resolve => mockServer.close(resolve));
mockServer = undefined;
@ -778,11 +780,76 @@ describe('OpenAI Compatible Backend Integration', () => {
expect(ids).not.toContain('models-missing-alias');
expect(ids).not.toContain('models-denied-alias');
expect(ids).not.toContain('models-native-forced-away');
expect(response.body.data.every((item: any) => item.kyush_router === undefined)).toBe(true);
} finally {
await new Promise<void>((resolve) => deniedBackend.server.close(() => resolve()));
}
});
it('should include kyush_router metadata for /v1/models when enabled', async () => {
  // Opt in for this case only; the suite's afterEach removes the variable again.
  process.env.MODEL_LIST_INCLUDE_ROUTING_METADATA = 'true';

  // Mock backend that natively serves three models.
  const backend = createMockBackend({
    modelsResponse: [
      { id: 'metadata-native', object: 'model' },
      { id: 'metadata-final', object: 'model' },
      { id: 'metadata-skip-current', object: 'model' },
    ],
  });
  mockServer = backend.server;
  mockPort = backend.port;

  // A user with permission on that single backend.
  const { body: user } = await admin.post('/admin/users').send({ name: 'Model Metadata User 8-14' });
  const { body: createdBackend } = await admin.post('/admin/backends').send({
    name: 'Model Metadata Backend 8-14',
    base_url: `http://localhost:${backend.port}`,
  });
  await admin.post('/admin/permissions').send({ user_id: user.id, backend_id: createdBackend.id });

  // Rewrite rules, created one at a time in this order:
  //   alias-a -(force)-> alias-b -(force)-> missing-c -(fallback)-> final
  //   skip-current -(fallback)-> skip-target
  const rewriteRules = [
    { source_model: 'metadata-alias-a', target_model: 'metadata-alias-b', force: true },
    { source_model: 'metadata-alias-b', target_model: 'metadata-missing-c', force: true },
    { source_model: 'metadata-missing-c', target_model: 'metadata-final', force: false },
    { source_model: 'metadata-skip-current', target_model: 'metadata-skip-target', force: false },
  ];
  for (const rule of rewriteRules) {
    const created = await admin.post('/admin/model-rewrites').send(rule);
    expect(created.status).toBe(201);
  }

  const listing = await request(app)
    .get('/v1/models')
    .set('Authorization', `Bearer ${user.api_key}`);

  expect(listing.status).toBe(200);
  const entries = listing.body.data;
  expect(entries.length).toBeGreaterThan(0);
  // Every entry carries the metadata, and the metadata never contains backend ids.
  expect(entries.every((item: any) => item.kyush_router && !('backend_ids' in item.kyush_router))).toBe(true);

  // Looks up the routing metadata of one listed model by its id.
  const routingOf = (modelId: string) => entries.find((item: any) => item.id === modelId).kyush_router;

  // A native model with no rewrite rule resolves to itself.
  expect(routingOf('metadata-native')).toEqual({
    requested_model: 'metadata-native',
    routed_model: 'metadata-native',
    was_rewritten: false,
    rule_type: 'none',
    rewrite_path: [],
  });

  // The alias walks the whole chain; every applied hop is reported in order.
  expect(routingOf('metadata-alias-a')).toEqual({
    requested_model: 'metadata-alias-a',
    routed_model: 'metadata-final',
    was_rewritten: true,
    rule_type: 'chain',
    rewrite_path: [
      { source_model: 'metadata-alias-a', target_model: 'metadata-alias-b', mode: 'force' },
      { source_model: 'metadata-alias-b', target_model: 'metadata-missing-c', mode: 'force' },
      { source_model: 'metadata-missing-c', target_model: 'metadata-final', mode: 'fallback' },
    ],
  });

  // metadata-skip-current is in the mock backend's model list; its fallback
  // rule is expected to stay unapplied and out of the path.
  expect(routingOf('metadata-skip-current')).toEqual({
    requested_model: 'metadata-skip-current',
    routed_model: 'metadata-skip-current',
    was_rewritten: false,
    rule_type: 'none',
    rewrite_path: [],
  });
});
it('should reject active rewrite cycles while allowing inactive cycles until activation', async () => {
const selfLoop = await admin.post('/admin/model-rewrites').send({
source_model: 'cycle-self-a',