mirror of
https://github.com/ggml-org/llama.vscode.git
synced 2026-05-07 01:15:23 +00:00
Compare commits
4 commits
master
...
agent_next
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
811e76bfd8 | ||
|
|
b81e164dd8 | ||
|
|
8e3b6b2de7 | ||
|
|
bb931008da |
19 changed files with 2290 additions and 985 deletions
|
|
@ -18,6 +18,10 @@ import {LlamaWebviewProvider} from "./llama-webview-provider"
|
|||
import * as vscode from "vscode"
|
||||
import path from "path";
|
||||
import { Persistence } from "./persistence";
|
||||
import { ModelService } from "./services/model-service";
|
||||
import { HfModelStrategy } from "./services/hf-model-strategy";
|
||||
import { LocalModelStrategy } from "./services/local-model-strategy";
|
||||
import { ExternalModelStrategy } from "./services/external-model-strategy";
|
||||
|
||||
export class Application {
|
||||
private static instance: Application;
|
||||
|
|
@ -39,6 +43,10 @@ export class Application {
|
|||
public llamaAgent: LlamaAgent
|
||||
public llamaWebviewProvider: LlamaWebviewProvider
|
||||
public persistence: Persistence
|
||||
public modelService: ModelService
|
||||
public hfModelStrategy: HfModelStrategy
|
||||
public localModelStrategy: LocalModelStrategy
|
||||
public externalModelStrategy: ExternalModelStrategy
|
||||
|
||||
private constructor(context: vscode.ExtensionContext) {
|
||||
this.configuration = new Configuration()
|
||||
|
|
@ -59,6 +67,11 @@ export class Application {
|
|||
this.llamaAgent = new LlamaAgent(this)
|
||||
this.llamaWebviewProvider = new LlamaWebviewProvider(context.extensionUri, this, context)
|
||||
this.persistence = new Persistence(this, context)
|
||||
// strategies should be initialized before modelService constructor as they are needed there.
|
||||
this.hfModelStrategy = new HfModelStrategy(this)
|
||||
this.localModelStrategy = new LocalModelStrategy(this)
|
||||
this.externalModelStrategy = new ExternalModelStrategy(this)
|
||||
this.modelService = new ModelService(this)
|
||||
}
|
||||
|
||||
public static getInstance(context: vscode.ExtensionContext): Application {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import {LlamaWebviewProvider} from './llama-webview-provider'
|
|||
import { Utils } from './utils';
|
||||
import { Env, LlmModel } from './types';
|
||||
import { env } from 'process';
|
||||
import { SETTING_NAME_FOR_LIST } from './constants';
|
||||
|
||||
export class Architect {
|
||||
private app: Application
|
||||
|
|
@ -485,9 +486,9 @@ export class Architect {
|
|||
Utils.removeFaOptionFromModels(toolsModels);
|
||||
Utils.removeFaOptionFromEnvs(envs);
|
||||
|
||||
this.app.configuration.updateConfigValue("chat_models_list", chatModels);
|
||||
this.app.configuration.updateConfigValue("tools_models_list", toolsModels);
|
||||
this.app.configuration.updateConfigValue("envs_list", envs);
|
||||
this.app.configuration.updateConfigValue(SETTING_NAME_FOR_LIST.CHAT_MODELS, chatModels);
|
||||
this.app.configuration.updateConfigValue(SETTING_NAME_FOR_LIST.TOOLS_MODELS, toolsModels);
|
||||
this.app.configuration.updateConfigValue(SETTING_NAME_FOR_LIST.ENVS, envs);
|
||||
}
|
||||
} else {
|
||||
let questionStopAskingLlamaCppUpgrade = "Do you prefer to stop getting a suggestion to upgrade llama.cpp?"
|
||||
|
|
|
|||
|
|
@ -51,22 +51,24 @@ export class ChatWithAi {
|
|||
let aiPanel = this.askAiPanel
|
||||
let extraCont = aiInitialExtraContext ? aiInitialExtraContext + "\n\n" : "";
|
||||
let query: string|undefined = undefined
|
||||
let targetUrl = this.app.configuration.endpoint_chat ? this.app.configuration.endpoint_chat + "/" : "";
|
||||
let targetUrl = this.app.configuration.endpoint_chat
|
||||
? this.app.configuration.endpoint_chat + "/"
|
||||
: this.app.configuration.endpoint_tools ? this.app.configuration.endpoint_tools + "/" : "";
|
||||
|
||||
let chatModel = this.app.menu.getChatModel();
|
||||
let chatModel = this.app.menu.getChatModel();
|
||||
if (!this.app.menu.isChatModelSelected()) chatModel = this.app.menu.getToolsModel();
|
||||
if (chatModel.endpoint) {
|
||||
const chatEndpoint = Utils.trimTrailingSlash(chatModel.endpoint)
|
||||
targetUrl = chatEndpoint ? chatEndpoint + "/" : "";
|
||||
}
|
||||
if (!targetUrl) {
|
||||
const shouldSelectModel = await Utils.showUserChoiceDialog("Select a chat model or an env with chat model to chat with AI.","Select")
|
||||
const shouldSelectModel = await Utils.showUserChoiceDialog("Select a chat or tools model run by llama-server or an env with chat or tools model run on llama-server to chat with AI.","Select")
|
||||
if (shouldSelectModel){
|
||||
// await this.app.menu.selectEnvFromList(this.app.configuration.envs_list.filter(item => item.chat != undefined && item.chat.name)) // .selectStartModel(chatTypeDetails);
|
||||
this.app.menu.showEnvView();
|
||||
vscode.window.showInformationMessage("After the chat model is loaded, try again opening Chat with AI.")
|
||||
vscode.window.showInformationMessage("After the chat/tools model is loaded, try again opening Chat with AI.")
|
||||
return;
|
||||
} else {
|
||||
vscode.window.showErrorMessage("No endpoint for the chat model. Select an env with chat model or enter the endpoint of a running llama.cpp server with chat model in setting endpoint_chat. ")
|
||||
vscode.window.showErrorMessage("No endpoint for the chat or tools model. Select a chat or tools model run on llama-server or an env with chat or tools model or enter the endpoint of a running llama.cpp server with chat model in setting endpoint_chat. ")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
|
|
|||
171
src/constants.ts
171
src/constants.ts
|
|
@ -84,7 +84,172 @@ export enum AGENT_NAME {
|
|||
}
|
||||
|
||||
/**
 * English UI texts for the extension menus, grouped by feature area.
 *
 * Per the note that originally accompanied this table, these values are meant
 * to serve as the keys passed to `getUiText` calls.
 * NOTE(review): `getUiText` and its call sites are not visible from this
 * file section - confirm that callers look texts up through this table.
 *
 * Declared `as const` so every property is readonly and keeps its literal type.
 */
export const UI_TEXT_KEYS = {
    // Menu separators and sections
    actions: "Actions",
    entities: "Entities",
    maintenance: "Maintenance",
    help: "Help",

    // Env related
    selectStartEnv: "Select/start env...",
    envSelectDescription: "Stops the currently running models and starts the selected env - (a predefined group of models for completion, chat, embeddings and tools).",
    deselectStopEnv: "Deselect/stop env and models",
    deselectStopEnvDescription: "Deselects/stops env, completion, chat, embeddings and tools models",
    showSelectedEnv: "Show selected env",
    showSelectedEnvDescription: "Shows details about the selected env",
    addEnv: "Add env...",
    addEnvDescription: "Opens a panel for adding an env.",
    viewEnvDetails: "View env details...",
    deleteEnv: "Delete env...",
    exportEnv: "Export env...",
    importEnv: "Import env...",
    downloadUploadEnvsOnline: "Download/upload envs online",

    // Entity lists (envs and the four model types)
    envs: "Envs...",
    completionModels: "Completion models...",
    chatModels: "Chat models...",
    embeddingsModels: "Embeddings models...",
    toolsModels: "Tools models...",

    // Common toggles
    disable: "Disable",
    enable: "Enable",
    allCompletions: "All Completions",
    turnOffCompletionsGlobally: "Turn off completions globally",
    turnOnCompletionsGlobally: "Turn on completions globally",
    completionsFor: "Completions for",
    currently: "Currently",
    enabled: "enabled",
    disabled: "disabled",
    rag: "RAG",
    turnOffRAG: "Turn off RAG related features like Chat with AI with project context",
    turnOnRAG: "Turn on RAG related features like Chat with AI with project context",

    // UI actions
    showLlamaAgent: "Show Llama Agent",
    showLlamaAgentDescription: "Shows Llama Agent panel",
    chatWithAI: "Chat with AI",
    chatWithAIDescription: "Opens a chat with AI window inside VS Code using the selected chat model (or setting endpoint_chat)",
    chatWithAIWithProjectContext: "Chat with AI with project context",
    showSelectedModels: "Show selected models",
    showSelectedModelsDescription: "Displays a list of currently selected models",
    useAsLocalAIRunner: "Use as local AI runner",
    localAIRunnerDescription: "Download models automatically from Huggingface and chat with them (as LM Studio, Ollama, etc.)",
    editSettings: "Edit Settings...",
    apiKeys: "API keys...",
    apiKeysDescription: "Edit or remove API keys. New API keys are added on first use of an endpoint.",
    agents: "Agents...",
    agentCommands: "Agent commands...",
    chats: "Chats...",

    // Help and maintenance
    howToUseLlamaVscode: "How to use llama-vscode",
    chatWithAIAboutLlamaVscode: "Chat with AI about llama-vscode",
    chatWithAIAboutLlamaVscodeDescription: "Selects llama-vscode help agent and opens llama agent view for asking ai about llama-vscode",
    howToDeleteModels: "How to delete models",
    howToDeleteModelsDescription: "Explains how to delete the downloaded models",
    viewDocumentation: "View Documentation...",
    startTrainingCompletionModel: "Start training completion model",
    launchTrainingCompletionDescription: "Runs the command from property launch_training_completion",
    startTrainingChatModel: "Start training chat model",
    launchTrainingChatDescription: "Runs the command from property launch_training_chat",
    stopTraining: "Stop training",
    stopTrainingDescription: "Stops training if it was started from llama.vscode menu",

    // API keys
    addAPIKey: "Add API key...",
    editDeleteAPIKey: "Edit/delete API key...",

    // Agent actions
    selectStartAgent: "Select/start agent...",
    deselectStopAgent: "Deselect/stop agent...",
    addAgent: "Add agent...",
    viewAgentDetails: "View agent details...",
    deleteAgent: "Delete agent...",
    exportAgent: "Export agent...",
    importAgent: "Import agent...",

    // Agent command actions
    addAgentCommand: "Add agent command...",
    viewAgentCommandDetails: "View agent command details...",
    deleteAgentCommand: "Delete agent command...",
    exportAgentCommand: "Export agent command...",
    importAgentCommand: "Import agent command...",

    // Chat actions
    selectStartChat: "Select/start chat...",
    deleteChat: "Delete chat...",
    exportChat: "Export chat...",
    importChat: "Import chat...",

    // Model actions for Completion
    selectStartCompletionModel: "Select/start completion model...",
    deselectStopCompletionModel: "Deselect/stop completion model",
    addLocalCompletionModel: "Add local completion model...",
    addExternalCompletionModel: "Add external completion model...",
    addCompletionModelFromHuggingface: "Add completion model from huggingface...",
    viewCompletionModelDetails: "View completion model details...",
    deleteCompletionModel: "Delete completion model...",
    exportCompletionModel: "Export completion model...",
    importCompletionModel: "Import completion model...",

    // Model actions for Chat
    selectStartChatModel: "Select/start chat model...",
    deselectStopChatModel: "Deselect/stop chat model",
    addLocalChatModel: "Add local chat model...",
    addExternalChatModel: "Add external chat model...",
    addChatModelFromHuggingface: "Add chat model from huggingface...",
    viewChatModelDetails: "View chat model details...",
    deleteChatModel: "Delete chat model...",
    exportChatModel: "Export chat model...",
    importChatModel: "Import chat model...",

    // Model actions for Embeddings
    selectStartEmbeddingsModel: "Select/start embeddings model...",
    deselectStopEmbeddingsModel: "Deselect/stop embeddings model",
    addLocalEmbeddingsModel: "Add local embeddings model...",
    addExternalEmbeddingsModel: "Add external embeddings model...",
    addEmbeddingsModelFromHuggingface: "Add embeddings model from huggingface...",
    viewEmbeddingsModelDetails: "View embeddings model details...",
    deleteEmbeddingsModel: "Delete embeddings model...",
    exportEmbeddingsModel: "Export embeddings model...",
    importEmbeddingsModel: "Import embeddings model...",

    // Model actions for Tools
    selectStartToolsModel: "Select/start tools model...",
    deselectStopToolsModel: "Deselect/stop tools model",
    addLocalToolsModel: "Add local tools model...",
    addExternalToolsModel: "Add external tools model...",
    addToolsModelFromHuggingface: "Add tools model from huggingface...",
    viewToolsModelDetails: "View tools model details...",
    deleteToolsModel: "Delete tools model...",
    exportToolsModel: "Export tools model...",
    importToolsModel: "Import tools model...",
} as const;
|
||||
|
||||
/**
 * String keys identifying the persisted "selected chat", "selected agent"
 * and "selected env" values.
 * NOTE(review): presumably consumed by the Persistence service - the call
 * sites are not visible here, confirm before renaming any value.
 *
 * The single trailing `as const` already makes every property readonly with
 * its literal type, so no per-property assertion is needed.
 */
export const PERSISTENCE_KEYS = {
    SELECTED_CHAT: 'selectedChat',
    SELECTED_AGENT: 'selectedAgent',
    SELECTED_ENV: 'selectedEnv',
} as const;
|
||||
|
||||
/**
 * Names of the configuration settings that store each list
 * (e.g. passed to `configuration.updateConfigValue(...)`).
 *
 * The four model entries are read from `MODEL_TYPE_CONFIG[...].settingName`
 * so they stay in step with the per-model-type configuration; the remaining
 * entries are plain setting names.
 * NOTE(review): `MODEL_TYPE_CONFIG` and `ModelType` are declared outside the
 * visible section - they must be initialised before this constant is evaluated.
 *
 * The trailing `as const` keeps every property readonly and literal-typed,
 * so the string entries need no individual `as const`.
 */
export const SETTING_NAME_FOR_LIST = {
    COMPLETION_MODELS: MODEL_TYPE_CONFIG[ModelType.Completion].settingName,
    CHAT_MODELS: MODEL_TYPE_CONFIG[ModelType.Chat].settingName,
    EMBEDDINGS_MODELS: MODEL_TYPE_CONFIG[ModelType.Embeddings].settingName,
    TOOLS_MODELS: MODEL_TYPE_CONFIG[ModelType.Tools].settingName,
    ENVS: 'envs_list',
    AGENTS: 'agents_list',
    AGENT_COMMANDS: 'agent_commands',
} as const;
|
||||
|
||||
/**
 * Keys under which the predefined lists are registered in the
 * `PREDEFINED_LISTS` map (src/lists.ts).
 *
 * Model lists are keyed by their `ModelType` member; env, agent and agent
 * command lists reuse the corresponding setting name from
 * `SETTING_NAME_FOR_LIST`.
 *
 * Inside an `as const` object an enum member reference already keeps its
 * enum-literal type, so the members carry no separate `as const`.
 */
export const PREDEFINED_LISTS_KEYS = {
    COMPLETIONS: ModelType.Completion,
    CHATS: ModelType.Chat,
    EMBEDDINGS: ModelType.Embeddings,
    TOOLS: ModelType.Tools,
    ENVS: SETTING_NAME_FOR_LIST.ENVS,
    AGENTS: SETTING_NAME_FOR_LIST.AGENTS,
    AGENT_COMMANDS: SETTING_NAME_FOR_LIST.AGENT_COMMANDS,
} as const;
|
||||
|
|
@ -12,20 +12,22 @@ export class Git {
|
|||
|
||||
generateCommitMessage = async (): Promise<void> => {
|
||||
let chatUrl = this.app.configuration.endpoint_chat
|
||||
if (!chatUrl) chatUrl = this.app.configuration.endpoint_tools;
|
||||
let chatModel = this.app.menu.getChatModel();
|
||||
if (!this.app.menu.isChatModelSelected()) chatModel = this.app.menu.getToolsModel();
|
||||
if (chatModel.endpoint) {
|
||||
const chatEndpoint = Utils.trimTrailingSlash(chatModel.endpoint)
|
||||
chatUrl = chatEndpoint ? chatEndpoint + "/" : "";
|
||||
}
|
||||
if (!chatUrl) {
|
||||
const shouldSelectModel = await Utils.showUserChoiceDialog("Select a chat model or an env with chat model to generate a commit message.","Select")
|
||||
const shouldSelectModel = await Utils.showUserChoiceDialog("Select a chat or tools model or an env with chat or tools model to generate a commit message.","Select")
|
||||
if (shouldSelectModel){
|
||||
this.app.menu.showEnvView();
|
||||
vscode.window.showInformationMessage("After the chat model is loaded, try again generating commit message.")
|
||||
vscode.window.showInformationMessage("After the chat/tools model is loaded, try again generating commit message.")
|
||||
return;
|
||||
}
|
||||
else {
|
||||
vscode.window.showErrorMessage("No endpoint for the chat model. Select a chat model or an env with chat model or enter the endpoint of a running llama.cpp server with chat model in setting endpoint_chat. ")
|
||||
vscode.window.showErrorMessage("No endpoint for the chat model. Select a chat or tools model or an env with chat or tools model or enter the endpoint of a running llama.cpp server with chat model in setting endpoint_chat. ")
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
743
src/lists.ts
Normal file
743
src/lists.ts
Normal file
|
|
@ -0,0 +1,743 @@
|
|||
import { PREDEFINED_LISTS_KEYS, ModelType } from "./constants";
|
||||
|
||||
export const PREDEFINED_LISTS = new Map<string, any>([
|
||||
[PREDEFINED_LISTS_KEYS.COMPLETIONS, [
|
||||
{
|
||||
"name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
{
|
||||
"name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
{
|
||||
"name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
{
|
||||
"name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
]],
|
||||
[PREDEFINED_LISTS_KEYS.CHATS, [
|
||||
{
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
{
|
||||
"name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
{
|
||||
"name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
{
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (CPU Only)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
{
|
||||
"name": "gemini qat tools",
|
||||
"localStartCommand": "llama-server -m c:\\ai\\gemma-3-4B-it-QAT-Q4_0.gguf --port 8011",
|
||||
"endpoint": "http://localhost:8011",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
{
|
||||
"name": "OpenAI gpt-oss 20B",
|
||||
"localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8011",
|
||||
"endpoint": "http://localhost:8011",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
]],
|
||||
[PREDEFINED_LISTS_KEYS.EMBEDDINGS, [
|
||||
{
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
}
|
||||
]],
|
||||
[PREDEFINED_LISTS_KEYS.TOOLS, [
|
||||
{
|
||||
"name": "OpenAI gpt-oss 20B (LOCAL) (> 19GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
|
||||
"endpoint": "http://localhost:8009",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
{
|
||||
"name": "xAI: Grok 4 Fast (free for limited period), context: 2 000 000",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "x-ai/grok-4-fast:free"
|
||||
},
|
||||
{
|
||||
"name": "Sonoma Sky - 2,000,000 context $0/M input tokens $0/M output tokens as of 19.09.25 (OpenRouter)",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"aiModel": "openrouter/sonoma-sky-alpha",
|
||||
"isKeyRequired": true
|
||||
},
|
||||
{
|
||||
"name": "Sonoma Dusk - 2,000,000 context $0/M input tokens $0/M output tokens as of 19.09.25 (OpenRouter)",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"aiModel": "openrouter/sonoma-dusk-alpha",
|
||||
"isKeyRequired": true
|
||||
},
|
||||
{
|
||||
"name": "Z.AI: GLM 4.5 - 128000 context $0.60/M input tokens $2.20/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "z-ai/glm-4.5"
|
||||
},
|
||||
{
|
||||
"name": "Z.AI: GLM 4.5 Air - 128.000 context $0.20/M input tokens $1.10/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "z-ai/glm-4.5-air"
|
||||
},
|
||||
{
|
||||
"name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
|
||||
},
|
||||
{
|
||||
"name": "Qwen: Qwen3 Coder - 262K context $0.30/M input tokens $1.20/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-coder"
|
||||
},
|
||||
{
|
||||
"name": "Qwen: Qwen3 235B A22B Instruct 2507 - 262K context $0.12/M input tokens $0.59/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-235b-a22b-2507"
|
||||
},
|
||||
{
|
||||
"name": "MoonshotAI: Kimi K2 - 131K context $0.55/M input tokens $2.20/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "moonshotai/kimi-k2"
|
||||
},
|
||||
{
|
||||
"name": "Google: Gemini 2.5 Flash Lite - 1.05M context $0.10/M input tokens $0.40/M output tokens (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "google/gemini-2.5-flash-lite"
|
||||
},
|
||||
{
|
||||
"name": "Google: Gemini 2.5 Flash - 1.05M context $0.30/M input tokens $2.50/M output tokens $1.238/K input imgs (OpenRouter)",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "google/gemini-2.5-flash"
|
||||
},
|
||||
{
|
||||
"name": "openai/gpt-oss-20b - 131K context, $0,04/M input tokens, $0,16/M output tokens (OpenRouter)",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"aiModel": "openai/gpt-oss-20b",
|
||||
"isKeyRequired": true
|
||||
},
|
||||
{
|
||||
"name": "OpenAI gpt-oss 120B - 131K context, $0,09/M input tokens, $0,45/M output tokens (OpenRouter)",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"aiModel": "openai/gpt-oss-120b",
|
||||
"isKeyRequired": true
|
||||
}
|
||||
]],
|
||||
|
||||
[PREDEFINED_LISTS_KEYS.ENVS, [
|
||||
{
|
||||
"name": "Local, full package - min, gpt-oss 20B ( > 24GB VRAM | HD: 16 GB)",
|
||||
"description": "Everything local, gpt-oss 20B for agent",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "OpenAI gpt-oss 20B",
|
||||
"localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
|
||||
"endpoint": "http://localhost:8009",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, full package - medium, gpt-oss 20B (> 34 GB VRAM | HD: 20 GB)",
|
||||
"description": "Everything local, gpt-oss 20B for agent",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "OpenAI gpt-oss 20B",
|
||||
"localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
|
||||
"endpoint": "http://localhost:8009",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, full package - max, gpt-oss 20B (>48GB VRAM | HD: 30 GB)",
|
||||
"description": "Everything local, gpt-oss 20B for agent",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "OpenAI gpt-oss 20B",
|
||||
"localStartCommand": "llama-server -hf ggml-org/gpt-oss-20b-GGUF -c 0 --jinja --reasoning-format none -np 2 --port 8009",
|
||||
"endpoint": "http://localhost:8009",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only completions - CPU (HD: 1.6 GB)",
|
||||
"description": "For laptops only with CPU, lightweight model for completion ",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (CPU Only)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only completions (<= 8GB VRAM | HD: 1.6 GB) ",
|
||||
"description": "Only for code completions model Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only completions (<= 16GB VRAM | HD: 3,2 GB)",
|
||||
"description": "Only for completions, model Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM | HD: 3,2 GB)",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only completions (> 16GB VRAM)",
|
||||
"description": "Only for code completions, model Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM | HD: 8.1 GB)",
|
||||
"localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only chat & edit (CPU Only | HD: 2.2 GB)",
|
||||
"description": "Only for chat with AI, model Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (CPU Only)",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (CPU Only)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ub 1024 -b 1024 -dt 0.1 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only chat, chat with project context & edit (<= 16GB VRAM | HD: 4 GB)",
|
||||
"description": "Could be used for edit with AI, chat with AI, chat with AI with project context Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF + embeddings model (<= 16GB VRAM)",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only chat & edit (<= 8GB VRAM | HD: 1.65)",
|
||||
"description": "Only for chat with AI and edit with AI, Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Local, only chat, chat with project context & edit (> 16GB VRAM | HD: 8.6 GB)",
|
||||
"description": "Good for chat with AI, chat with AI with project context, edit Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF + embeddings model (> 16GB VRAM)",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Agent & chat (<= 16GB VRAM | HD: 3.8 GB) (requires OpenRouter API key)",
|
||||
"description": "Agent qwen 3 from OpenRouter (requires OpenRouter API key), chat and edit with small models (<= 16GB VRAM) ",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Full package - min (<= 16GB VRAM | HD: 4 GB) (requires OpenRouter API key)",
|
||||
"description": "The minimal configuration for completions (local), chat (local) and agent (remote - OpenRouter), requires OpenRouter API key for agent",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-1.5b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Full package - medium (<= 32GB VRAM | HD: 7.1 GB) (requires OpenRouter API key)",
|
||||
"description": "Agent qwen 3 from OpenRouter, completions & chat - medium size models, embeddings (<= 32GB VRAM))",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-3B-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-3b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF (<= 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-3B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Full package - max (>32 GB VRAM | HD: 17 GB) (requires OpenRouter API key)",
|
||||
"description": "Agent - qwen 3 from OpenRouter (API key required), completions, chat (>32 GB VRAM) ",
|
||||
"completion": {
|
||||
"name": "Qwen2.5-Coder-7B-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server --fim-qwen-7b-default -ngl 99 --port 8012",
|
||||
"endpoint": "http://localhost:8012",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF (> 16GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "Qwen: Qwen3 235B A22B Thinking 2507 - 262.144 context $0.118/M input tokens $0.118/M output tokens",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"isKeyRequired": true,
|
||||
"aiModel": "qwen/qwen3-235b-a22b-thinking-2507"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "OpenAI gpt-oss, 20B agent, chat - ( < 8GB VRAM | HD: 2.2 GB) (requires OpenRouter API key)",
|
||||
"description": "agent - Open AI gpt-oss 20GB from OpenRouter (requires API key), chat - small model (< 8GB VRAM)",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": ""
|
||||
},
|
||||
"chat": {
|
||||
"name": "Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF (<= 8GB VRAM)",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Qwen2.5-Coder-1.5B-Instruct-Q8_0-GGUF -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port 8011",
|
||||
"endpoint": "http://127.0.0.1:8011"
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "Nomic-Embed-Text-V2-GGUF",
|
||||
"localStartCommand": "llama-server -hf ggml-org/Nomic-Embed-Text-V2-GGUF -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port 8010",
|
||||
"endpoint": "http://127.0.0.1:8010"
|
||||
},
|
||||
"tools": {
|
||||
"name": "openai/gpt-oss-20b",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "https://openrouter.ai/api",
|
||||
"aiModel": "openai/gpt-oss-20b",
|
||||
"isKeyRequired": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Empty - no models",
|
||||
"description": "For cases when the settings (endpoint*, Launch_*, Api_key*, Ai_model) are used for configuring which servers to be used by llama-vscode instead of env.",
|
||||
"completion": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"chat": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"embeddings": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
},
|
||||
"tools": {
|
||||
"name": "",
|
||||
"localStartCommand": "",
|
||||
"endpoint": "",
|
||||
"aiModel": "",
|
||||
"isKeyRequired": false
|
||||
}
|
||||
}
|
||||
]],
|
||||
[PREDEFINED_LISTS_KEYS.AGENTS, [
|
||||
{
|
||||
"name": "llama-vscode help",
|
||||
"description": "This is an agent for helping how to use llama-vscode.",
|
||||
"systemInstruction": [
|
||||
"You are an agent for helping the user how to use llama-vscode.",
|
||||
"Use the available tools to get the help documentation for llama-vscode and answer the questions from the user.",
|
||||
"Base your answers on the help documentation from the tools."
|
||||
],
|
||||
"tools": [
|
||||
"llama_vscode_help"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "default",
|
||||
"description": "This is the default agent.",
|
||||
"systemInstruction": [
|
||||
"You are an agent for software development - please keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user.",
|
||||
"Only terminate your turn when you are sure that the problem is solved.",
|
||||
"If you are not sure about anything pertaining to the user’s request, use your tools to read files and gather the relevant information: do NOT guess or make up an answer.",
|
||||
"You MUST plan extensively before each function call, and reflect extensively on the outcomes of the previous function calls. DO NOT do this entire process by making function calls only, as this can impair your ability to solve the problem and think insightfully.",
|
||||
"Read the file content or a section of the file before editing a the file.",
|
||||
"",
|
||||
"# Workflow",
|
||||
"",
|
||||
"## High-Level Problem Solving Strategy",
|
||||
"",
|
||||
"1. Understand the problem deeply. Carefully read the issue and think critically about what is required.",
|
||||
"2. Investigate the codebase. Explore relevant files, search for key functions, and gather context.",
|
||||
"3. Develop a clear, step-by-step plan. Break down the fix into manageable, incremental steps.",
|
||||
"4. Implement the fix incrementally. Make small, testable code changes.",
|
||||
"5. Debug as needed. Use debugging techniques to isolate and resolve issues.",
|
||||
"6. Iterate until the root cause is fixed.",
|
||||
"7. Reflect and validate comprehensively.",
|
||||
"",
|
||||
"Refer to the detailed sections below for more information on each step.",
|
||||
"",
|
||||
"## 1. Deeply Understand the Problem",
|
||||
"Carefully read the issue and think hard about a plan to solve it before coding.",
|
||||
"",
|
||||
"## 2. Codebase Investigation",
|
||||
"- Explore relevant files and directories.",
|
||||
"- Search for key functions, classes, or variables related to the issue.",
|
||||
"- Read and understand relevant code snippets.",
|
||||
"- Identify the root cause of the problem.",
|
||||
"- Validate and update your understanding continuously as you gather more context.",
|
||||
"",
|
||||
"## 3. Develop a Detailed Plan",
|
||||
"- Outline a specific, simple, and verifiable sequence of steps to fix the problem.",
|
||||
"- Break down the fix into small, incremental changes.",
|
||||
"",
|
||||
"## 4. Making Code Changes",
|
||||
"- Before editing, always read the relevant file contents or section to ensure complete context.",
|
||||
"- If a patch is not applied correctly, attempt to reapply it.",
|
||||
"- Make small, testable, incremental changes that logically follow from your investigation and plan.",
|
||||
"",
|
||||
"## 5. Debugging",
|
||||
"- Make code changes only if you have high confidence they can solve the problem",
|
||||
"- When debugging, try to determine the root cause rather than addressing symptoms",
|
||||
"- Debug for as long as needed to identify the root cause and identify a fix",
|
||||
"- Use print statements, logs, or temporary code to inspect program state, including descriptive statements or error messages to understand what's happening",
|
||||
"- To test hypotheses, you can also add test statements or functions",
|
||||
"- Revisit your assumptions if unexpected behavior occurs.",
|
||||
"",
|
||||
"",
|
||||
"## 6. Final Verification",
|
||||
"- Confirm the root cause is fixed.",
|
||||
"- Review your solution for logic correctness and robustness.",
|
||||
"- Iterate until you are extremely confident the fix is complete.",
|
||||
"",
|
||||
"## 7. Final Reflection",
|
||||
"- If there are changed files, build the application to check for errors.",
|
||||
"- Reflect carefully on the original intent of the user and the problem statement.",
|
||||
"- Think about potential edge cases or scenarios.",
|
||||
"- Continue refining until you are confident the fix is robust and comprehensive.",
|
||||
""
|
||||
],
|
||||
"tools": [
|
||||
"run_terminal_command",
|
||||
"search_source",
|
||||
"read_file",
|
||||
"list_directory",
|
||||
"regex_search",
|
||||
"delete_file",
|
||||
"get_diff",
|
||||
"edit_file",
|
||||
"ask_user"
|
||||
]
|
||||
}
|
||||
]],
|
||||
[PREDEFINED_LISTS_KEYS.AGENT_COMMANDS, [
|
||||
{
|
||||
"name": "about",
|
||||
"description": "Reviews the project and provides information about it.",
|
||||
"prompt": [
|
||||
"What is this project about?",
|
||||
"Provide an overview of the project - purpose, architecture, language, etc."
|
||||
],
|
||||
"context": [
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "explain",
|
||||
"description": "Explains the attached code/file.",
|
||||
"prompt": [
|
||||
"Explain the provided source code."
|
||||
],
|
||||
"context": [
|
||||
]
|
||||
}
|
||||
]],
|
||||
])
|
||||
|
|
@ -47,7 +47,6 @@ export class LlamaAgent {
|
|||
} else {
|
||||
const absolutePath = Utils.getAbsolutFilePath("llama-vscode-rules.md");
|
||||
if (fs.existsSync(absolutePath)) projectContext += " \n\nAdditional rules from the user: \n" + fs.readFileSync(absolutePath, "utf-8");
|
||||
else vscode.window.showErrorMessage(`File with the user defined rules not found: ${this.app.configuration.agent_rules}`);
|
||||
}
|
||||
this.messages = [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -313,8 +313,8 @@ export class LlamaServer {
|
|||
chunks: any,
|
||||
nindent: number
|
||||
): Promise<LlamaChatResponse | undefined> => {
|
||||
let selectedModel: LlmModel = this.app.menu.getChatModel();
|
||||
let { endpoint, model, requestConfig } = this.getChatModelProperties(selectedModel);
|
||||
|
||||
let { endpoint, model, requestConfig } = this.getChatModelProperties();
|
||||
|
||||
const response = await axios.post<LlamaChatResponse>(
|
||||
`${Utils.trimTrailingSlash(endpoint)}/${this.app.configuration.ai_api_version}/chat/completions`,
|
||||
|
|
@ -328,8 +328,7 @@ export class LlamaServer {
|
|||
getChatCompletion = async (
|
||||
prompt: string,
|
||||
): Promise<LlamaChatResponse | undefined> => {
|
||||
let selectedModel: LlmModel = this.app.menu.getChatModel();
|
||||
let { endpoint, model, requestConfig } = this.getChatModelProperties(selectedModel);
|
||||
let { endpoint, model, requestConfig } = this.getChatModelProperties();
|
||||
|
||||
const response = await axios.post<LlamaChatResponse>(
|
||||
`${Utils.trimTrailingSlash(endpoint)}/${this.app.configuration.ai_api_version}/chat/completions`,
|
||||
|
|
@ -742,12 +741,15 @@ export class LlamaServer {
|
|||
}
|
||||
|
||||
|
||||
|
||||
private getChatModelProperties(selectedModel: LlmModel) {
|
||||
private getChatModelProperties() {
|
||||
let selectedModel: LlmModel = this.app.menu.getChatModel();
|
||||
if (!this.app.menu.isChatModelSelected()) selectedModel = this.app.menu.getToolsModel();
|
||||
|
||||
let model = this.app.configuration.ai_model;
|
||||
if (selectedModel?.aiModel !== undefined && selectedModel.aiModel) model = selectedModel.aiModel;
|
||||
|
||||
let endpoint = this.app.configuration.endpoint_chat;
|
||||
if (!endpoint) endpoint = this.app.configuration.endpoint_tools;
|
||||
if (selectedModel?.endpoint !== undefined && selectedModel.endpoint) endpoint = selectedModel.endpoint;
|
||||
|
||||
let requestConfig = this.app.configuration.axiosRequestConfigChat;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { LlmModel, Env, Agent, ContextCustom } from './types';
|
|||
import { Configuration } from './configuration';
|
||||
import { Plugin } from './plugin';
|
||||
import { Utils } from './utils';
|
||||
import { ModelType, SETTING_NAME_FOR_LIST } from './constants';
|
||||
|
||||
export class LlamaWebviewProvider implements vscode.WebviewViewProvider {
|
||||
public static readonly viewType = 'llama-vscode.webview';
|
||||
|
|
@ -69,47 +70,43 @@ export class LlamaWebviewProvider implements vscode.WebviewViewProvider {
|
|||
this.app.llamaAgent.stopAgent();
|
||||
break;
|
||||
case 'selectModelWithTools':
|
||||
let toolsTypeDetails = this.app.menu.getToolsTypeDetails();
|
||||
await this.app.menu.selectStartModel(toolsTypeDetails);
|
||||
await this.app.menu.selectAndSetModel(ModelType.Tools, this.app.configuration.tools_models_list);
|
||||
break;
|
||||
case 'selectModelForChat':
|
||||
let chatTypeDetails = this.app.menu.getChatTypeDetails();
|
||||
await this.app.menu.selectStartModel(chatTypeDetails);
|
||||
await this.app.menu.selectAndSetModel(ModelType.Chat, this.app.configuration.chat_models_list);
|
||||
break;
|
||||
case 'selectModelForEmbeddings':
|
||||
let embsTypeDetails = this.app.menu.getEmbsTypeDetails()
|
||||
await this.app.menu.selectStartModel(embsTypeDetails);
|
||||
await this.app.menu.selectAndSetModel(ModelType.Embeddings, this.app.configuration.embeddings_models_list);
|
||||
break;
|
||||
case 'selectModelForCompletion':
|
||||
let complTypeDetails = this.app.menu.getComplTypeDetails()
|
||||
await this.app.menu.selectStartModel(complTypeDetails);
|
||||
await this.app.menu.selectAndSetModel(ModelType.Completion, this.app.configuration.completion_models_list);
|
||||
break;
|
||||
case 'deselectCompletionModel':
|
||||
await this.app.menu.deselectStopModel(this.app.menu.getComplTypeDetails())
|
||||
await this.app.menu.deselectAndClearModel(ModelType.Completion);
|
||||
break;
|
||||
case 'deselectChatModel':
|
||||
await this.app.menu.deselectStopModel(this.app.menu.getChatTypeDetails())
|
||||
await this.app.menu.deselectAndClearModel(ModelType.Chat);
|
||||
break;
|
||||
case 'deselectEmbsModel':
|
||||
await this.app.menu.deselectStopModel(this.app.menu.getEmbsTypeDetails())
|
||||
await this.app.menu.deselectAndClearModel(ModelType.Embeddings);
|
||||
break;
|
||||
case 'deselectToolsModel':
|
||||
await this.app.menu.deselectStopModel(this.app.menu.getToolsTypeDetails())
|
||||
await this.app.menu.deselectAndClearModel(ModelType.Tools);
|
||||
break;
|
||||
case 'deselectAgent':
|
||||
await this.app.menu.deselectAgent();
|
||||
break;
|
||||
case 'showCompletionModel':
|
||||
this.app.menu.showModelDetails(this.app.menu.getComplModel())
|
||||
this.app.modelService.showModelDetails(this.app.menu.getComplModel());
|
||||
break;
|
||||
case 'showChatModel':
|
||||
this.app.menu.showModelDetails(this.app.menu.getChatModel())
|
||||
this.app.modelService.showModelDetails(this.app.menu.getChatModel());
|
||||
break;
|
||||
case 'showEmbsModel':
|
||||
this.app.menu.showModelDetails(this.app.menu.getEmbeddingsModel())
|
||||
this.app.modelService.showModelDetails(this.app.menu.getEmbeddingsModel());
|
||||
break;
|
||||
case 'showToolsModel':
|
||||
this.app.menu.showModelDetails(this.app.menu.getToolsModel())
|
||||
this.app.modelService.showModelDetails(this.app.menu.getToolsModel());
|
||||
break;
|
||||
case 'showAgentDetails':
|
||||
this.app.menu.showAgentDetails(this.app.menu.getAgent())
|
||||
|
|
@ -126,8 +123,7 @@ export class LlamaWebviewProvider implements vscode.WebviewViewProvider {
|
|||
this.app.menu.installLlamacpp();
|
||||
break;
|
||||
case 'addHuggingfaceModel':
|
||||
let chatTypeDetailsHf = this.app.menu.getChatTypeDetails();
|
||||
await this.app.menu.addHuggingfaceModelToList(chatTypeDetailsHf);
|
||||
await this.app.modelService.addModel(ModelType.Chat, "hf");
|
||||
break;
|
||||
case 'selectEnv':
|
||||
await this.app.menu.selectEnvFromList(this.app.configuration.envs_list);
|
||||
|
|
@ -190,7 +186,7 @@ export class LlamaWebviewProvider implements vscode.WebviewViewProvider {
|
|||
await vscode.window.showTextDocument(document);
|
||||
break;
|
||||
case 'addEnv':
|
||||
this.app.menu.addEnvToList(this.app.configuration.envs_list, "envs_list")
|
||||
this.app.menu.addEnvToList(this.app.configuration.envs_list, SETTING_NAME_FOR_LIST.ENVS)
|
||||
break;
|
||||
case 'toggleCompletionsEnabled':
|
||||
this.app.configuration.updateConfigValue("enabled", message.enabled)
|
||||
|
|
|
|||
1289
src/menu.ts
1289
src/menu.ts
File diff suppressed because it is too large
Load diff
131
src/services/external-model-strategy.ts
Normal file
131
src/services/external-model-strategy.ts
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import * as vscode from "vscode";
|
||||
import { QuickPickItem } from "vscode";
|
||||
import { Application } from "../application";
|
||||
import { IAddStrategy, LlmModel, ModelTypeDetails } from "../types";
|
||||
import { Utils } from "../utils";
|
||||
import * as axios from "axios";
|
||||
import { ModelType, UI_TEXT_KEYS, HF_MODEL_TEMPLATES, SETTING_TO_MODEL_TYPE, MODEL_TYPE_CONFIG } from "../constants";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
import { Configuration } from "../configuration";
|
||||
|
||||
|
||||
/**
 * Add-strategy for models that are reached through an endpoint the user
 * supplies: the stored model always has an empty `localStartCommand`.
 * All properties are collected interactively through VS Code input boxes.
 */
export class ExternalModelStrategy implements IAddStrategy {
    private app: Application;

    constructor(app: Application) {
        this.app = app;
    }

    /**
     * Asks the user for name, endpoint, provider model name and whether an API
     * key is required, shows a summary for confirmation and, when confirmed,
     * appends the model to `details.modelsList` and writes the list to the
     * setting named by `details.modelsListSettingName`.
     *
     * Dismissing any of the input boxes aborts the operation without changes.
     *
     * @param details models list, its setting name and the default host/port
     *                used to build the endpoint example shown to the user.
     */
    async add(details: ModelTypeDetails): Promise<void> {
        const hostEndpoint = "http://" + details.newModelHost;

        // NOTE(review): the numeric argument is presumably the maximum number of
        // input attempts - confirm against Utils.getValidatedInput.
        let name = await Utils.getValidatedInput(
            'name for your model (required)',
            (input) => input.trim() !== '',
            5,
            {
                placeHolder: 'Enter a user friendly name for your model (required)',
                value: ''
            }
        );
        if (name === undefined) {
            vscode.window.showInformationMessage("Model addition cancelled.");
            return;
        }
        name = this.sanitizeInput(name);

        let endpoint = await Utils.getValidatedInput(
            'Endpoint for your model (required)',
            (input) => input.trim() !== '',
            5,
            {
                placeHolder: 'Endpoint for accessing your model, i.e. ' + hostEndpoint + ':' + details.newModelPort + ' or https://openrouter.ai/api (required)',
                value: ''
            }
        );
        if (endpoint === undefined) {
            vscode.window.showInformationMessage("Model addition cancelled.");
            return;
        }
        endpoint = this.sanitizeInput(endpoint);

        const aiModelInput = await vscode.window.showInputBox({
            placeHolder: 'Model name, exactly as expected by the provider, i.e. kimi-latest ',
            prompt: 'Enter model name as expected by the provider (leave empty if llama-server is used)',
            value: ''
        });
        // showInputBox resolves to undefined when the box is dismissed. An empty
        // string is a valid answer here, a dismissal is not: abort like the
        // previous prompts do instead of silently continuing.
        if (aiModelInput === undefined) {
            vscode.window.showInformationMessage("Model addition cancelled.");
            return;
        }
        const aiModel = this.sanitizeInput(aiModelInput);

        const isKeyRequired = await Utils.confirmAction(`Is API key required for this endpoint (${endpoint})?`, "");
        let newModel: LlmModel = {
            name: name,
            localStartCommand: "",
            endpoint: endpoint,
            aiModel: aiModel,
            isKeyRequired: isKeyRequired
        };

        const shouldAddModel = await Utils.confirmAction("You have entered:",
            "\nname: " + name +
            "\nlocal start command: " +
            "\nendpoint: " + endpoint +
            "\nmodel name for provider: " + aiModel +
            "\napi key required: " + isKeyRequired +
            "\nDo you want to add a model with these properties?"
        );

        if (shouldAddModel) {
            let shouldOverwrite = false;
            [newModel.name, shouldOverwrite] = await this.getUniqueModelName(details.modelsList, newModel);
            if (!newModel.name) {
                vscode.window.showInformationMessage("The model was not added as the name was not provided.")
                return;
            }
            if (shouldOverwrite) {
                // Drop the entry being replaced; the new model is appended below,
                // so an overwritten model moves to the end of the list.
                const index = details.modelsList.findIndex(model => model.name === newModel.name);
                if (index !== -1) {
                    details.modelsList.splice(index, 1);
                }
            }
            details.modelsList.push(newModel);
            this.app.configuration.updateConfigValue(details.modelsListSettingName, details.modelsList);
            vscode.window.showInformationMessage("The model is added.")
        }
    }

    /** Trims surrounding whitespace; any falsy input yields an empty string. */
    private sanitizeInput(input: string): string {
        return input ? input.trim() : '';
    }

    /**
     * Resolves a name clash between `newModel` and the entries of `modelsList`.
     *
     * While the candidate name is already taken the user is asked whether the
     * existing entry should be overwritten; on refusal a different name is
     * requested. The loop ends when the name is free, the user agrees to
     * overwrite, or an empty name is entered (or the box is dismissed).
     *
     * @returns `[name, shouldOverwrite]`; `name` is empty when the user gave up.
     */
    private async getUniqueModelName(modelsList: LlmModel[], newModel: LlmModel): Promise<[string, boolean]> {
        let uniqueName = newModel.name;
        let shouldOverwrite = false;
        let modelSameName = modelsList.find(model => model.name === uniqueName);
        while (uniqueName && !shouldOverwrite && modelSameName !== undefined) {
            shouldOverwrite = await Utils.confirmAction("A model with the same name already exists. Do you want to overwrite the existing model?",
                "Existing model:\n" +
                this.getModelDetailsAsString(modelSameName) +
                "\n\nNew model:\n" +
                // After a rename attempt the colliding name is uniqueName, not
                // newModel.name, so describe the model under the name being checked.
                this.getModelDetailsAsString({ ...newModel, name: uniqueName })
            );
            if (!shouldOverwrite) {
                uniqueName = (await vscode.window.showInputBox({
                    placeHolder: 'a unique name for your new model',
                    prompt: 'Enter a unique name for your new model. Leave empty to cancel entering.',
                    value: newModel.name
                })) ?? "";
                uniqueName = this.sanitizeInput(uniqueName);
                if (uniqueName) modelSameName = modelsList.find(model => model.name === uniqueName);
            }
        }

        return [uniqueName, shouldOverwrite]
    }

    /** Renders the properties of `model` as a multi-line text for dialogs. */
    private getModelDetailsAsString(model: LlmModel): string {
        return "model: " +
            "\nname: " + model.name +
            "\nlocal start command: " + model.localStartCommand +
            "\nendpoint: " + model.endpoint +
            "\nmodel name for provider: " + model.aiModel +
            "\napi key required: " + model.isKeyRequired
    }
}
|
||||
264
src/services/hf-model-strategy.ts
Normal file
264
src/services/hf-model-strategy.ts
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
import * as vscode from "vscode";
|
||||
import { QuickPickItem } from "vscode";
|
||||
import { Application } from "../application";
|
||||
import { IAddStrategy, LlmModel, ModelTypeDetails } from "../types";
|
||||
import { Utils } from "../utils";
|
||||
import * as axios from "axios";
|
||||
import { ModelType, UI_TEXT_KEYS, HF_MODEL_TEMPLATES, SETTING_TO_MODEL_TYPE, MODEL_TYPE_CONFIG } from "../constants";
|
||||
|
||||
/**
 * One entry of a Hugging Face model search result, limited to the fields this
 * module reads.
 */
interface HuggingfaceModel {
    /** Model identifier; shown as the quick-pick label and used to look up the model files. */
    modelId: string;
    /** Creation timestamp, displayed verbatim in the quick-pick description. */
    createdAt: string;
    /** Download counter, displayed in the quick-pick description. */
    downloads: number;
    /** Like counter, displayed in the quick-pick description. */
    likes: number;
    /** Pipeline tag, displayed in the quick-pick description. */
    pipeline_tag: string;
    /** Tags of the model, displayed in the quick-pick description. */
    tags: string[];
    /** Entries with this flag set are left out of the selection list. */
    private: boolean;
}
|
||||
|
||||
/** One entry of the file listing of a Hugging Face model repository. */
interface HuggingfaceFile {
    /** Entry kind; only entries whose type is "file" are considered as model files. */
    type: string;
    /** Path of the entry; matched against the ".gguf" suffix and the model name. */
    path: string;
    /** Size of the entry - presumably in bytes, TODO confirm against the API. */
    size: number;
}
|
||||
|
||||
export class HfModelStrategy implements IAddStrategy {
|
||||
/**
 * @param app owning application object; used by this strategy to reach the
 *            configuration and the model service.
 */
constructor(private app: Application) {}
|
||||
|
||||
/**
 * Adds a Hugging Face model to the list described by `details`.
 *
 * The user enters search keywords and picks a model (and file); a start
 * command is built from the template of the model type, the resulting model
 * is shown for confirmation, stored in `details.modelsList` and written to
 * the matching setting. Finally the user may select/start the new model.
 *
 * @param details models list, its setting name and the host/port for the new model.
 */
add = async (details: ModelTypeDetails): Promise<void> => {
    const modelType = SETTING_TO_MODEL_TYPE[details.modelsListSettingName];
    // Host and port are filled in immediately; the model slot is kept as a
    // "<model_name>" marker until the user has chosen a model.
    const commandTemplate = HF_MODEL_TEMPLATES[modelType]
        .replace('MODEL_PLACEHOLDER', '<model_name>')
        .replace('PORT_PLACEHOLDER', details.newModelPort.toString())
        .replace('HOST_PLACEHOLDER', details.newModelHost);
    const baseUrl = "http://" + details.newModelHost;

    const typedKeywords = await vscode.window.showInputBox({
        placeHolder: 'keywords for searching a model from huggingface',
        prompt: 'Enter keywords to search for models in huggingface',
        value: ""
    });
    const searchWords = this.sanitizeInput(typedKeywords || '');
    if (!searchWords) {
        vscode.window.showInformationMessage("No huggingface model selected.")
        return;
    }

    // An empty name means the selection was abandoned; the helper has already
    // informed the user in that case.
    const hfModelName = await this.getDownloadModelName(searchWords);
    if (hfModelName === "") return;

    const startCommand = this.app.modelService.sanitizeCommand(
        commandTemplate.replace('<model_name>', hfModelName)
    );
    const modelEndpoint = this.sanitizeInput(baseUrl + ":" + details.newModelPort);
    const displayName = this.sanitizeInput("hf: " + hfModelName);
    const newHfModel: LlmModel = {
        name: displayName,
        localStartCommand: startCommand,
        endpoint: modelEndpoint,
        aiModel: "",
        isKeyRequired: false
    };

    const confirmed = await Utils.confirmAction("You have entered:",
        this.getModelDetailsAsString(newHfModel) +
        "\nDo you want to add a model with these properties?"
    );
    if (!confirmed) return;

    const [resolvedName, replaceExisting] = await this.getUniqueModelName(details.modelsList, newHfModel);
    newHfModel.name = resolvedName;
    if (!newHfModel.name) {
        vscode.window.showInformationMessage("The model was not added as the name was not provided.")
        return;
    }

    if (replaceExisting) {
        // Remove the entry that is being overwritten before appending the new one.
        const existingAt = details.modelsList.findIndex(entry => entry.name === newHfModel.name);
        if (existingAt !== -1) details.modelsList.splice(existingAt, 1);
    }
    details.modelsList.push(newHfModel);
    this.app.configuration.updateConfigValue(details.modelsListSettingName, details.modelsList);
    vscode.window.showInformationMessage("The model is added: " + newHfModel.name)

    const startNow = await Utils.confirmAction("Do you want to select/start the newly added model?", "");
    if (startNow) {
        await this.app.modelService.selectStartModel(newHfModel, modelType, details);
    }
}
|
||||
|
||||
/**
 * Lets the user pick a Hugging Face model matching `searchWords` and, when
 * the model offers several candidate files, one of those files.
 *
 * Every early exit informs the user with an information message.
 *
 * @param searchWords keywords passed to the Hugging Face model search.
 * @returns the name to put into the start command, or an empty string when
 *          nothing was found or the user dismissed one of the pickers.
 */
private async getDownloadModelName(searchWords: string): Promise<string> {
    searchWords = this.sanitizeInput(searchWords);
    const foundModels = await this.getHfModels(searchWords);
    if (!foundModels || foundModels.length === 0) {
        // Nothing was offered for selection, so report the empty search result
        // rather than claiming that the user selected nothing.
        vscode.window.showInformationMessage("No huggingface models found for: " + searchWords);
        return "";
    }

    // Private models are not offered for selection.
    const hfModelsQp: QuickPickItem[] = [];
    for (const hfModel of foundModels) {
        if (hfModel.private) continue;
        hfModelsQp.push({
            label: hfModel.modelId,
            description: "created: " + hfModel.createdAt + " | downloads: " + hfModel.downloads + " | likes: " + hfModel.likes +
                " | pipeline: " + hfModel.pipeline_tag + " | tags: " + hfModel.tags
        });
    }
    if (hfModelsQp.length === 0) {
        // Every hit was private: avoid opening an empty picker.
        vscode.window.showInformationMessage("No huggingface models found for: " + searchWords);
        return "";
    }

    const selModel = await vscode.window.showQuickPick(hfModelsQp);
    if (!selModel || !selModel.label) {
        vscode.window.showInformationMessage("No huggingface model selected.");
        return "";
    }

    const modelFiles = await this.getHfModelFiles(selModel.label);
    if (!modelFiles || modelFiles.length === 0) {
        vscode.window.showInformationMessage("No files found for model " + selModel.label);
        return "";
    }

    const hfModelsFilesQp: QuickPickItem[] = await this.getFilesOfModel(selModel, modelFiles);
    if (hfModelsFilesQp.length <= 0) {
        vscode.window.showInformationMessage("No files found for model " + selModel.label + " or the files are with unexpected naming conventions.");
        return "";
    }

    const selFile = await vscode.window.showQuickPick(hfModelsFilesQp);
    if (!selFile) {
        vscode.window.showInformationMessage("No files selected for model " + selModel.label + ".");
        return "";
    }

    // With a single candidate the model id itself is used; with several the
    // label of the chosen entry identifies the variant.
    const hfModelName = hfModelsFilesQp.length === 1 ? selModel.label : selFile.label;
    return this.sanitizeInput(hfModelName);
}
|
||||
|
||||
/**
 * Builds the quick-pick entries for the downloadable GGUF variants of a repository.
 *
 * - A top-level ".gguf" file whose name starts with the repository name (with
 *   "-gguf" removed) becomes an entry labelled "<repo>:<quantization>".
 * - While every matching file so far ends in "NNNNN-of-NNNNN", the files are
 *   treated as parts of one model and only counted; a single aggregated entry
 *   is appended at the end in that case.
 * - Every directory becomes an entry labelled "<repo>:<directory>" with the
 *   total size and number of the ".gguf" files it contains.
 *
 * All sizes are reported in decimal gigabytes (bytes / 1e9).
 *
 * @param selModel   The quick-pick item of the chosen repository; its label is the repository id.
 * @param modelFiles The entries of the repository root as returned by getHfModelFiles.
 * @returns The entries to offer to the user.
 */
private async getFilesOfModel(selModel: vscode.QuickPickItem, modelFiles: HuggingfaceFile[]): Promise<QuickPickItem[]> {
    const hfModelsFilesQp: QuickPickItem[] = [];
    const ggufSuffix = ".gguf";
    const bytesPerGb = 1000000000;
    const toGb = (bytes: number): number => Math.round((bytes / bytesPerGb) * 100) / 100;

    // Repository ids normally look like "owner/name"; fall back to the whole
    // label when there is no owner part instead of failing on undefined.
    const repoName = selModel.label.split("/")[1] ?? selModel.label;
    const cleanModelName = repoName.replace(/-gguf/gi, "");
    const cleanModelNameLower = cleanModelName.toLowerCase();

    let arePartsOfOneFile = true;
    let multiplePartsSize = 0;
    let multiplePartsCount = 0;

    for (const file of modelFiles) {
        if (file.type == "file"
            && file.path.toLowerCase().endsWith(ggufSuffix)
            && file.path.toLowerCase().startsWith(cleanModelNameLower)) {
            // Text between "<model name><separator>" and ".gguf".
            const quantization = file.path.slice(cleanModelName.length + 1, -ggufSuffix.length);
            // "00001-of-00005" is 14 characters long; once one file does not end
            // that way the files are no longer treated as parts of a single model.
            if (arePartsOfOneFile && !this.isOneOfMany(quantization.slice(-14))) arePartsOfOneFile = false;
            if (!arePartsOfOneFile) {
                hfModelsFilesQp.push({
                    label: selModel.label + (quantization ? ":" + quantization : ""),
                    description: "size: " + toGb(file.size) + "GB"
                });
            } else {
                multiplePartsSize += file.size;
                multiplePartsCount++;
            }
        }
        if (file.type == "directory") {
            const subfolderFiles = await this.getHfModelSubforlderFiles(selModel.label, file.path);
            let totalSize = 0;
            let totalFiles = 0;
            for (const subfolderFile of subfolderFiles) {
                if (subfolderFile.path.toLowerCase().endsWith(ggufSuffix)) {
                    totalSize += subfolderFile.size;
                    totalFiles++;
                }
            }
            hfModelsFilesQp.push({
                label: selModel.label + ":" + file.path,
                description: "size: " + toGb(totalSize) + " GB | files: " + totalFiles
            });
        }
    }

    // NOTE(review): this entry is also added when no file matched at all
    // (size 0, files 0), so the result is never empty - confirm this is intended.
    if (arePartsOfOneFile) {
        hfModelsFilesQp.push({
            label: selModel.label,
            description: "size: " + toGb(multiplePartsSize) + " GB | files: " + multiplePartsCount
        });
    }
    return hfModelsFilesQp;
}
|
||||
|
||||
/**
 * Tells whether the text is exactly a split-file marker of the form
 * "NNNNN-of-NNNNN" (two five-digit numbers).
 *
 * @param input The text to check.
 * @returns true when the whole text matches the marker pattern.
 */
private isOneOfMany(input: string): boolean {
    return /^\d{5}-of-\d{5}$/.test(input);
}
|
||||
|
||||
/**
 * Searches the Hugging Face model API for GGUF models.
 *
 * @param searchWords Whitespace-separated search terms; may be empty.
 * @returns The response payload as a list of models, or an empty list when the
 *          response carries no data.
 */
private async getHfModels(searchWords: string): Promise<HuggingfaceModel[]> {
    // Split on any whitespace run and URL-encode every term, so that all spaces
    // (not only the first one) become "+" and reserved characters such as "&"
    // or "#" cannot alter the query string.
    const terms = searchWords
        .split(/\s+/)
        .filter(term => term.length > 0)
        .map(term => encodeURIComponent(term));
    const hfEndpoint = "https://huggingface.co/api/models?limit=1500&search=" + ["GGUF", ...terms].join("+");
    const result = await axios.default.get(
        `${Utils.trimTrailingSlash(hfEndpoint)}`
    );

    if (result && result.data) return result.data as HuggingfaceModel[]
    else return [];
}
|
||||
|
||||
/**
 * Fetches the listing of the "main" tree of a Hugging Face model repository.
 *
 * @param modelId The repository id, inserted into the URL as is.
 * @returns The response payload as a list of files, or an empty list when the
 *          response carries no data.
 */
private async getHfModelFiles(modelId: string): Promise<HuggingfaceFile[]> {
    const treeUrl = "https://huggingface.co/api/models/" + modelId + "/tree/main";
    const response = await axios.default.get(`${Utils.trimTrailingSlash(treeUrl)}`);
    if (!(response && response.data)) {
        return [];
    }
    return response.data as HuggingfaceFile[];
}
|
||||
|
||||
/**
 * Fetches the listing of one subfolder of the "main" tree of a Hugging Face
 * model repository.
 *
 * @param modelId   The repository id, inserted into the URL as is.
 * @param subfolder The folder path below the repository root, inserted as is.
 * @returns The response payload as a list of files, or an empty list when the
 *          response carries no data.
 */
private async getHfModelSubforlderFiles(modelId: string, subfolder: string): Promise<HuggingfaceFile[]> {
    const folderUrl = "https://huggingface.co/api/models/" + modelId + "/tree/main/" + subfolder;
    const response = await axios.default.get(`${Utils.trimTrailingSlash(folderUrl)}`);
    if (!(response && response.data)) {
        return [];
    }
    return response.data as HuggingfaceFile[];
}
|
||||
|
||||
/**
 * Trims surrounding whitespace from user input.
 *
 * @param input The raw text; any falsy value is accepted.
 * @returns The trimmed text, or an empty string for falsy input.
 */
private sanitizeInput(input: string): string {
    if (!input) {
        return '';
    }
    return input.trim();
}
|
||||
|
||||
/**
 * Resolves a name clash between a new model and the existing list.
 *
 * While the candidate name is already taken, the user is asked whether the
 * existing model should be overwritten; on "no" a different name is requested.
 * The loop ends when the name is free, the user agrees to overwrite, or the
 * user enters nothing.
 *
 * @param modelsList The models that already exist.
 * @param newModel   The model about to be added; its name is the first candidate.
 * @returns A tuple of the final name (empty when the user gave none) and a flag
 *          telling whether the existing model with that name should be replaced.
 */
private async getUniqueModelName(modelsList: LlmModel[], newModel: LlmModel): Promise<[string, boolean]> {
    const findByName = (name: string) => modelsList.find(model => model.name === name);

    let candidateName = newModel.name;
    let overwrite = false;
    let clash = findByName(candidateName);

    while (candidateName && clash !== undefined) {
        overwrite = await Utils.confirmAction(
            "A model with the same name already exists. Do you want to overwrite the existing model?",
            "Existing model:\n" +
            this.getModelDetailsAsString(clash) +
            "\n\nNew model:\n" +
            this.getModelDetailsAsString(newModel)
        );
        if (overwrite) {
            break;
        }

        const typedName = await vscode.window.showInputBox({
            placeHolder: 'a unique name for your new model',
            prompt: 'Enter a unique name for your new model. Leave empty to cancel entering.',
            value: newModel.name
        });
        candidateName = this.sanitizeInput(typedName ?? "");
        if (!candidateName) {
            // An empty answer cancels the whole operation.
            break;
        }
        clash = findByName(candidateName);
    }

    return [candidateName, overwrite]
}
|
||||
|
||||
/**
 * Renders the properties of a model as multi-line text, one property per line,
 * preceded by a "model: " heading line.
 *
 * @param model The model to describe.
 * @returns The text used in confirmation dialogs.
 */
private getModelDetailsAsString(model: LlmModel): string {
    const lines = [
        "model: ",
        "name: " + model.name,
        "local start command: " + model.localStartCommand,
        "endpoint: " + model.endpoint,
        "model name for provider: " + model.aiModel,
        "api key required: " + model.isKeyRequired
    ];
    return lines.join("\n");
}
|
||||
}
|
||||
145
src/services/local-model-strategy.ts
Normal file
145
src/services/local-model-strategy.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
|
||||
import * as vscode from "vscode";
|
||||
import { QuickPickItem } from "vscode";
|
||||
import { Application } from "../application";
|
||||
import { IAddStrategy, LlmModel, ModelTypeDetails } from "../types";
|
||||
import { Utils } from "../utils";
|
||||
import * as axios from "axios";
|
||||
import { ModelType, UI_TEXT_KEYS, HF_MODEL_TEMPLATES, SETTING_TO_MODEL_TYPE, MODEL_TYPE_CONFIG } from "../constants";
|
||||
|
||||
|
||||
/**
 * Add-strategy for models that are started locally with a shell command.
 *
 * The user is asked for a name, a start command and an endpoint; after a
 * confirmation the model is appended to the list of the given model type and
 * the list is written back to the configuration.
 */
export class LocalModelStrategy implements IAddStrategy {
    private app: Application;

    constructor(app: Application) {
        this.app = app;
    }

    /**
     * Interactively collects the properties of a local model and stores it.
     *
     * @param details The settings of the model type the new model belongs to
     *                (list, setting name, default host and port).
     */
    async add(details: ModelTypeDetails): Promise<void> {
        const hostEndpoint = "http://" + details.newModelHost;
        const withPortAndHost = (commandPrefix: string): string =>
            commandPrefix + details.newModelPort + " --host " + details.newModelHost;
        // Suggested start command per models-list setting.
        const modelListToLocalCommand = new Map<string, string>([
            ["completion_models_list", withPortAndHost("llama-server -hf <model name from hugging face, i.e: ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF> -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 --port ")],
            ["chat_models_list", withPortAndHost('llama-server -hf <model name from hugging face, i.e: ggml-org/Qwen2.5-Coder-7B-Instruct-Q8_0-GGUF> -ngl 99 -ub 1024 -b 1024 --ctx-size 0 --cache-reuse 256 -np 2 --port ')],
            ["embeddings_models_list", withPortAndHost("llama-server -hf <model name from hugging face, i.e: ggml-org/Nomic-Embed-Text-V2-GGUF> -ngl 99 -ub 2048 -b 2048 --ctx-size 2048 --embeddings --port ")],
            ["tools_models_list", withPortAndHost("llama-server -hf <model name from hugging face, i.e: unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Q8_0> --jinja -ngl 99 -c 0 -ub 1024 -b 1024 --cache-reuse 256 --port ")]
        ]);

        const isNotBlank = (input: string): boolean => input.trim() !== '';
        const reportCancelled = (): void => {
            vscode.window.showInformationMessage("Model addition cancelled.");
        };

        // Step 1: display name.
        const enteredName = await Utils.getValidatedInput(
            'name for your model (required)',
            isNotBlank,
            5,
            {
                placeHolder: 'Enter a user friendly name for your model (required)',
                value: ''
            }
        );
        if (enteredName === undefined) {
            reportCancelled();
            return;
        }
        const name = this.sanitizeInput(enteredName);

        // Step 2: start command, pre-filled with the suggestion for this list.
        const enteredCommand = await Utils.getValidatedInput(
            'Enter a command to start the model locally',
            isNotBlank,
            5,
            {
                placeHolder: 'A command to start the model locally, i.e. llama-server -m model_name.gguf --port '+ details.newModelPort + '. (required for local model)',
                value: modelListToLocalCommand.get(details.modelsListSettingName) || ''
            }
        );
        if (enteredCommand === undefined) {
            reportCancelled();
            return;
        }
        const localStartCommand = this.app.modelService.sanitizeCommand(enteredCommand);

        // Step 3: endpoint, pre-filled with the configured host and port.
        const enteredEndpoint = await Utils.getValidatedInput(
            'Endpoint for accessing your model',
            isNotBlank,
            5,
            {
                placeHolder: 'Endpoint for accessing your model, i.e. ' + hostEndpoint + ':' + details.newModelPort + ' (required)',
                value: hostEndpoint + ':' + details.newModelPort
            }
        );
        if (enteredEndpoint === undefined) {
            reportCancelled();
            return;
        }
        const endpoint = this.sanitizeInput(enteredEndpoint);

        const isKeyRequired = await Utils.confirmAction(`Is API key required for this endpoint (${endpoint})?`, "");
        const newModel: LlmModel = {
            name: name,
            localStartCommand: localStartCommand,
            endpoint: endpoint,
            aiModel: "",
            isKeyRequired: isKeyRequired
        };

        const summary = [
            "name: " + name,
            "local start command: " + localStartCommand,
            "endpoint: " + endpoint,
            "model name for provider: ",
            "api key required: " + isKeyRequired,
            "Do you want to add a model with these properties?"
        ].join("\n");
        const shouldAddModel = await Utils.confirmAction("You have entered:", summary);
        if (!shouldAddModel) {
            return;
        }

        const [finalName, shouldOverwrite] = await this.getUniqueModelName(details.modelsList, newModel);
        newModel.name = finalName;
        if (!newModel.name) {
            vscode.window.showInformationMessage("The model was not added as the name was not provided.")
            return;
        }
        if (shouldOverwrite) {
            // Drop the model that currently holds the name before appending the new one.
            const existingIndex = details.modelsList.findIndex(model => model.name === newModel.name);
            if (existingIndex !== -1) {
                details.modelsList.splice(existingIndex, 1);
            }
        }
        details.modelsList.push(newModel);
        this.app.configuration.updateConfigValue(details.modelsListSettingName, details.modelsList);
        vscode.window.showInformationMessage("The model is added.")
    }

    /**
     * Trims surrounding whitespace; falsy input yields an empty string.
     */
    private sanitizeInput(input: string): string {
        if (!input) {
            return '';
        }
        return input.trim();
    }

    /**
     * Resolves a name clash between a new model and the existing list by asking
     * the user to either overwrite the existing model or enter another name.
     *
     * @param modelsList The models that already exist.
     * @param newModel   The model about to be added; its name is the first candidate.
     * @returns A tuple of the final name (empty when the user gave none) and a
     *          flag telling whether the existing model should be replaced.
     */
    private async getUniqueModelName(modelsList: LlmModel[], newModel: LlmModel): Promise<[string, boolean]> {
        const findByName = (candidate: string) => modelsList.find(model => model.name === candidate);

        let candidateName = newModel.name;
        let overwrite = false;
        let clash = findByName(candidateName);

        while (candidateName && clash !== undefined) {
            overwrite = await Utils.confirmAction(
                "A model with the same name already exists. Do you want to overwrite the existing model?",
                "Existing model:\n" +
                this.getModelDetailsAsString(clash) +
                "\n\nNew model:\n" +
                this.getModelDetailsAsString(newModel)
            );
            if (overwrite) {
                break;
            }

            const typedName = await vscode.window.showInputBox({
                placeHolder: 'a unique name for your new model',
                prompt: 'Enter a unique name for your new model. Leave empty to cancel entering.',
                value: newModel.name
            });
            candidateName = this.sanitizeInput(typedName ?? "");
            if (!candidateName) {
                // An empty answer cancels the operation.
                break;
            }
            clash = findByName(candidateName);
        }

        return [candidateName, overwrite]
    }

    /**
     * Renders the properties of a model as multi-line text, one property per
     * line, preceded by a "model: " heading line.
     */
    private getModelDetailsAsString(model: LlmModel): string {
        return [
            "model: ",
            "name: " + model.name,
            "local start command: " + model.localStartCommand,
            "endpoint: " + model.endpoint,
            "model name for provider: " + model.aiModel,
            "api key required: " + model.isKeyRequired
        ].join("\n");
    }
}
|
||||
391
src/services/model-service.ts
Normal file
391
src/services/model-service.ts
Normal file
|
|
@ -0,0 +1,391 @@
|
|||
import * as vscode from "vscode";
|
||||
import { QuickPickItem } from "vscode";
|
||||
import { Application } from "../application";
|
||||
import { IAddStrategy, LlmModel, ModelTypeDetails } from "../types";
|
||||
import { Utils } from "../utils";
|
||||
import * as axios from "axios";
|
||||
import { ModelType, UI_TEXT_KEYS, HF_MODEL_TEMPLATES, SETTING_TO_MODEL_TYPE, MODEL_TYPE_CONFIG } from "../constants";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
import { Configuration } from "../configuration";
|
||||
import { PREDEFINED_LISTS } from "../lists";
|
||||
|
||||
export class ModelService {
|
||||
private app: Application;
|
||||
private strategies: Record<string, IAddStrategy>;
|
||||
|
||||
/**
 * Stores the application and wires the add-strategies by kind.
 *
 * The three strategy instances are read from the application here, so they
 * have to be assigned on it before this constructor runs.
 *
 * @param app The owning application.
 */
constructor(app: Application) {
    this.app = app;
    const { localModelStrategy, externalModelStrategy, hfModelStrategy } = this.app;
    this.strategies = {
        local: localModelStrategy,
        external: externalModelStrategy,
        hf: hfModelStrategy
    };
}
|
||||
|
||||
/**
 * Lists the menu actions available for a model type.
 *
 * @param type The model type whose actions are wanted.
 * @returns One quick-pick item per action, labelled with the UI text of the
 *          action (an empty label when no text is available). An unknown type
 *          yields an empty list.
 */
getActions(type: ModelType): vscode.QuickPickItem[] {
    // UI text keys per model type, in menu order.
    const actionKeysByType = {
        [ModelType.Completion]: [
            UI_TEXT_KEYS.selectStartCompletionModel,
            UI_TEXT_KEYS.deselectStopCompletionModel,
            UI_TEXT_KEYS.addLocalCompletionModel,
            UI_TEXT_KEYS.addExternalCompletionModel,
            UI_TEXT_KEYS.addCompletionModelFromHuggingface,
            UI_TEXT_KEYS.viewCompletionModelDetails,
            UI_TEXT_KEYS.deleteCompletionModel,
            UI_TEXT_KEYS.exportCompletionModel,
            UI_TEXT_KEYS.importCompletionModel,
        ],
        [ModelType.Chat]: [
            UI_TEXT_KEYS.selectStartChatModel,
            UI_TEXT_KEYS.deselectStopChatModel,
            UI_TEXT_KEYS.addLocalChatModel,
            UI_TEXT_KEYS.addExternalChatModel,
            UI_TEXT_KEYS.addChatModelFromHuggingface,
            UI_TEXT_KEYS.viewChatModelDetails,
            UI_TEXT_KEYS.deleteChatModel,
            UI_TEXT_KEYS.exportChatModel,
            UI_TEXT_KEYS.importChatModel,
        ],
        [ModelType.Embeddings]: [
            UI_TEXT_KEYS.selectStartEmbeddingsModel,
            UI_TEXT_KEYS.deselectStopEmbeddingsModel,
            UI_TEXT_KEYS.addLocalEmbeddingsModel,
            UI_TEXT_KEYS.addExternalEmbeddingsModel,
            UI_TEXT_KEYS.addEmbeddingsModelFromHuggingface,
            UI_TEXT_KEYS.viewEmbeddingsModelDetails,
            UI_TEXT_KEYS.deleteEmbeddingsModel,
            UI_TEXT_KEYS.exportEmbeddingsModel,
            UI_TEXT_KEYS.importEmbeddingsModel,
        ],
        [ModelType.Tools]: [
            UI_TEXT_KEYS.selectStartToolsModel,
            UI_TEXT_KEYS.deselectStopToolsModel,
            UI_TEXT_KEYS.addLocalToolsModel,
            UI_TEXT_KEYS.addExternalToolsModel,
            UI_TEXT_KEYS.addToolsModelFromHuggingface,
            UI_TEXT_KEYS.viewToolsModelDetails,
            UI_TEXT_KEYS.deleteToolsModel,
            UI_TEXT_KEYS.exportToolsModel,
            UI_TEXT_KEYS.importToolsModel,
        ],
    };

    const actionItems: vscode.QuickPickItem[] = [];
    for (const textKey of actionKeysByType[type] || []) {
        actionItems.push({ label: this.app.configuration.getUiText(textKey) ?? "" });
    }
    return actionItems;
}
|
||||
|
||||
/**
 * Runs the action behind a selected menu item.
 *
 * The item is matched by its label against the UI texts of the actions of the
 * given model type; nothing happens when no action matches.
 *
 * @param type     The model type the menu belongs to.
 * @param selected The quick-pick item the user chose.
 */
async processActions(type: ModelType, selected: vscode.QuickPickItem): Promise<void> {
    const details = this.getTypeDetails(type);
    const labelsByAction = this.getActionMap(type);
    const action = Object.keys(labelsByAction).find(name => selected.label === labelsByAction[name]);
    if (!action) return;

    // One handler per action name.
    const handlers: Record<string, () => Promise<unknown>> = {
        select: () => this.selectModel(type, details.modelsList),
        deselect: () => this.deselectModel(type, details),
        addLocal: () => this.addModel(type, 'local'),
        addExternal: () => this.addModel(type, 'external'),
        addHf: () => this.addModel(type, 'hf'),
        delete: () => this.deleteModel(details.modelsList, details.modelsListSettingName),
        view: () => this.viewModel(type, details.modelsList),
        export: () => this.exportModel(type, details.modelsList),
        import: () => this.importModel(details.modelsList, details.modelsListSettingName),
    };

    const handler = handlers[action];
    if (handler) {
        await handler();
    }
}
|
||||
|
||||
/**
 * Maps every action name to the UI text shown for it for the given model type.
 *
 * The UI text keys are derived from the capitalised type name, for example
 * "selectStart<Type>Model".
 *
 * @param type The model type.
 * @returns Action name to UI text; an empty string when no text is available.
 */
private getActionMap(type: ModelType): Record<string, string> {
    const typeStr = type.charAt(0).toUpperCase() + type.slice(1);
    const uiText = (keyName: string): string =>
        this.app.configuration.getUiText(UI_TEXT_KEYS[keyName as keyof typeof UI_TEXT_KEYS]) ?? "";

    return {
        select: uiText(`selectStart${typeStr}Model`),
        deselect: uiText(`deselectStop${typeStr}Model`),
        addLocal: uiText(`addLocal${typeStr}Model`),
        addExternal: uiText(`addExternal${typeStr}Model`),
        addHf: uiText(`add${typeStr}ModelFromHuggingface`),
        view: uiText(`view${typeStr}ModelDetails`),
        delete: uiText(`delete${typeStr}Model`),
        export: uiText(`export${typeStr}Model`),
        import: uiText(`import${typeStr}Model`),
    };
}
|
||||
|
||||
/**
 * Lets the user pick a model of the given type and selects/starts it.
 *
 * The list shows the user's models, then the predefined models, then a final
 * "Use settings" entry that builds a model from the current configuration
 * values (ai_model, the endpoint setting and the launch setting of the type).
 *
 * @param type       The model type.
 * @param modelsList The user's models of that type.
 * @returns The selected model, or undefined when the pick was dismissed or
 *          could not be resolved to a model.
 */
selectModel = async (type: ModelType, modelsList: LlmModel[]): Promise<LlmModel | undefined> => {
    const details = this.getTypeDetails(type);
    // Map.get returns undefined for a type without predefined models; treat
    // that as an empty list instead of passing undefined on.
    const predefinedModels = (PREDEFINED_LISTS.get(type) as LlmModel[] | undefined) ?? [];
    const allModels = modelsList.concat(predefinedModels);
    const modelsItems: QuickPickItem[] = [
        ...this.getModels(modelsList, "", true),
        ...this.getModels(predefinedModels, "(predefined) ", true, modelsList.length),
    ];

    // Launch setting name -> endpoint setting name.
    const launchToEndpoint = new Map([
        ["launch_completion", "endpoint"],
        ["launch_chat", "endpoint_chat"],
        ["launch_embeddings", "endpoint_embeddings"],
        ["launch_tools", "endpoint_tools"]
    ]);

    modelsItems.push({ label: (modelsItems.length + 1) + ". Use settings", description: "" });

    const selectedModelItem = await vscode.window.showQuickPick(modelsItems);
    if (!selectedModelItem) {
        return undefined;
    }

    // Labels start with "<number>. "; the number identifies the entry.
    const selectedNumber = parseInt(selectedModelItem.label.split(". ")[0], 10);
    let model: LlmModel | undefined;
    if (selectedNumber == modelsItems.length) {
        // Last entry: use the values from the settings.
        model = {
            name: "Use settings",
            aiModel: this.app.configuration.ai_model,
            isKeyRequired: false,
            endpoint: this.app.configuration[launchToEndpoint.get(details.launchSettingName) as keyof Configuration] as string,
            localStartCommand: this.app.configuration[details.launchSettingName as keyof Configuration] as string
        };
    } else {
        model = allModels[selectedNumber - 1];
    }
    if (!model) {
        // The label did not resolve to a known model; nothing to select.
        return undefined;
    }

    await this.selectStartModel(model, type, details);
    return model;
}
|
||||
|
||||
/**
 * Makes the given model the selected one for its type and starts it if it has
 * a local start command.
 *
 * Steps, in order: ask for an API key if needed, mark the model as selected in
 * the menu, call details.killCmd(), call details.shellCmd() with the sanitized
 * start command (only when one is set), and persist the selection.
 *
 * NOTE(review): the strategies and importModel already pass the command
 * through sanitizeCommand before storing it, so a stored command is escaped a
 * second time here - confirm this is intended.
 *
 * @param model   The model to select.
 * @param type    The model type.
 * @param details The settings and commands of that model type.
 */
public async selectStartModel(model: LlmModel, type: ModelType, details: ModelTypeDetails) {
    await this.addApiKey(model);
    this.app.menu.setSelectedModel(type, model);

    await details.killCmd();
    const startCommand = model.localStartCommand;
    if (startCommand) {
        await details.shellCmd(this.sanitizeCommand(startCommand ?? ""));
    }

    const selectedPropName = this.getSelectedProp(type);
    await this.app.persistence.setValue(selectedPropName, model);
}
|
||||
|
||||
/**
 * Adds a model of the given type using the strategy registered for the kind.
 *
 * @param type The model type.
 * @param kind Which add-strategy to use; nothing happens when none is
 *             registered for it.
 */
public async addModel(type: ModelType, kind: 'local' | 'external' | 'hf'): Promise<void> {
    const details = this.getTypeDetails(type);
    const strategy = this.strategies[kind];
    if (!strategy) {
        return;
    }
    await strategy.add(details);
}
|
||||
|
||||
/**
 * Lets the user pick one of the given models and, after a confirmation,
 * removes it from the list and writes the list back to the configuration.
 *
 * @param modelsList  The list to delete from; modified in place.
 * @param settingName The configuration setting that stores the list.
 */
async deleteModel(modelsList: LlmModel[], settingName: string): Promise<void> {
    const choices: QuickPickItem[] = this.getModels(modelsList, "", false);
    const picked = await vscode.window.showQuickPick(choices);
    if (!picked) {
        return;
    }

    // Labels start with the 1-based position of the model.
    const position = parseInt(picked.label.split(". ")[0], 10) - 1;
    const confirmed = await Utils.confirmAction(
        "Are you sure you want to delete the model below?",
        this.getDetails(modelsList[position])
    );
    if (!confirmed) {
        return;
    }

    modelsList.splice(position, 1);
    this.app.configuration.updateConfigValue(settingName, modelsList);
    vscode.window.showInformationMessage("The model is deleted.")
}
|
||||
|
||||
/**
 * Lets the user pick a model (own models first, then predefined ones) and
 * shows its details.
 *
 * @param type       The model type, used to look up the predefined models.
 * @param modelsList The user's models of that type.
 */
public async viewModel(type: ModelType , modelsList: LlmModel[]): Promise<void> {
    const predefinedModels = PREDEFINED_LISTS.get(type) as LlmModel[];
    const allModels = modelsList.concat(predefinedModels);
    const ownItems: QuickPickItem[] = this.getModels(modelsList, "", false);
    const predefinedItems = this.getModels(predefinedModels, "(predefined) ", false, modelsList.length);

    const picked = await vscode.window.showQuickPick(ownItems.concat(predefinedItems));
    if (!picked) {
        return;
    }

    // Labels start with the 1-based position within the combined list.
    const position = parseInt(picked.label.split(". ")[0], 10) - 1;
    await this.showModelDetails(allModels[position]);
}
|
||||
|
||||
/**
 * Shows the properties of a model in an OK dialog.
 *
 * @param model The model to show.
 */
public async showModelDetails(model: LlmModel): Promise<void> {
    const text = "Model details: " + this.getDetails(model);
    await Utils.showOkDialog(text);
}
|
||||
|
||||
/**
 * Lets the user pick a model (own models first, then predefined ones) and
 * saves it as a JSON file at a location chosen in a save dialog.
 *
 * @param type       The model type, used to look up the predefined models.
 * @param modelsList The user's models of that type.
 */
async exportModel(type: ModelType, modelsList: LlmModel[]): Promise<void> {
    // Map.get returns undefined for a type without predefined models.
    const predefinedModels = (PREDEFINED_LISTS.get(type) as LlmModel[] | undefined) ?? [];
    const allModels = modelsList.concat(predefinedModels);
    const modelsItems: QuickPickItem[] = this.getModels(modelsList, "", false)
        .concat(this.getModels(predefinedModels, "(predefined) ", false, modelsList.length));

    const modelItem = await vscode.window.showQuickPick(modelsItems);
    if (!modelItem) {
        return;
    }

    // Labels start with the 1-based position within the combined list.
    const selectedModel = allModels[parseInt(modelItem.label.split(". ")[0], 10) - 1];
    if (!selectedModel) {
        return;
    }

    const shouldExport = await Utils.showYesNoDialog("Do you want to export the following model? \n\n" +
        this.getDetails(selectedModel)
    );
    if (!shouldExport) {
        return;
    }

    // workspace.rootPath is deprecated; the first workspace folder is its replacement.
    const baseFolder = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath ?? '';
    const uri = await vscode.window.showSaveDialog({
        defaultUri: vscode.Uri.file(path.join(baseFolder, selectedModel.name + '.json')),
        filters: {
            'Model Files': ['json'],
            'All Files': ['*']
        },
        saveLabel: 'Export Model'
    });
    if (!uri) {
        return;
    }

    try {
        fs.writeFileSync(uri.fsPath, JSON.stringify(selectedModel, null, 2), 'utf8');
    } catch (error: unknown) {
        // Report the failure instead of letting the command reject silently.
        const reason = error instanceof Error ? error.message : String(error);
        vscode.window.showErrorMessage("The model could not be exported to " + uri.fsPath + ": " + reason);
        return;
    }
    vscode.window.showInformationMessage("Model is saved.")
}
|
||||
|
||||
/**
 * Imports a model from a JSON file chosen in an open dialog.
 *
 * The file has to contain a JSON object with a non-empty name. The text
 * properties are trimmed (the start command is passed through
 * sanitizeCommand) and, after a confirmation, the model is appended to the
 * list and the list is written back to the configuration.
 *
 * @param modelList   The list to add to; modified in place.
 * @param settingName The configuration setting that stores the list.
 */
async importModel(modelList: LlmModel[], settingName: string): Promise<void> {
    const uris = await vscode.window.showOpenDialog({
        canSelectMany: false,
        openLabel: 'Import Model',
        filters: {
            'Model Files': ['json'],
            'All Files': ['*']
        },
    });
    if (!uris || uris.length === 0) {
        return;
    }
    const filePath = uris[0].fsPath;

    // Reading and parsing can both fail (missing file, invalid JSON); report
    // that to the user instead of letting the command reject.
    let parsed: unknown;
    try {
        parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    } catch (error: unknown) {
        const reason = error instanceof Error ? error.message : String(error);
        vscode.window.showErrorMessage("The model could not be imported from " + filePath + ": " + reason);
        return;
    }
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        vscode.window.showErrorMessage("The file does not contain a model definition: " + filePath);
        return;
    }
    const newModel = parsed as LlmModel;

    // Sanitize the imported model; only string values can be trimmed.
    if (typeof newModel.name === "string") newModel.name = this.sanitizeInput(newModel.name);
    if (typeof newModel.localStartCommand === "string") newModel.localStartCommand = this.sanitizeCommand(newModel.localStartCommand);
    if (typeof newModel.endpoint === "string") newModel.endpoint = this.sanitizeInput(newModel.endpoint);
    if (typeof newModel.aiModel === "string") newModel.aiModel = this.sanitizeInput(newModel.aiModel);

    if (typeof newModel.name !== "string" || !newModel.name) {
        vscode.window.showInformationMessage("The model was not added as the name was not provided.");
        return;
    }

    const shouldAddModel = await Utils.confirmAction(
        "A new model will be added. Do you want to add the model?",
        this.getDetails(newModel)
    );
    if (!shouldAddModel) {
        // Declined: nothing was imported, so no success message is shown.
        return;
    }

    modelList.push(newModel);
    this.app.configuration.updateConfigValue(settingName, modelList);
    vscode.window.showInformationMessage("The model is added.");
    vscode.window.showInformationMessage("Model imported: " + newModel.name);
}
|
||||
|
||||
/**
 * Deselects the model of the given type: calls details.killCmd() and then
 * clears the model of that type in the menu.
 *
 * @param type    The model type.
 * @param details The settings and commands of that model type.
 */
public async deselectModel(type: ModelType, details: ModelTypeDetails): Promise<void> {
    const { killCmd } = details;
    await killCmd();
    this.app.menu.clearModel(type);
}
|
||||
|
||||
/**
 * Renders the properties of a model as multi-line text, one property per line.
 *
 * @param model The model to describe.
 * @returns The text used in dialogs.
 */
getDetails(model: LlmModel): string {
    const lines = [
        "name: " + model.name,
        "local start command: " + model.localStartCommand,
        "endpoint: " + model.endpoint,
        "model name for provider: " + model.aiModel,
        "api key required: " + model.isKeyRequired,
    ];
    return lines.join("\n");
}
|
||||
|
||||
/**
 * Turns models into numbered quick-pick items.
 *
 * @param models          The models to list.
 * @param prefix          Text placed between the number and the model name.
 * @param hasDetails      When true, every item also carries a detail line
 *                        describing what selecting the model does.
 * @param lastModelNumber The number of the last item of a preceding list;
 *                        numbering continues after it (default 0).
 * @returns One item per model, labelled "<number>. <prefix><name>" with the
 *          start command as description.
 */
private getModels(models: LlmModel[], prefix: string, hasDetails: boolean, lastModelNumber: number = 0): QuickPickItem[] {
    return models.map((model, offset): QuickPickItem => {
        const label = (lastModelNumber + offset + 1) + ". " + prefix + model.name;
        if (!hasDetails) {
            return { label: label, description: model.localStartCommand };
        }
        return {
            label: label,
            description: model.localStartCommand,
            detail: "Selects the model" + (model.localStartCommand ? ", downloads the model (if not yet done) and starts a llama-server with it." : "")
        };
    });
}
|
||||
|
||||
/**
 * Collects the settings and commands that belong to a model type.
 *
 * The names of the settings and commands come from MODEL_TYPE_CONFIG; the
 * values are read by name from the configuration and the llama server.
 *
 * NOTE(review): killCmd and shellCmd are read off the server object without
 * binding; this presumably relies on them not depending on `this` (for example
 * being arrow-function properties) - confirm.
 *
 * @param type The model type.
 * @returns The details of that model type.
 */
public getTypeDetails(type: ModelType): ModelTypeDetails {
    const config = MODEL_TYPE_CONFIG[type];
    const settings = this.app.configuration as any;
    const server = this.app.llamaServer as any;
    return {
        modelsList: settings[config.settingName],
        modelsListSettingName: config.settingName,
        newModelPort: settings[config.portSetting],
        newModelHost: settings[config.hostSetting],
        selModelPropName: config.propName,
        launchSettingName: config.launchSetting,
        killCmd: server[config.killCmdName],
        shellCmd: server[config.shellCmdName]
    };
}
|
||||
|
||||
/**
 * Returns the property name under which the selected model of a type is
 * persisted, as configured in MODEL_TYPE_CONFIG.
 *
 * @param type The model type.
 * @returns The property name, or an empty string when none is configured.
 */
private getSelectedProp(type: ModelType): string {
    switch (type) {
        case ModelType.Completion:
            return MODEL_TYPE_CONFIG[ModelType.Completion].propName || '';
        case ModelType.Chat:
            return MODEL_TYPE_CONFIG[ModelType.Chat].propName || '';
        case ModelType.Embeddings:
            return MODEL_TYPE_CONFIG[ModelType.Embeddings].propName || '';
        case ModelType.Tools:
            return MODEL_TYPE_CONFIG[ModelType.Tools].propName || '';
        default:
            return '';
    }
}
|
||||
|
||||
/**
 * Asks the user for the API key of a model's endpoint when the model requires
 * one and none is stored yet; a non-empty answer is stored for the endpoint.
 *
 * @param model The model whose endpoint may need an API key.
 */
public async addApiKey(model: LlmModel): Promise<void> {
    if (!model.isKeyRequired) {
        return;
    }
    const storedKey = this.app.persistence.getApiKey(model.endpoint ?? "");
    if (storedKey) {
        return;
    }

    const entered = await vscode.window.showInputBox({
        placeHolder: 'Enter your api key for ' + model.endpoint,
        prompt: 'your api key for ' + model.endpoint,
        value: ''
    });
    const apiKey = this.sanitizeInput(entered || '');
    if (!apiKey) {
        // Dismissed or blank: nothing is stored.
        return;
    }

    this.app.persistence.setApiKey(model.endpoint ?? "", apiKey);
    vscode.window.showInformationMessage("Your API key for " + model.endpoint + " was saved.")
}
|
||||
|
||||
/**
 * Trims a shell command and puts a backslash in front of every character of a
 * fixed set of shell metacharacters.
 *
 * The backslash itself is part of that set, so the function is not
 * idempotent: applying it to its own output escapes the added backslashes
 * again.
 *
 * @param command The raw command; any falsy value is accepted.
 * @returns The escaped command, or an empty string for falsy input.
 */
sanitizeCommand = (command: string): string => {
    return command
        ? command.trim().replace(/[`#$&*;\<>\?\\|~!{}()[\]^"]/g, '\\$&')
        : '';
}
|
||||
|
||||
/**
 * Trims surrounding whitespace from user input.
 *
 * @param input The raw text; any falsy value is accepted.
 * @returns The trimmed text, or an empty string for falsy input.
 */
public sanitizeInput(input: string): string {
    if (!input) {
        return '';
    }
    return input.trim();
}
|
||||
|
||||
// selectModel returns the chosen model, or undefined when the quick pick is dismissed.
|
||||
}
|
||||
|
|
@ -2,6 +2,7 @@ import * as vscode from 'vscode';
|
|||
import { Application } from './application';
|
||||
import { Utils } from './utils';
|
||||
import { LlamaChatResponse } from "./types";
|
||||
import { Chat } from 'openai/resources';
|
||||
|
||||
export class TextEditor {
|
||||
private app: Application;
|
||||
|
|
@ -27,13 +28,15 @@ export class TextEditor {
|
|||
|
||||
async showEditPrompt(editor: vscode.TextEditor) {
|
||||
let chatUrl = this.app.configuration.endpoint_chat
|
||||
if (!chatUrl) chatUrl = this.app.configuration.endpoint_tools;
|
||||
let chatModel = this.app.menu.getChatModel();
|
||||
if (!this.app.menu.isChatModelSelected()) chatModel = this.app.menu.getToolsModel();
|
||||
if (chatModel.endpoint) {
|
||||
const chatEndpoint = Utils.trimTrailingSlash(chatModel.endpoint)
|
||||
chatUrl = chatEndpoint ? chatEndpoint + "/" : "";
|
||||
}
|
||||
if (!chatUrl) {
|
||||
const shouldSelectModel = await Utils.showUserChoiceDialog("Select a chat model or an env with chat model to edit code with AI.","Select")
|
||||
const shouldSelectModel = await Utils.showUserChoiceDialog("Select a chat or tools model or an env with chat or tools model to edit code with AI.","Select")
|
||||
if (shouldSelectModel){
|
||||
this.app.menu.showEnvView();
|
||||
vscode.window.showInformationMessage("After the chat model is loaded, try again using Edit with AI.")
|
||||
|
|
|
|||
|
|
@ -225,10 +225,11 @@ export class Tools {
|
|||
let changes = params.input;
|
||||
|
||||
if (params.input == undefined) return "The input is not provided."
|
||||
|
||||
let filePath = this.getFilePath(params.input);
|
||||
if (!filePath) return "The file is not provided.";
|
||||
if (!fs.existsSync(filePath)) return "The does not exist: " + filePath;
|
||||
try {
|
||||
if (!this.app.configuration.tool_permit_file_changes){
|
||||
let filePath = this.getFilePath(params.input);
|
||||
if (!this.app.configuration.tool_permit_file_changes){
|
||||
let [yesApply, yesDontAsk] = await Utils.showYesYesdontaskNoDialog("Do you permit file " + filePath + " to be changed?")
|
||||
if (yesDontAsk) {
|
||||
this.app.configuration.updateConfigValue("tool_permit_file_changes", true)
|
||||
|
|
@ -456,7 +457,7 @@ export class Tools {
|
|||
"properties": {
|
||||
"include_pattern": {
|
||||
"type": "string",
|
||||
"description": "Glob pattern for files to include (e.g. '*.ts' for TypeScript files)"
|
||||
"description": "Glob pattern for files to include (specify file extensions only if you are absolutely sure)"
|
||||
},
|
||||
"exclude_pattern": {
|
||||
"type": "string",
|
||||
|
|
|
|||
|
|
@ -1,3 +1,7 @@
|
|||
/**
 * Contract for an "add" strategy: a single asynchronous operation that
 * receives a `ModelTypeDetails` value and resolves with no result.
 *
 * NOTE(review): presumably implemented by the per-source model strategies
 * (e.g. HF / local / external) — confirm against the implementers.
 */
export interface IAddStrategy {
    /**
     * Runs the add operation for the given model type details.
     *
     * @param details Description of the model type the operation applies to.
     * @returns A promise that settles when the operation has finished.
     */
    add(details: ModelTypeDetails): Promise<void>;
}
|
||||
|
||||
export interface ChatMessage {
|
||||
role: string; // or just 'string' if you need more roles
|
||||
content: string;
|
||||
|
|
|
|||
30
src/utils.ts
30
src/utils.ts
|
|
@ -806,10 +806,40 @@ export class Utils {
|
|||
return timeDifference >= twentyFourHoursInMs;
|
||||
}
|
||||
|
||||
/**
 * Asks the user to confirm an action through `Utils.showYesNoDialog`.
 *
 * @param message Main question shown to the user.
 * @param details Optional extra text; when non-empty it is appended to
 *                `message` after a blank line.
 * @returns Whatever `Utils.showYesNoDialog` resolves to for the composed text.
 */
static async confirmAction(message: string, details: string = ""): Promise<boolean> {
    // No details: the message is shown unchanged.
    if (!details) {
        return Utils.showYesNoDialog(message);
    }
    return Utils.showYesNoDialog(`${message}\n\n${details}`);
}
|
||||
|
||||
/**
 * Reads a file as UTF-8, wraps its content in parentheses and evaluates it,
 * returning the value of that expression.
 *
 * NOTE(review): this uses a direct `eval` on file content, so the file's code
 * runs with the full privileges of the extension host. Only call this with
 * files the user explicitly trusts; consider a safer loading mechanism.
 *
 * @param filePath Path of the file whose content is evaluated.
 * @returns The result of evaluating the parenthesized file content
 *          (presumably a function — not checked here).
 */
static getFunctionFromFile = (filePath: string) => {
    const functionCode = fs.readFileSync(filePath, 'utf-8');
    // Parentheses force the source to be parsed as an expression rather than a statement.
    const functionString = `(${functionCode})`;
    return eval(functionString);
}
|
||||
|
||||
/**
 * Repeatedly shows an input box until the validator accepts the entered text,
 * the user cancels, or the attempt budget is used up.
 *
 * @param prompt      Prompt text for the input box. Because `options` is spread
 *                    after it, a `prompt` set in `options` takes precedence.
 * @param validator   Returns `true` when the entered text is acceptable.
 * @param maxAttempts Maximum number of times the input box is shown (default 3).
 * @param options     Additional `vscode.InputBoxOptions` merged into every prompt.
 * @returns The first accepted input, or `undefined` when the user cancelled or
 *          every attempt failed validation (an error message is shown in the
 *          latter case).
 */
static async getValidatedInput(prompt: string, validator: (input: string) => boolean, maxAttempts: number = 3, options: vscode.InputBoxOptions = {}): Promise<string | undefined> {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        const fullOptions: vscode.InputBoxOptions = {
            prompt,
            ...options
        };
        const input = await vscode.window.showInputBox(fullOptions);

        if (input === undefined) {
            return undefined; // User cancelled
        }

        if (validator(input)) {
            return input;
        }

        // Report how many tries are actually left; the previous expression
        // (`attempt + 1 - 1`) merely echoed the current attempt number.
        const remaining = maxAttempts - attempt;
        if (remaining > 0) {
            vscode.window.showWarningMessage(`Invalid input on attempt ${attempt}. ${remaining} more attempt${remaining === 1 ? "" : "s"}.`);
        }
    }

    vscode.window.showErrorMessage(`Maximum attempts (${maxAttempts}) reached. Input validation failed.`);
    return undefined;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -403,7 +403,7 @@ const AgentView: React.FC<AgentViewProps> = ({
|
|||
ref={textareaRef}
|
||||
value={inputText}
|
||||
onChange={(e) => setInputText(e.target.value)}
|
||||
placeholder="Ask me anything about your code..."
|
||||
placeholder="Ask me anything about your code... Press @ to select a file / for a command."
|
||||
className="modern-textarea"
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter') {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue