From 31eaf04867e7a65af5d6a1aaa2df07331f6b21d9 Mon Sep 17 00:00:00 2001 From: Adam Date: Fri, 1 May 2026 10:36:21 -0400 Subject: [PATCH] persist model config, fix status bar --- src/agent/app.clj | 118 ++++++++++++++++++---- src/agent/core.clj | 247 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 292 insertions(+), 73 deletions(-) diff --git a/src/agent/app.clj b/src/agent/app.clj index a5672c3..2ac4f69 100644 --- a/src/agent/app.clj +++ b/src/agent/app.clj @@ -194,10 +194,29 @@ (vec (drop-while #(= % [:text ""]) lines)))) ;; ============================================================ -;; View +;; Status Bar - Context Window Display ;; ============================================================ -(defn- view [{:keys [messages input agent-running? spinner-frame scroll-offset]}] +(defn- format-token-count [n] + (cond + (>= n 1000000000) (format "%.1fB" (/ (double n) 1e9)) + (>= n 1000000) (format "%.1fM" (/ (double n) 1e6)) + (>= n 1000) (format "%.1fk" (/ (double n) 1e3)) + :else (str n))) + +(defn- format-context-bar [prompt-tokens max-tokens percentage] + (let [bar-width 20 + filled (-> (int (/ (* bar-width percentage) 100)) (max 0) (min bar-width)) + empty (- bar-width filled) + bar (str (apply str (repeat filled "█")) + (apply str (repeat empty "░")))] + [:text {:fg (cond + (< percentage 50) :green + (< percentage 80) :yellow + :else :red)} + (str " " bar " " (format-token-count prompt-tokens) "/" (format-token-count max-tokens))])) + +(defn- view [{:keys [messages input agent-running? spinner-frame scroll-offset context-info]}] (let [{term-w :width term-h :height} (term/get-terminal-size) width (or term-w 80) height (or term-h 24) @@ -238,6 +257,11 @@ (subs input (- (count input) max-input-width)) input) + context-bar (when context-info + (format-context-bar (:prompt_tokens context-info) + (:max_tokens context-info) + (:percentage context-info))) + input-box [:box {:border :rounded :width :fill} (if (pos? 
clamped-offset) [:text {:fg :cyan} (str "↑" clamped-offset " " display-input "█")] @@ -248,10 +272,15 @@ [:col {:heights [:flex input-box-height]} (header-view) input-box] - ;; Chat state: scrollable messages - [:col {:heights [chat-height input-box-height]} - (into [:col] display-lines) - input-box]))) + ;; Chat state: scrollable messages with optional footer context bar + (if context-bar + [:col {:heights [(max 1 (- height input-box-height 1)) input-box-height 1]} + (into [:col] display-lines) + input-box + context-bar] + [:col {:heights [(max 1 (- height input-box-height)) input-box-height]} + (into [:col] display-lines) + input-box])))) ;; ============================================================ ;; Update @@ -268,15 +297,20 @@ "Process a single event from the agent background loop." [model event] (case (:type event) - :text (update model :messages conj {:role :assistant :content (:content event)}) - :tool (update model :messages conj {:role :tool :content (:label event)}) - :diff (update model :messages conj {:role :diff :content (:content event)}) - :error (update model :messages conj {:role :error :content (:message event)}) - :done (let [m (assoc model - :agent-running? false - :conversation (:conversation event))] - (save-current-session! m) - m) + :text (update model :messages conj {:role :assistant :content (:content event)}) + :tool (update model :messages conj {:role :tool :content (:label event)}) + :diff (update model :messages conj {:role :diff :content (:content event)}) + :error (update model :messages conj {:role :error :content (:message event)}) + :context-info + (assoc model :context-info + {:prompt_tokens (:prompt_tokens event) + :max_tokens (:max_tokens event) + :percentage (:percentage event)}) + :done (let [m (-> model + (assoc :agent-running? false + :conversation (:conversation event)))] + (save-current-session! 
m) + m) model)) (defn- update-fn [{:keys [model event]}] @@ -310,6 +344,40 @@ (update :messages conj {:role :assistant :content listing}) (assoc :input "" :scroll-offset 0))}) + ;; Built-in: /model — list or switch models + (re-find #"^/model(\s|$)" text) + (let [arg (str/trim (subs text (count "/model"))) + switch? (seq arg)] + (if switch? + (do + (core/set-model! arg) + {:model (-> model + (update :messages conj {:role :user :content text}) + (update :messages conj {:role :assistant :content (str "Switched to model: **" arg "**")}) + (assoc :input "" :scroll-offset 0))}) + (let [result (try + (let [models (core/list-models) + current @core/model + lines (map (fn [{:keys [id state context]}] + (let [active? (= id current) + ctx (when context + (str " [" (int (/ context 1000)) "k ctx]")) + loaded? (= state "loaded") + tag (cond active? " ← current" + loaded? " (loaded)" + :else "")] + (str "- " (when active? "**") id (when active? "**") + ctx tag))) + models)] + (str "**Available models** (use `/model ` to switch):\n\n" + (str/join "\n" lines))) + (catch Exception e + (str "Error: " (.getMessage e))))] + {:model (-> model + (update :messages conj {:role :user :content text}) + (update :messages conj {:role :assistant :content result}) + (assoc :input "" :scroll-offset 0))}))) + ;; Skill expansion or normal message → send to LLM :else (let [expanded (context/expand-skill text (:skills model)) @@ -414,7 +482,22 @@ (println " agent \"fix the bug\" Start with an initial prompt") (println " agent --continue Resume the last session") (System/exit 0)) - (let [;; Load project context and skills + (let [;; Initialize model registry and select model: env > config > first-available + _ (try + (let [available (core/list-models) + env-model (System/getenv "AGENT_MODEL") + config-model (when-not env-model (:model (core/load-config))) + preferred (or env-model config-model) + in-registry? #(or (get @core/model-registry %) + (some (fn [[id _]] (str/starts-with? 
id (str % ":"))) + @core/model-registry)) + chosen (if (and preferred (in-registry? preferred)) + preferred + (:id (or (first (filter #(= (:state %) "loaded") available)) + (first available))))] + (when chosen (core/set-model! chosen))) + (catch Exception _)) + ;; Load project context and skills project-context (context/load-project-context) skills (context/load-skills) _ (reset! core/skills-atom skills) @@ -450,7 +533,8 @@ :agent-running? start? :agent-handle agent-handle :spinner-frame 0 - :scroll-offset 0} + :scroll-offset 0 + :context-info nil} initial-events (when start? [(ev/delayed-event 100 {:type :poll}) (ev/delayed-event 80 {:type :spinner})])] diff --git a/src/agent/core.clj b/src/agent/core.clj index 318cc50..347b41d 100644 --- a/src/agent/core.clj +++ b/src/agent/core.clj @@ -17,9 +17,41 @@ ;; ============================================================ (def ollama-host (or (System/getenv "OLLAMA_HOST") "http://localhost:11434")) -(def model (or (System/getenv "AGENT_MODEL") "qwen3-coder-next")) +(def lm-studio-host + (or (System/getenv "LM_STUDIO_HOST") + (str/replace ollama-host #":\d+$" ":1234"))) +(def model (atom (or (System/getenv "AGENT_MODEL") "qwen3.6"))) +(def active-host (atom ollama-host)) +(def active-api (atom :ollama)) +(def model-registry (atom {})) (def max-tokens 131072) +;; ============================================================ +;; Config Persistence +;; ============================================================ + +(def config-dir + (let [home (System/getProperty "user.home")] + (.getPath (io/file home ".config" "agent0")))) + +(def config-file + (io/file config-dir "agent0.edn")) + +(defn load-config [] + (when (.exists config-file) + (try (read-string (slurp config-file)) + (catch Exception _ nil)))) + +(defn update-config! 
[f] + (.mkdirs (io/file config-dir)) + (locking #'config-file + (let [current (when (.exists config-file) + (try (read-string (slurp config-file)) (catch Exception _ nil))) + updated (f (or current {})) + tmp (io/file config-dir "agent0.edn.tmp")] + (spit tmp (pr-str updated)) + (fs/move tmp config-file {:replace-existing true})))) + (def base-system-prompt "You are a helpful coding assistant. You can read, list, create, edit, search, and find files to help the user with their coding tasks. @@ -44,16 +76,27 @@ For predefined workflows: Always explain what you're doing before using tools. Use the tools when needed to complete the task.") (defn build-system-prompt - "Build the full system prompt by appending project context (if any) and loaded skills to the base prompt." + "Build the full system prompt by appending project context (if any), loaded skills, and available models." [project-context skills] (let [skills-section (when (seq skills) (let [listing (context/format-skill-list skills)] (str "## Available Skills\n\n" "The following skills are loaded and ready to use via the skills tool with action 'run':\n\n" - listing)))] + listing))) + models-section (when (seq @model-registry) + (let [current @model + lines (map (fn [[id {:keys [context state]}]] + (str "- " (if (= id current) (str "**" id "** (active)") id) + (when context (str " — " (int (/ context 1000)) "k ctx")) + (when (= state "loaded") " [loaded]"))) + @model-registry)] + (str "## Available Models\n\n" + "Use `/model ` to switch. Current: **" current "**\n\n" + (str/join "\n" lines))))] (cond-> base-system-prompt project-context (str "\n\n" project-context) - skills-section (str "\n\n" skills-section)))) + skills-section (str "\n\n" skills-section) + models-section (str "\n\n" models-section)))) ;; ============================================================ ;; Logging @@ -93,7 +136,7 @@ Always explain what you're doing before using tools. 
Use the tools when needed t (let [f (io/file dir (str session-id ".edn")) session (merge {:id session-id :updated (str (Instant/now)) - :model model} + :model @model} (when-not (:created data) {:created (str (Instant/now))}) data)] @@ -307,14 +350,99 @@ Always explain what you're doing before using tools. Use the tools when needed t "Timeout for LLM API calls in milliseconds (5 minutes)." (* 5 60 1000)) -(defn- call-llm* [sys-prompt tool-defs messages] - (let [body {:model model +;; LM Studio requires HTTP/1.1 — Java's HttpClient defaults to HTTP/2 which +;; hangs against LM Studio's Express-based server over Tailscale. +(def ^:private http1-client + (-> (java.net.http.HttpClient/newBuilder) + (.version java.net.http.HttpClient$Version/HTTP_1_1) + .build)) + +(defn- ipv4-url + "Resolve the host in a URL to an IPv4 address. Works around JVM preferring + IPv6 for hosts that LM Studio / other servers only listen on IPv4." + [url] + (try + (let [uri (java.net.URI. url) + host (.getHost uri) + port (.getPort uri) + ipv4 (->> (java.net.InetAddress/getAllByName host) + (filter #(instance? java.net.Inet4Address %)) + first)] + (if ipv4 + (str (.getScheme uri) "://" (.getHostAddress ipv4) + (when (pos? port) (str ":" port)) + (.getPath uri) + (when (.getQuery uri) (str "?" (.getQuery uri)))) + url)) + (catch Exception _ url))) + +(defn list-models + "Fetch available models from Ollama and LM Studio. + Returns a sequence of {:id :host :api :state :context} maps and updates model-registry." 
+ [] + (let [from-ollama + (try + (let [resp (http/get (str ollama-host "/api/tags") {:timeout 5000}) + data (json/parse-string (:body resp) true)] + (mapv (fn [m] {:id (:name m) :host ollama-host :api :ollama :state "loaded"}) + (:models data))) + (catch Exception _ nil)) + from-lmstudio + (try + (let [resp (http/get (ipv4-url (str lm-studio-host "/api/v0/models")) {:client http1-client :timeout 5000}) + data (json/parse-string (:body resp) true)] + (->> (:data data) + (remove #(= (:type %) "embeddings")) + (mapv (fn [m] {:id (:id m) :host lm-studio-host :api :openai + :state (:state m) + :context (or (:loaded_context_length m) + (:max_context_length m))})))) + (catch Exception _ nil)) + all (concat from-ollama from-lmstudio)] + (when (seq all) + (reset! model-registry (into {} (map (fn [m] [(:id m) m]) all)))) + (or (seq all) + (throw (ex-info (str "No models found at " ollama-host " or " lm-studio-host) {}))))) + +(defn set-model! + "Switch the active model, updating the chat backend to match, and persisting to config." + [new-model] + (reset! model new-model) + (let [info (or (get @model-registry new-model) + ;; Ollama appends :latest — try prefix match as fallback + (some (fn [[id v]] (when (str/starts-with? id (str new-model ":")) v)) + @model-registry))] + (when info + (reset! active-host (:host info)) + (reset! active-api (:api info)))) + (update-config! 
#(assoc % :model new-model))) + +(defn- call-llm-ollama* [sys-prompt tool-defs messages] + (let [body {:model @model :options {:num_predict max-tokens} :messages (into [{:role "system" :content sys-prompt}] messages) :tools tool-defs :stream false} - response (http/post (str ollama-host "/api/chat") + response (http/post (str @active-host "/api/chat") {:headers {"Content-Type" "application/json"} + :body (json/generate-string body) + :timeout llm-timeout-ms}) + result (json/parse-string (:body response) true)] + {:choices [{:message (:message result) + :finish_reason (if (seq (get-in result [:message :tool_calls])) + "tool_calls" + "stop")}] + :usage {:prompt_tokens (:prompt_eval_count result) + :completion_tokens (:eval_count result)}})) + +(defn- call-llm-openai* [sys-prompt tool-defs messages] + (let [body {:model @model + :messages (into [{:role "system" :content sys-prompt}] messages) + :tools tool-defs + :stream false} + response (http/post (ipv4-url (str @active-host "/v1/chat/completions")) + {:client http1-client + :headers {"Content-Type" "application/json"} :body (json/generate-string body) :timeout llm-timeout-ms})] (json/parse-string (:body response) true))) @@ -350,7 +478,7 @@ Always explain what you're doing before using tools. Use the tools when needed t (if (>= iteration max-iter) (do (log log-file " [subagent] max iterations reached") (str/join "\n\n" texts)) - (let [result (call-llm* system-prompt tool-defs messages) + (let [result (-> (if (= @active-api :openai) (call-llm-openai* system-prompt tool-defs messages) (call-llm-ollama* system-prompt tool-defs messages)) :choices first) message (:message result) content (:content message) tool-calls (:tool_calls message) @@ -511,13 +639,9 @@ Always explain what you're doing before using tools. 
Use the tools when needed t ;; ============================================================ (defn call-llm [system-prompt messages] - (let [result (call-llm* system-prompt tool-definitions messages)] - {:choices [{:message (:message result) - :finish_reason (if (seq (get-in result [:message :tool_calls])) - "tool_calls" - "stop")}] - :usage {:prompt_tokens (:prompt_eval_count result) - :completion_tokens (:eval_count result)}})) + (if (= @active-api :openai) + (call-llm-openai* system-prompt tool-definitions messages) + (call-llm-ollama* system-prompt tool-definitions messages))) ;; ============================================================ ;; Tool Execution @@ -600,24 +724,20 @@ Always explain what you're doing before using tools. Use the tools when needed t (defn- detect-stuck-loop - "Detects three kinds of stuck loops: + "Detects two kinds of stuck loops: 1. Exact repeat: identical tool calls N times in a row (hard stop) - 2. Name cycle: same pattern of tool names repeating with varying args (hard stop) - 3. Research loop: delegate called N+ times with varying args (nudge, then stop) + 2. Research loop: delegate called N+ times with varying args (nudge, then stop) Returns {:signatures, :stuck? (tool name or nil), :nudge? bool}" [tool-calls previous-signatures repeat-threshold] (let [current-sigs (mapv tool-call-signature tool-calls) all-sigs (conj previous-signatures current-sigs) - ;; Exact repeat detection (existing behavior) + ;; Exact repeat detection - only flag when ALL sigs are identical + ;; This prevents false positives from reading multiple files or running different git commands exact-stuck? (when (>= (count all-sigs) repeat-threshold) (let [recent (take-last repeat-threshold all-sigs)] (when (apply = recent) (ffirst (last recent))))) - ;; Name cycle detection: same tool name pattern repeating with different args - ;; Catches e.g. 
[rm, create_file, rm, create_file] even when file content varies + ;; For research loop detection, we need to track tool names separately + ;; since each delegate call will have different args (different tasks) names-history (mapv (fn [sigs] (mapv first sigs)) all-sigs) - ;; 2 full repetitions of a cycle is enough to detect (e.g. rm,create,rm,create) - name-cycle? (when-not exact-stuck? - (detect-name-cycle names-history 2)) - ;; Research loop detection: delegate called N+ times with varying args name-sets (mapv set names-history) research-tools #{"delegate"} consecutive-research @@ -628,12 +748,9 @@ Always explain what you're doing before using tools. Use the tools when needed t hard-research-limit 6] {:signatures all-sigs :stuck? (or exact-stuck? - (when name-cycle? - (str "name cycle (length " name-cycle? ")")) (when (>= consecutive-research hard-research-limit) "web_search")) :nudge? (and (not exact-stuck?) - (not name-cycle?) (= consecutive-research nudge-threshold))})) ;; ============================================================ @@ -641,18 +758,20 @@ Always explain what you're doing before using tools. Use the tools when needed t ;; ============================================================ (defn- get-model-context-length - "Query Ollama for the model's context window size. Returns nil on failure." + "Returns the active model's context window size. Uses model-registry for LM Studio models, + queries Ollama /api/show for Ollama models." [] - (try - (let [response (http/post (str ollama-host "/api/show") - {:headers {"Content-Type" "application/json"} - :body (json/generate-string {:model model}) - :timeout 5000}) - result (json/parse-string (:body response) true) - info (:model_info result)] - ;; Context length key varies by architecture (e.g. "qwen3next.context_length", "llama.context_length") - (some (fn [[k v]] (when (str/ends-with? 
(name k) ".context_length") v)) info)) - (catch Exception _ nil))) + (if (= @active-api :openai) + (get-in @model-registry [@model :context]) + (try + (let [response (http/post (str @active-host "/api/show") + {:headers {"Content-Type" "application/json"} + :body (json/generate-string {:model @model}) + :timeout 5000}) + result (json/parse-string (:body response) true) + info (:model_info result)] + (some (fn [[k v]] (when (str/ends-with? (name k) ".context_length") v)) info)) + (catch Exception _ nil)))) (defn- trim-messages "Reduce conversation size by truncating old tool result content. @@ -685,7 +804,7 @@ Always explain what you're doing before using tools. Use the tools when needed t (let [log-file (init-log) cancelled? (atom false) context-length (get-model-context-length)] - (log log-file "Agent loop started | model:" model "| messages:" (count conversation) + (log log-file "Agent loop started | model:" @model "| messages:" (count conversation) (if context-length (str "| context_length: " context-length) "")) {:cancel! cancelled? :future @@ -694,7 +813,8 @@ Always explain what you're doing before using tools. Use the tools when needed t (loop [messages conversation iteration 0 tool-sigs [] - retries 0] + retries 0 + accum-tokens 0] (cond @cancelled? (do @@ -725,7 +845,7 @@ Always explain what you're doing before using tools. Use the tools when needed t trimmed (trim-messages messages keep-n)] (log log-file "LLM call failed:" (str error) "— retry" (inc retries) "with trimmed context (keeping last" keep-n ")") (swap! event-queue conj {:type :error :message (str "LLM error, trimming context and retrying... (" (str error) ")")}) - (recur trimmed iteration tool-sigs (inc retries))) + (recur trimmed iteration tool-sigs (inc retries) 0)) (do (log log-file "LLM call failed after retries:" (str error)) (swap! event-queue conj {:type :error :message (str error)}) @@ -739,19 +859,29 @@ Always explain what you're doing before using tools. 
Use the tools when needed t tool-calls (:tool_calls message) ;; Context window tracking prompt-tokens (get-in response [:usage :prompt_tokens]) - completion-tokens (get-in response [:usage :completion_tokens])] + completion-tokens (get-in response [:usage :completion_tokens]) + ;; Ollama with KV cache returns prompt_eval_count = only newly-evaluated + ;; tokens (not cached prefix), so accumulate across tool-call iterations. + ;; LM Studio / OpenAI format always returns the full prompt token count. + total-tokens (if (= @active-api :openai) + (or prompt-tokens 0) + (+ accum-tokens (or prompt-tokens 0))) + pct (when (and context-length (pos? total-tokens) (> context-length 0)) + (int (* 100 (/ total-tokens context-length))))] (log log-file "finish_reason:" finish-reason "| tool_calls:" (count (or tool-calls [])) - (when prompt-tokens (str "| tokens: " prompt-tokens "/" (or context-length "?") - " (" (when context-length (str (int (* 100 (/ prompt-tokens context-length))) "%")) ")"))) - ;; Warn user when approaching context limit - (when (and context-length prompt-tokens - (> (/ prompt-tokens context-length) 0.80)) - (let [pct (int (* 100 (/ prompt-tokens context-length)))] - (log log-file "Context window warning:" pct "% used") - (swap! event-queue conj - {:type :error - :message (str "Context " pct "% full (" prompt-tokens "/" context-length " tokens). Responses may degrade.")}))) + (when (pos? total-tokens) (str "| tokens: " total-tokens "/" (or context-length "?") + " (" (when pct (str pct "%")) ")"))) + ;; Log context window usage + (when (and pct (> pct 80)) + (log log-file "Context window warning:" pct "% used")) + ;; Push context info to status bar + (when (and pct context-length) + (swap! event-queue conj + {:type :context-info + :prompt_tokens total-tokens + :max_tokens context-length + :percentage pct})) ;; Push assistant text (when (and content (seq (str/trim content))) (swap! 
event-queue conj {:type :text :content content})) @@ -779,14 +909,19 @@ Always explain what you're doing before using tools. Use the tools when needed t (let [clean-results (mapv #(dissoc % :diff) tool-results) assistant-msg (select-keys message [:role :content :tool_calls]) new-messages (into (conj messages assistant-msg) clean-results) - ;; If nudge, inject a system hint to stop researching new-messages (if nudge? (do (log log-file "Research loop nudge injected") (conj new-messages {:role "system" :content "You have already performed several web searches. You have enough information to answer. Stop searching and synthesize your findings into a clear response now."})) - new-messages)] - (recur new-messages (inc iteration) signatures 0)))))) + new-messages) + ;; Proactively trim old tool results when context is filling up + [new-messages next-accum] + (if (and pct (>= pct 85)) + (do (log log-file "Proactive context trim at" pct "%") + [(trim-messages new-messages 8) 0]) + [new-messages total-tokens])] + (recur new-messages (inc iteration) signatures 0 next-accum)))))) ;; Done - no more tool calls (do (log log-file "Agent finished after" iteration "iterations")