persist model config, fix status bar

2026-05-01 10:36:21 -04:00
parent e8fc61518c
commit 31eaf04867
2 changed files with 292 additions and 73 deletions
@@ -194,10 +194,29 @@
    (vec (drop-while #(= % [:text ""]) lines))))
 ;; ============================================================
-;; View
+;; Status Bar - Context Window Display
 ;; ============================================================
-(defn- view [{:keys [messages input agent-running? spinner-frame scroll-offset]}]
+(defn- format-token-count [n]
  ;; Render a token count compactly for the status bar: counts of at least
  ;; 1e9 / 1e6 / 1e3 are scaled and printed with one decimal and a B / M / k
  ;; suffix; anything smaller is printed unchanged via `str`.
  ;; NOTE: the threshold is tested before rounding, so a value just under a
  ;; boundary (e.g. 999950) renders as "1000.0k" rather than "1.0M".
  (cond
    (>= n 1000000000) (format "%.1fB" (/ (double n) 1e9))
    (>= n 1000000)    (format "%.1fM" (/ (double n) 1e6))
    (>= n 1000)       (format "%.1fk" (/ (double n) 1e3))
    :else             (str n)))
 (defn- format-context-bar
  "Build the status-bar element that visualises context-window usage.

   prompt-tokens - tokens currently counted against the context window
   max-tokens    - size of the context window
   percentage    - usage as a number (nil is treated as 0)

   Returns a `[:text {:fg colour} label]` vector: a 20-cell bar followed by
   \"<used>/<max>\". The colour is green below 50%, yellow below 80%, red
   otherwise."
  [prompt-tokens max-tokens percentage]
  (let [bar-width 20
        pct    (or percentage 0)
        ;; Clamp the filled cell count to [0, bar-width]. Without this a
        ;; percentage outside 0-100 produces a bar that is not bar-width
        ;; cells wide (repeat with a negative count yields nothing).
        filled (-> (int (/ (* bar-width pct) 100))
                   (max 0)
                   (min bar-width))
        empty  (- bar-width filled)
        bar    (str (apply str (repeat filled "█"))
                    (apply str (repeat empty "░")))]
    [:text {:fg (cond
                  (< pct 50) :green
                  (< pct 80) :yellow
                  :else      :red)}
     (str " " bar " " (format-token-count prompt-tokens) "/" (format-token-count max-tokens))]))
 (defn- view [{:keys [messages input agent-running? spinner-frame scroll-offset context-info]}]
  (let [{term-w :width term-h :height} (term/get-terminal-size)
        width  (or term-w 80)
        height (or term-h 24)
@@ -238,6 +257,11 @@
                        (subs input (- (count input) max-input-width))
                        input)
        context-bar (when context-info
                      (format-context-bar (:prompt_tokens context-info)
                                          (:max_tokens context-info)
                                          (:percentage context-info)))
        input-box [:box {:border :rounded :width :fill}
                   (if (pos? clamped-offset)
                     [:text {:fg :cyan} (str "↑" clamped-offset " " display-input "█")]
@@ -248,10 +272,15 @@
      [:col {:heights [:flex input-box-height]}
       (header-view)
       input-box]
-      ;; Chat state: scrollable messages
+      ;; Chat state: scrollable messages with optional footer context bar
-      [:col {:heights [chat-height input-box-height]}
+      (if context-bar
        [:col {:heights [(max 1 (- height input-box-height 1)) input-box-height 1]}
         (into [:col] display-lines)
-       input-box])))
+         input-box
         context-bar]
        [:col {:heights [(max 1 (- height input-box-height)) input-box-height]}
         (into [:col] display-lines)
         input-box]))))
 ;; ============================================================
 ;; Update
@@ -272,9 +301,14 @@
    :tool      (update model :messages conj {:role :tool :content (:label event)})
    :diff      (update model :messages conj {:role :diff :content (:content event)})
    :error     (update model :messages conj {:role :error :content (:message event)})
-    :done  (let [m (assoc model
+    :context-info
-                          :agent-running? false
+    (assoc model :context-info
-                          :conversation (:conversation event))]
+           {:prompt_tokens (:prompt_tokens event)
            :max_tokens (:max_tokens event)
            :percentage (:percentage event)})
    :done      (let [m (-> model
                           (assoc :agent-running? false
                                  :conversation (:conversation event)))]
                 (save-current-session! m)
                 m)
    model))
@@ -310,6 +344,40 @@
                      (update :messages conj {:role :assistant :content listing})
                      (assoc :input "" :scroll-offset 0))})
        ;; Built-in: /model — list or switch models
        (str/starts-with? text "/model")
        (let [arg (str/trim (subs text (count "/model")))
              switch? (seq arg)]
          (if switch?
            (do
              (core/set-model! arg)
              {:model (-> model
                          (update :messages conj {:role :user :content text})
                          (update :messages conj {:role :assistant :content (str "Switched to model: **" arg "**")})
                          (assoc :input "" :scroll-offset 0))})
            (let [result (try
                           (let [models (core/list-models)
                                 current @core/model
                                 lines (map (fn [{:keys [id state context]}]
                                              (let [active? (= id current)
                                                    ctx (when context
                                                          (str " [" (int (/ context 1000)) "k ctx]"))
                                                    loaded? (= state "loaded")
                                                    tag (cond active? " ← current"
                                                              loaded? " (loaded)"
                                                              :else "")]
                                                (str "- " (when active? "**") id (when active? "**")
                                                     ctx tag)))
                                            models)]
                             (str "**Available models** (use `/model <name>` to switch):\n\n"
                                  (str/join "\n" lines)))
                           (catch Exception e
                             (str "Error: " (.getMessage e))))]
              {:model (-> model
                          (update :messages conj {:role :user :content text})
                          (update :messages conj {:role :assistant :content result})
                          (assoc :input "" :scroll-offset 0))})))
        ;; Skill expansion or normal message → send to LLM
        :else
        (let [expanded (context/expand-skill text (:skills model))
@@ -414,7 +482,22 @@
      (println "  agent \"fix the bug\"       Start with an initial prompt")
      (println "  agent --continue          Resume the last session")
      (System/exit 0))
-    (let [;; Load project context and skills
+    (let [;; Initialize model registry and select model: env > config > first-available
          _ (try
              (let [available (core/list-models)
                    env-model (System/getenv "AGENT_MODEL")
                    config-model (when-not env-model (:model (core/load-config)))
                    preferred (or env-model config-model)
                    in-registry? #(or (get @core/model-registry %)
                                      (some (fn [[id _]] (str/starts-with? id (str % ":")))
                                            @core/model-registry))
                    chosen (if (and preferred (in-registry? preferred))
                             preferred
                             (:id (or (first (filter #(= (:state %) "loaded") available))
                                      (first available))))]
                (when chosen (core/set-model! chosen)))
              (catch Exception _))
          ;; Load project context and skills
          project-context (context/load-project-context)
          skills (context/load-skills)
          _ (reset! core/skills-atom skills)
@@ -450,7 +533,8 @@
                         :agent-running? start?
                         :agent-handle agent-handle
                         :spinner-frame 0
-                         :scroll-offset 0}
+                         :scroll-offset 0
                         :context-info nil}
          initial-events (when start?
                           [(ev/delayed-event 100 {:type :poll})
                            (ev/delayed-event 80 {:type :spinner})])]
@@ -17,9 +17,41 @@
 ;; ============================================================
 (def ollama-host (or (System/getenv "OLLAMA_HOST") "http://localhost:11434"))
-(def model (or (System/getenv "AGENT_MODEL") "qwen3-coder-next"))
+(def lm-studio-host
  (or (System/getenv "LM_STUDIO_HOST")
      (str/replace ollama-host #":\d+$" ":1234")))
 ;; Name of the model sent with chat requests. Starts from the AGENT_MODEL
 ;; environment variable (default "qwen3.6") and is reset by `set-model!`.
 (def model (atom (or (System/getenv "AGENT_MODEL") "qwen3.6")))
 ;; Base URL that chat requests are sent to; starts as the Ollama host and is
 ;; reset by `set-model!` when the chosen model is found in the registry.
 (def active-host (atom ollama-host))
 ;; API dialect of the active backend: :ollama initially; `call-llm` uses the
 ;; OpenAI-style request path when this is :openai.
 (def active-api (atom :ollama))
 ;; Map of model id -> model info map, replaced wholesale by `list-models`.
 (def model-registry (atom {}))
 ;; Sent as the `:num_predict` option on Ollama chat requests.
 (def max-tokens 131072)
 ;; ============================================================
 ;; Config Persistence
 ;; ============================================================
 ;; Path (as a string) of the per-user config directory: <user.home>/.config/agent0
 (def config-dir
  (-> (System/getProperty "user.home")
      (io/file ".config" "agent0")
      (.getPath)))
 ;; File object for the persisted EDN config inside `config-dir`.
 (def config-file
  (io/file config-dir "agent0.edn"))
 (defn load-config
  "Read the persisted config from `config-file`.

   Returns the value parsed from the file, or nil when the file does not
   exist or reading/parsing it throws."
  []
  (when (.exists config-file)
    (try
      ;; The file is data, not code: with *read-eval* off the reader rejects
      ;; `#=(...)` forms instead of evaluating them (the resulting exception
      ;; is handled like any other parse failure).
      (binding [*read-eval* false]
        (read-string (slurp config-file)))
      (catch Exception _ nil))))
 (defn update-config!
  "Apply `f` to the persisted config map and write the result back.

   f - function of the current config map; its return value is what gets
       written. It receives {} when the config file is missing, unreadable,
       or does not contain a map.

   The new value is written to a temporary file in `config-dir` and then
   moved over `config-file`. The read-modify-write runs while holding a lock
   on the `config-file` var. Returns the result of the move."
  [f]
  (.mkdirs (io/file config-dir))
  (locking #'config-file
    (let [stored  (load-config)
          ;; Updaters such as #(assoc % :model ...) need a map; anything else
          ;; found in the file is treated as an empty config.
          current (if (map? stored) stored {})
          updated (f current)
          tmp     (io/file config-dir "agent0.edn.tmp")]
      (spit tmp (pr-str updated))
      (fs/move tmp config-file {:replace-existing true}))))
 (def base-system-prompt
  "You are a helpful coding assistant. You can read, list, create, edit, search, and find files to help the user with their coding tasks.
@@ -44,16 +76,27 @@ For predefined workflows:
 Always explain what you're doing before using tools. Use the tools when needed to complete the task.")
 (defn build-system-prompt
-  "Build the full system prompt by appending project context (if any) and loaded skills to the base prompt."
+  "Build the full system prompt by appending project context (if any), loaded skills, and available models."
  [project-context skills]
  (let [skills-section (when (seq skills)
                         (let [listing (context/format-skill-list skills)]
                           (str "## Available Skills\n\n"
                                "The following skills are loaded and ready to use via the skills tool with action 'run':\n\n"
-                                listing)))]
+                                listing)))
        models-section (when (seq @model-registry)
                         (let [current @model
                               lines (map (fn [[id {:keys [context state]}]]
                                            (str "- " (if (= id current) (str "**" id "** (active)") id)
                                                 (when context (str " — " (int (/ context 1000)) "k ctx"))
                                                 (when (= state "loaded") " [loaded]")))
                                          @model-registry)]
                           (str "## Available Models\n\n"
                                "Use `/model <name>` to switch. Current: **" current "**\n\n"
                                (str/join "\n" lines))))]
    (cond-> base-system-prompt
      project-context (str "\n\n" project-context)
-      skills-section  (str "\n\n" skills-section))))
+      skills-section  (str "\n\n" skills-section)
      models-section  (str "\n\n" models-section))))
 ;; ============================================================
 ;; Logging
@@ -93,7 +136,7 @@ Always explain what you're doing before using tools. Use the tools when needed t
    (let [f (io/file dir (str session-id ".edn"))
          session (merge {:id session-id
                          :updated (str (Instant/now))
-                          :model model}
+                          :model @model}
                         (when-not (:created data)
                           {:created (str (Instant/now))})
                         data)]
@@ -307,14 +350,99 @@ Always explain what you're doing before using tools. Use the tools when needed t
  "Timeout for LLM API calls in milliseconds (5 minutes)."
  (* 5 60 1000))
-(defn- call-llm* [sys-prompt tool-defs messages]
+;; LM Studio requires HTTP/1.1 — Java's HttpClient defaults to HTTP/2 which
-  (let [body {:model model
+;; hangs against LM Studio's Express-based server over Tailscale.
 (def ^:private http1-client
  ;; A java.net.http client whose builder is pinned to HTTP/1.1.
  (.build
   (.version (java.net.http.HttpClient/newBuilder)
             java.net.http.HttpClient$Version/HTTP_1_1)))
 (defn- ipv4-url
  "Rewrite `url` so that its host is replaced by the first IPv4 address the
   host resolves to. Works around the JVM preferring IPv6 for hosts whose
   server only listens on IPv4.

   Scheme, user-info, port, path and query are carried over; path and query
   are copied in their raw (still percent-encoded) form so the rewritten URL
   addresses the same resource.

   Returns `url` unchanged when it has no host component, when no IPv4
   address is found, or when parsing/resolution throws."
  [url]
  (try
    (let [uri  (java.net.URI. url)
          host (.getHost uri)
          ;; Only resolve a real host: InetAddress/getAllByName answers a
          ;; nil host with the loopback address, which would silently turn a
          ;; host-less string into a URL pointing at 127.0.0.1.
          ipv4 (when host
                 (->> (java.net.InetAddress/getAllByName host)
                      (filter #(instance? java.net.Inet4Address %))
                      first))]
      (if ipv4
        (let [port (.getPort uri)]
          (str (.getScheme uri) "://"
               (when-let [user-info (.getRawUserInfo uri)] (str user-info "@"))
               (.getHostAddress ^java.net.InetAddress ipv4)
               (when (pos? port) (str ":" port))
               (.getRawPath uri)
               (when-let [query (.getRawQuery uri)] (str "?" query))))
        url))
    (catch Exception _ url)))
 (defn list-models
  "Ask Ollama (`/api/tags`) and LM Studio (`/api/v0/models`) for their models.

   Each backend is queried independently with a 5 second timeout; a backend
   whose request or response parsing throws contributes no entries. LM Studio
   entries of type \"embeddings\" are left out.

   When at least one model was found, `model-registry` is replaced with an
   id -> entry map and the entries are returned as a seq of
   {:id :host :api :state :context} maps (Ollama entries carry no :context).
   Throws ex-info when neither backend yielded a model."
  []
  (letfn [(ollama-entry [m]
            {:id (:name m) :host ollama-host :api :ollama :state "loaded"})
          (lmstudio-entry [m]
            {:id      (:id m)
             :host    lm-studio-host
             :api     :openai
             :state   (:state m)
             :context (or (:loaded_context_length m)
                          (:max_context_length m))})
          (embedding? [m]
            (= (:type m) "embeddings"))]
    (let [ollama-models
          (try
            (mapv ollama-entry
                  (-> (http/get (str ollama-host "/api/tags") {:timeout 5000})
                      :body
                      (json/parse-string true)
                      :models))
            (catch Exception _ nil))
          lmstudio-models
          (try
            (->> (-> (http/get (ipv4-url (str lm-studio-host "/api/v0/models"))
                               {:client http1-client :timeout 5000})
                     :body
                     (json/parse-string true)
                     :data)
                 (remove embedding?)
                 (mapv lmstudio-entry))
            (catch Exception _ nil))
          found (seq (concat ollama-models lmstudio-models))]
      (if found
        (do
          ;; Index by id; on duplicate ids the later (LM Studio) entry wins.
          (reset! model-registry (into {} (map (juxt :id identity)) found))
          found)
        (throw (ex-info (str "No models found at " ollama-host " or " lm-studio-host) {}))))))
 (defn set-model!
  "Make `new-model` the active model and persist the choice.

   Always resets `model`. If the registry has an entry for the name (either
   exactly, or under an id that starts with \"<name>:\" as with Ollama's
   `:latest` suffix), `active-host` and `active-api` are switched to that
   entry's backend; otherwise they are left as they were. Finally the name
   is stored under :model in the config file."
  [new-model]
  (reset! model new-model)
  (let [tagged-prefix (str new-model ":")
        exact         (get @model-registry new-model)
        entry         (or exact
                          (some (fn [[id info]]
                                  (when (str/starts-with? id tagged-prefix)
                                    info))
                                @model-registry))]
    (when-let [{:keys [host api]} entry]
      (reset! active-host host)
      (reset! active-api api)))
  (update-config! (fn [cfg] (assoc cfg :model new-model))))
 (defn- call-llm-ollama* [sys-prompt tool-defs messages]
  (let [body {:model @model
              :options {:num_predict max-tokens}
              :messages (into [{:role "system" :content sys-prompt}] messages)
              :tools tool-defs
              :stream false}
-        response (http/post (str ollama-host "/api/chat")
+        response (http/post (str @active-host "/api/chat")
                            {:headers {"Content-Type" "application/json"}
                             :body (json/generate-string body)
                             :timeout llm-timeout-ms})
        result (json/parse-string (:body response) true)]
    {:choices [{:message (:message result)
                :finish_reason (if (seq (get-in result [:message :tool_calls]))
                                 "tool_calls"
                                 "stop")}]
     :usage {:prompt_tokens (:prompt_eval_count result)
             :completion_tokens (:eval_count result)}}))
 (defn- call-llm-openai* [sys-prompt tool-defs messages]
  ;; POST a non-streaming chat completion to the active host's
  ;; /v1/chat/completions endpoint (system prompt prepended to `messages`)
  ;; and return the response body parsed as JSON with keyword keys.
  (let [system-msg {:role "system" :content sys-prompt}
        payload    (json/generate-string
                    {:model    @model
                     :messages (into [system-msg] messages)
                     :tools    tool-defs
                     :stream   false})
        endpoint   (ipv4-url (str @active-host "/v1/chat/completions"))]
    (-> (http/post endpoint
                   {:client  http1-client
                    :headers {"Content-Type" "application/json"}
                    :body    payload
                    :timeout llm-timeout-ms})
        :body
        (json/parse-string true))))
@@ -350,7 +478,7 @@ Always explain what you're doing before using tools. Use the tools when needed t
      (if (>= iteration max-iter)
        (do (log log-file "  [subagent] max iterations reached")
            (str/join "\n\n" texts))
-        (let [result (call-llm* system-prompt tool-defs messages)
+        (let [result (call-llm-ollama* system-prompt tool-defs messages)
              message (:message result)
              content (:content message)
              tool-calls (:tool_calls message)
@@ -511,13 +639,9 @@ Always explain what you're doing before using tools. Use the tools when needed t
 ;; ============================================================
 (defn call-llm [system-prompt messages]
-  (let [result (call-llm* system-prompt tool-definitions messages)]
+  (if (= @active-api :openai)
-    {:choices [{:message (:message result)
+    (call-llm-openai* system-prompt tool-definitions messages)
-                :finish_reason (if (seq (get-in result [:message :tool_calls]))
+    (call-llm-ollama* system-prompt tool-definitions messages)))
                                 "tool_calls"
                                 "stop")}]
     :usage {:prompt_tokens (:prompt_eval_count result)
             :completion_tokens (:eval_count result)}}))
 ;; ============================================================
 ;; Tool Execution
@@ -600,24 +724,20 @@ Always explain what you're doing before using tools. Use the tools when needed t
 (defn- detect-stuck-loop
  "Detects three kinds of stuck loops:
   1. Exact repeat: identical tool calls N times in a row (hard stop)
-   2. Name cycle: same pattern of tool names repeating with varying args (hard stop)
+   2. Name cycle: same pattern of tool names repeating with varying args (hard stop)
   3. Research loop: delegate called N+ times with varying args (nudge, then stop)
   Returns {:signatures, :stuck? (tool name or nil), :nudge? bool}"
  [tool-calls previous-signatures repeat-threshold]
  (let [current-sigs (mapv tool-call-signature tool-calls)
        all-sigs (conj previous-signatures current-sigs)
-        ;; Exact repeat detection (existing behavior)
+        ;; Exact repeat detection - only flag when ALL sigs are identical
        ;; This prevents false positives from reading multiple files or running different git commands
        exact-stuck? (when (>= (count all-sigs) repeat-threshold)
                       (let [recent (take-last repeat-threshold all-sigs)]
                         (when (apply = recent)
                           (ffirst (last recent)))))
-        ;; Name cycle detection: same tool name pattern repeating with different args
+        ;; For research loop detection, we need to track tool names separately
-        ;; Catches e.g. [rm, create_file, rm, create_file] even when file content varies
+        ;; since each delegate call will have different args (different tasks)
        names-history (mapv (fn [sigs] (mapv first sigs)) all-sigs)
        ;; 2 full repetitions of a cycle is enough to detect (e.g. rm,create,rm,create)
        name-cycle? (when-not exact-stuck?
                      (detect-name-cycle names-history 2))
        ;; Research loop detection: delegate called N+ times with varying args
        name-sets (mapv set names-history)
        research-tools #{"delegate"}
        consecutive-research
@@ -628,12 +748,9 @@ Always explain what you're doing before using tools. Use the tools when needed t
        hard-research-limit 6]
    {:signatures all-sigs
     :stuck? (or exact-stuck?
                 (when name-cycle?
                   (str "name cycle (length " name-cycle? ")"))
                 (when (>= consecutive-research hard-research-limit)
                   "web_search"))
     :nudge? (and (not exact-stuck?)
                  (not name-cycle?)
                  (= consecutive-research nudge-threshold))}))
 ;; ============================================================
@@ -641,18 +758,20 @@ Always explain what you're doing before using tools. Use the tools when needed t
 ;; ============================================================
 (defn- get-model-context-length
-  "Query Ollama for the model's context window size. Returns nil on failure."
+  "Returns the active model's context window size. Uses model-registry for LM Studio models,
   queries Ollama /api/show for Ollama models."
  []
  (if (= @active-api :openai)
    (get-in @model-registry [@model :context])
    (try
-    (let [response (http/post (str ollama-host "/api/show")
+      (let [response (http/post (str @active-host "/api/show")
                                {:headers {"Content-Type" "application/json"}
-                               :body (json/generate-string {:model model})
+                                 :body (json/generate-string {:model @model})
                                 :timeout 5000})
            result (json/parse-string (:body response) true)
            info (:model_info result)]
      ;; Context length key varies by architecture (e.g. "qwen3next.context_length", "llama.context_length")
        (some (fn [[k v]] (when (str/ends-with? (name k) ".context_length") v)) info))
-    (catch Exception _ nil)))
+      (catch Exception _ nil))))
 (defn- trim-messages
  "Reduce conversation size by truncating old tool result content.
@@ -685,7 +804,7 @@ Always explain what you're doing before using tools. Use the tools when needed t
  (let [log-file (init-log)
        cancelled? (atom false)
        context-length (get-model-context-length)]
-    (log log-file "Agent loop started | model:" model "| messages:" (count conversation)
+    (log log-file "Agent loop started | model:" @model "| messages:" (count conversation)
         (if context-length (str "| context_length: " context-length) ""))
    {:cancel! cancelled?
     :future
@@ -694,7 +813,8 @@ Always explain what you're doing before using tools. Use the tools when needed t
         (loop [messages conversation
                iteration 0
                tool-sigs []
-                retries 0]
+                retries 0
                accum-tokens 0]
           (cond
             @cancelled?
             (do
@@ -725,7 +845,7 @@ Always explain what you're doing before using tools. Use the tools when needed t
                             trimmed (trim-messages messages keep-n)]
                         (log log-file "LLM call failed:" (str error) "— retry" (inc retries) "with trimmed context (keeping last" keep-n ")")
                         (swap! event-queue conj {:type :error :message (str "LLM error, trimming context and retrying... (" (str error) ")")})
-                         (recur trimmed iteration tool-sigs (inc retries)))
+                         (recur trimmed iteration tool-sigs (inc retries) 0))
                       (do
                         (log log-file "LLM call failed after retries:" (str error))
                         (swap! event-queue conj {:type :error :message (str error)})
@@ -739,19 +859,29 @@ Always explain what you're doing before using tools. Use the tools when needed t
                         tool-calls (:tool_calls message)
                         ;; Context window tracking
                         prompt-tokens (get-in response [:usage :prompt_tokens])
-                         completion-tokens (get-in response [:usage :completion_tokens])]
+                         completion-tokens (get-in response [:usage :completion_tokens])
                         ;; Ollama with KV cache returns prompt_eval_count = only newly-evaluated
                         ;; tokens (not cached prefix), so accumulate across tool-call iterations.
                         ;; LM Studio / OpenAI format always returns the full prompt token count.
                         total-tokens (if (= @active-api :openai)
                                        (or prompt-tokens 0)
                                        (+ accum-tokens (or prompt-tokens 0)))
                         pct (when (and context-length (pos? total-tokens) (> context-length 0))
                               (int (* 100 (/ total-tokens context-length))))]
                     (log log-file "finish_reason:" finish-reason
                          "| tool_calls:" (count (or tool-calls []))
-                          (when prompt-tokens (str "| tokens: " prompt-tokens "/" (or context-length "?")
+                          (when (pos? total-tokens) (str "| tokens: " total-tokens "/" (or context-length "?")
-                                                   " (" (when context-length (str (int (* 100 (/ prompt-tokens context-length))) "%")) ")")))
+                                                         " (" (when pct (str pct "%")) ")")))
-                     ;; Warn user when approaching context limit
+                     ;; Log context window usage
-                     (when (and context-length prompt-tokens
+                     (when (and pct (> pct 80))
-                                (> (/ prompt-tokens context-length) 0.80))
+                       (log log-file "Context window warning:" pct "% used"))
-                       (let [pct (int (* 100 (/ prompt-tokens context-length)))]
+                     ;; Push context info to status bar
-                         (log log-file "Context window warning:" pct "% used")
+                     (when (and pct context-length)
                       (swap! event-queue conj
-                                {:type :error
+                              {:type :context-info
-                                 :message (str "Context " pct "% full (" prompt-tokens "/" context-length " tokens). Responses may degrade.")})))
+                               :prompt_tokens total-tokens
                               :max_tokens context-length
                               :percentage pct}))
                     ;; Push assistant text
                     (when (and content (seq (str/trim content)))
                       (swap! event-queue conj {:type :text :content content}))
@@ -779,14 +909,19 @@ Always explain what you're doing before using tools. Use the tools when needed t
                               (let [clean-results (mapv #(dissoc % :diff) tool-results)
                                     assistant-msg (select-keys message [:role :content :tool_calls])
                                     new-messages (into (conj messages assistant-msg) clean-results)
                                   ;; If nudge, inject a system hint to stop researching
                                     new-messages (if nudge?
                                                    (do (log log-file "Research loop nudge injected")
                                                        (conj new-messages
                                                              {:role "system"
                                                               :content "You have already performed several web searches. You have enough information to answer. Stop searching and synthesize your findings into a clear response now."}))
-                                                    new-messages)]
+                                                    new-messages)
-                                 (recur new-messages (inc iteration) signatures 0))))))
+                                     ;; Proactively trim old tool results when context is filling up
                                     [new-messages next-accum]
                                     (if (and pct (>= pct 85))
                                       (do (log log-file "Proactive context trim at" pct "%")
                                           [(trim-messages new-messages 8) 0])
                                       [new-messages total-tokens])]
                                 (recur new-messages (inc iteration) signatures 0 next-accum))))))
                       ;; Done - no more tool calls
                       (do
                         (log log-file "Agent finished after" iteration "iterations")