542 lines
24 KiB
Clojure
542 lines
24 KiB
Clojure
(ns agent.syntax
|
|
"Regex-based syntax highlighting for code blocks and diffs.
|
|
Single-pass tokenizer using java.util.regex.Matcher.lookingAt()."
|
|
(:require [clojure.string :as str])
|
|
(:import [java.util.regex Pattern Matcher]))
|
|
|
|
;; ============================================================
|
|
;; Color Palette (ANSI 256)
|
|
;; ============================================================
|
|
|
|
(def ^:private colors
  "ANSI 256-color foreground escape sequences, keyed by token category."
  (hash-map
   ;; literals
   :string   "\033[38;5;108m"
   :number   "\033[38;5;216m"
   :constant "\033[38;5;216m"
   ;; names
   :keyword  "\033[38;5;176m"
   :builtin  "\033[38;5;75m"
   :type     "\033[38;5;180m"
   :clj-kw   "\033[38;5;73m"
   :param    "\033[38;5;208m"
   ;; everything after a comment marker
   :comment  "\033[38;5;245m"))
|
|
|
|
;; ============================================================
|
|
;; Tokenizer Engine
|
|
;; ============================================================
|
|
|
|
(defn- highlight-line*
  "Tokenize `line` left to right and return it with ANSI colors inserted.

  At each position the rules are tried in order; the first rule whose pattern
  matches a non-empty token starting exactly at that position wins. The token
  is emitted and scanning resumes right after it. When no rule matches, the
  single character at that position is copied through unchanged.

  `rules`      - vector of [compiled-Pattern color-or-fn]. When color-or-fn is
                 a function it is called with the matched text and returns an
                 ANSI color string, or nil to emit the token uncolored.
                 Any other value is used as the color directly.
  `default-fg` - ANSI code appended after every colored token so that the
                 following unhighlighted text gets its color back.

  Returns the assembled string."
  [^String line rules ^String default-fg]
  (let [len      (.length line)
        n-rules  (count rules)
        sb       (StringBuilder.)
        matchers (object-array n-rules)]
    ;; One reusable Matcher per rule; only its region changes while scanning.
    (dotimes [i n-rules]
      (let [[^Pattern pat _] (nth rules i)]
        (aset matchers i
              (doto (.matcher pat line)
                ;; Transparent bounds let \b and lookaround inspect the text
                ;; before the scan position. With the default opaque bounds
                ;; the region start looks like start-of-input, so a pattern
                ;; such as \btrue\b would fire in the middle of a word.
                (.useTransparentBounds true)))))
    (loop [pos 0]
      (if (>= pos len)
        (.toString sb)
        (let [next-pos
              (loop [ri 0]
                (when (< ri n-rules)
                  (let [^Matcher m (aget matchers ri)]
                    (.region m pos len)
                    ;; A zero-length match would not advance `pos` and the
                    ;; outer loop would never terminate; treat it as a miss.
                    (if (and (.lookingAt m) (> (.end m) pos))
                      (let [[_ color-or-fn] (nth rules ri)
                            text  (.group m)
                            color (if (fn? color-or-fn)
                                    (color-or-fn text)
                                    color-or-fn)]
                        (if color
                          (do (.append sb color)
                              (.append sb text)
                              (.append sb default-fg))
                          (.append sb text))
                        (.end m))
                      (recur (inc ri))))))]
          (if next-pos
            (recur (long next-pos))
            (do (.append sb (.charAt line pos))
                (recur (inc pos)))))))))
|
|
|
|
;; ============================================================
|
|
;; Language: Clojure
|
|
;; ============================================================
|
|
|
|
(def ^:private clj-special-forms
  "Clojure names that `clj-classify` colors as keywords."
  (reduce into #{}
          [;; definitions
           ["def" "defn" "defn-" "defmacro" "defmethod" "defmulti" "defonce"
            "defprotocol" "defrecord" "deftype" "defstruct" "definline"
            "definterface"]
           ;; functions, bindings and blocks
           ["fn" "fn*" "do" "let" "letfn" "binding" "loop" "recur"]
           ;; conditionals
           ["if" "if-let" "if-not" "if-some"
            "when" "when-let" "when-not" "when-first" "when-some"
            "cond" "condp" "cond->" "cond->>" "case"]
           ;; exceptions
           ["try" "catch" "finally" "throw"]
           ;; quoting and namespaces
           ["quote" "var" "import" "require" "use" "refer" "ns"]
           ;; boolean logic
           ["and" "or" "not"]
           ;; iteration and side effects
           ["doseq" "dotimes" "doto" "dorun" "doall" "for" "while"]
           ;; host interop
           ["new" "set!" "monitor-enter" "monitor-exit"]
           ;; threading macros
           ["->" "->>" "as->" "some->" "some->>"]]))
|
|
|
|
(def ^:private clj-builtins
  "Clojure function names that `clj-classify` colors as builtins."
  (reduce into #{}
          [;; higher-order functions
           ["map" "filter" "reduce" "apply" "partial" "comp" "juxt" "complement"
            "mapv" "filterv" "mapcat" "keep" "remove"]
           ;; sequence access and construction
           ["first" "second" "last" "rest" "next" "cons" "conj" "into"
            "nth" "nfirst" "nnext" "fnext" "ffirst"]
           ;; associative access
           ["assoc" "dissoc" "update" "get" "get-in" "assoc-in" "update-in"
            "select-keys" "merge" "merge-with" "keys" "vals" "contains?" "find"]
           ;; reference types
           ["atom" "deref" "reset!" "swap!" "compare-and-set!"
            "volatile!" "vswap!" "vreset!"]
           ;; strings and names
           ["str" "subs" "format" "name" "keyword" "symbol"]
           ;; printing
           ["println" "print" "prn" "pr" "pr-str" "prn-str"]
           ;; collections
           ["count" "empty?" "seq" "seq?" "sequential?"
            "vec" "vector" "vector?" "list" "list?" "set" "hash-set" "sorted-set"
            "hash-map" "sorted-map" "zipmap" "frequencies" "group-by"]
           ;; sequence generation and slicing
           ["range" "repeat" "repeatedly" "iterate" "cycle" "interleave"
            "interpose" "take" "drop" "take-while" "drop-while" "split-at"
            "split-with" "partition" "partition-by" "partition-all"
            "concat" "flatten" "distinct" "sort" "sort-by" "reverse" "shuffle"]
           ;; quantifiers and trivial functions
           ["every?" "some" "not-every?" "not-any?" "identity" "constantly"]
           ;; arithmetic and comparison
           ["inc" "dec" "+" "-" "*" "/" "mod" "rem" "quot"
            "=" "==" "not=" "<" ">" "<=" ">="
            "max" "min" "abs" "rand" "rand-int"]
           ;; predicates
           ["zero?" "pos?" "neg?" "even?" "odd?" "number?" "integer?"
            "nil?" "true?" "false?" "string?" "keyword?" "symbol?" "map?"
            "coll?" "fn?"]
           ;; types and metadata
           ["type" "class" "instance?" "satisfies?" "extends?"
            "meta" "with-meta" "vary-meta"]
           ;; reading, files and regular expressions
           ["read-string" "slurp" "spit"
            "re-find" "re-matches" "re-seq" "re-pattern"]
           ;; concurrency
           ["future" "promise" "deliver" "realized?" "pmap"]
           ;; resolution and evaluation
           ["resolve" "ns-resolve" "eval"]
           ;; transducers and reduction control
           ["not-empty" "bounded-count" "transduce" "sequence"
            "reduced" "reduced?" "unreduced" "ensure-reduced"]
           ;; exceptions
           ["ex-info" "ex-data" "ex-message"]]))
|
|
|
|
(def ^:private clj-constants
  "Clojure literal names that `clj-classify` colors as constants."
  (hash-set "nil" "true" "false"))
|
|
|
|
(defn- clj-classify
  "Pick the color for a Clojure symbol token.

  Checks, in order, the constant, special-form and builtin sets and returns
  the matching entry of `colors`; returns nil when `text` is in none of them."
  [text]
  (condp contains? text
    clj-constants     (:constant colors)
    clj-special-forms (:keyword colors)
    clj-builtins      (:builtin colors)
    nil))
|
|
|
|
(def ^:private clj-rules
  "Tokenizer rules for Clojure: a vector of [compiled-Pattern color-or-fn].

  The rules are listed in priority order, so comments and strings precede the
  catch-all symbol rule, and the more specific number forms precede the plain
  integer rule."
  (mapv (fn [[re c]] [(Pattern/compile re) c])
        [;; line comment
         [";.*" (:comment colors)]
         ;; string literal
         ["\"(?:[^\"\\\\]|\\\\.)*\"" (:string colors)]
         ;; regex literal
         ["#\"(?:[^\"\\\\]|\\\\.)*\"" (:string colors)]
         ;; character literal: named, \uXXXX, \oNNN, or any single character.
         ;; Accepting any character (not just letters) keeps literals such as
         ;; \; and \" from being read as the start of a comment or a string.
         ["\\\\(?:newline|space|tab|backspace|formfeed|return|u[0-9a-fA-F]{4}|o[0-7]{1,3}|.)"
          (:string colors)]
         ;; keyword
         [":[a-zA-Z_*+!?<>=/.\\-][a-zA-Z0-9_*+!?<>=/.\\-:#]*" (:clj-kw colors)]
         ;; numbers: hex, decimal, ratio, integer
         ["-?0[xX][0-9a-fA-F]+" (:number colors)]
         ["-?\\d+\\.\\d+" (:number colors)]
         ["-?\\d+/\\d+" (:number colors)]
         ["-?\\d+" (:number colors)]
         ;; symbolic values
         ["##(?:Inf|-Inf|NaN)" (:constant colors)]
         ;; symbol; color decided by clj-classify
         ["[a-zA-Z_*+!?<>=/.\\-][a-zA-Z0-9_*+!?<>=/.\\-:#]*" clj-classify]]))
|
|
|
|
;; ============================================================
|
|
;; Language: JavaScript / TypeScript
|
|
;; ============================================================
|
|
|
|
(def ^:private js-keywords
  "JavaScript and TypeScript words that `js-classify` colors as keywords."
  (let [javascript ["async" "await" "break" "case" "catch" "class" "const"
                    "continue" "debugger" "default" "delete" "do" "else"
                    "export" "extends" "finally" "for" "from" "function" "if"
                    "import" "in" "instanceof" "let" "new" "of" "return"
                    "static" "super" "switch" "this" "throw" "try" "typeof"
                    "var" "void" "while" "with" "yield"]
        typescript ["type" "interface" "enum" "namespace" "declare"
                    "implements" "abstract" "as" "readonly" "keyof" "infer"]]
    (-> #{}
        (into javascript)
        (into typescript))))
|
|
|
|
(def ^:private js-builtins
  "JavaScript global names that `js-classify` colors as builtins."
  (reduce into #{}
          [;; standard global objects
           ["console" "Math" "JSON" "Object" "Array" "String" "Number" "Boolean"
            "Promise" "Map" "Set" "WeakMap" "WeakSet" "Symbol" "Proxy" "Reflect"]
           ;; global functions and values
           ["parseInt" "parseFloat" "isNaN" "isFinite"
            "undefined" "NaN" "Infinity"]
           ;; runtime environment globals
           ["require" "module" "exports" "process" "Buffer" "global"
            "window" "document"]]))
|
|
|
|
(def ^:private js-constants
  "JavaScript literal names that `js-classify` colors as constants."
  (hash-set "true" "false" "null" "undefined" "NaN" "Infinity"))
|
|
|
|
(defn- js-classify
  "Pick the color for a JavaScript/TypeScript identifier token.

  Constants win over keywords, keywords over builtins. Any other identifier
  of at least two characters that starts with an upper-case letter gets the
  type color. Returns nil when nothing applies."
  [text]
  (letfn [(capitalized? [^String s]
            (and (>= (count s) 2)
                 (Character/isUpperCase (.charAt s 0))))]
    (condp contains? text
      js-constants (:constant colors)
      js-keywords  (:keyword colors)
      js-builtins  (:builtin colors)
      (when (capitalized? text)
        (:type colors)))))
|
|
|
|
(def ^:private js-rules
  "Tokenizer rules for JavaScript/TypeScript, in priority order, as a vector
  of [compiled-Pattern color-or-fn]."
  (let [comment-fg (:comment colors)
        string-fg  (:string colors)
        number-fg  (:number colors)]
    (vec
     (for [[regex paint]
           [;; comments: line, then block
            ["//.*" comment-fg]
            ["/\\*[\\s\\S]*?\\*/" comment-fg]
            ;; strings: double-quoted, single-quoted, template
            ["\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["'(?:[^'\\\\]|\\\\.)*'" string-fg]
            ["`(?:[^`\\\\]|\\\\.)*`" string-fg]
            ;; regex literal with optional flags
            ["/(?![*/])(?:[^/\\\\]|\\\\.)+/[gimsuy]*" string-fg]
            ;; decorator
            ["@[a-zA-Z_][a-zA-Z0-9_]*" (:param colors)]
            ;; numbers: hex, decimal, integer
            ["0[xX][0-9a-fA-F]+" number-fg]
            ["\\d+\\.\\d+(?:[eE][+-]?\\d+)?" number-fg]
            ["\\d+" number-fg]
            ;; identifier; color decided by js-classify
            ["[a-zA-Z_$][a-zA-Z0-9_$]*" js-classify]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language: Python
|
|
;; ============================================================
|
|
|
|
(def ^:private py-keywords
  "Python words that `py-classify` colors as keywords."
  (set ["and" "as" "assert" "async" "await"
        "break" "class" "continue"
        "def" "del" "elif" "else" "except"
        "finally" "for" "from"
        "global" "if" "import" "in" "is"
        "lambda" "nonlocal" "not"
        "or" "pass" "raise" "return"
        "try" "while" "with" "yield"
        ;; also listed as keywords here
        "match" "case"]))
|
|
|
|
(def ^:private py-builtins
  "Python names that `py-classify` colors as builtins."
  (reduce into #{}
          [;; functions and types
           ["print" "len" "range" "int" "str" "float" "list" "dict" "set"
            "tuple" "bool" "type" "isinstance" "issubclass" "hasattr"
            "getattr" "setattr" "super" "property" "staticmethod"
            "classmethod" "enumerate" "zip" "map" "filter" "sorted"
            "reversed" "any" "all" "min" "max" "sum" "abs" "round" "input"
            "open" "repr" "id" "hash" "callable" "iter" "next"]
           ;; exception classes
           ["ValueError" "TypeError" "KeyError" "IndexError" "RuntimeError"
            "Exception" "StopIteration" "AttributeError" "ImportError"
            "OSError"]
           ;; conventional receiver names
           ["self" "cls"]]))
|
|
|
|
(def ^:private py-constants
  "Python literal names that `py-classify` colors as constants."
  (hash-set "True" "False" "None"))
|
|
|
|
(defn- py-classify
  "Pick the color for a Python identifier token.

  Tries the constant, keyword and builtin sets in that order. An identifier
  found in none of them gets the type color when it has at least two
  characters and starts with an upper-case letter, and nil otherwise."
  [text]
  (let [category (cond
                   (py-constants text) :constant
                   (py-keywords text)  :keyword
                   (py-builtins text)  :builtin
                   (and (>= (count text) 2)
                        (Character/isUpperCase (.charAt ^String text 0))) :type)]
    (when category
      (get colors category))))
|
|
|
|
(def ^:private py-rules
  "Tokenizer rules for Python, in priority order, as a vector of
  [compiled-Pattern color-or-fn]."
  (let [string-fg (:string colors)
        number-fg (:number colors)]
    (vec
     (for [[regex paint]
           [;; comment
            ["#.*" (:comment colors)]
            ;; triple-quoted strings come before the single-quoted forms
            ["\"\"\"[\\s\\S]*?\"\"\"" string-fg]
            ["'''[\\s\\S]*?'''" string-fg]
            ;; f-strings
            ["f\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["f'(?:[^'\\\\]|\\\\.)*'" string-fg]
            ;; plain strings
            ["\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["'(?:[^'\\\\]|\\\\.)*'" string-fg]
            ;; decorator, dotted names allowed
            ["@[a-zA-Z_][a-zA-Z0-9_.]*" (:param colors)]
            ;; numbers: hex, decimal, integer
            ["0[xX][0-9a-fA-F]+" number-fg]
            ["\\d+\\.\\d+(?:[eE][+-]?\\d+)?" number-fg]
            ["\\d+" number-fg]
            ;; identifier; color decided by py-classify
            ["[a-zA-Z_][a-zA-Z0-9_]*" py-classify]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language: Java
|
|
;; ============================================================
|
|
|
|
(def ^:private java-keywords
  "Java words that `java-classify` colors as keywords."
  (reduce into #{}
          [;; primitive types and void
           ["boolean" "byte" "char" "double" "float" "int" "long" "short" "void"]
           ;; modifiers
           ["abstract" "final" "native" "private" "protected" "public" "static"
            "strictfp" "synchronized" "transient" "volatile"]
           ;; declarations
           ["class" "enum" "extends" "implements" "import" "interface"
            "package" "throws" "var" "record" "sealed" "permits" "non-sealed"]
           ;; control flow
           ["assert" "break" "case" "catch" "continue" "default" "do" "else"
            "finally" "for" "if" "return" "switch" "throw" "try" "while"
            "yield"]
           ;; expressions
           ["instanceof" "new" "super" "this"]
           ;; reserved but unused
           ["const" "goto"]]))
|
|
|
|
(def ^:private java-constants
  "Java literal names that `java-classify` colors as constants."
  (hash-set "true" "false" "null"))
|
|
|
|
(defn- java-classify
  "Pick the color for a Java identifier token.

  Constants are checked first, then keywords. Any other identifier of at
  least two characters that starts with an upper-case letter gets the type
  color. Returns nil when nothing applies."
  [text]
  (if (contains? java-constants text)
    (:constant colors)
    (if (contains? java-keywords text)
      (:keyword colors)
      (when (and (>= (count text) 2)
                 (Character/isUpperCase (.charAt ^String text 0)))
        (:type colors)))))
|
|
|
|
(def ^:private java-rules
  "Tokenizer rules for Java, in priority order, as a vector of
  [compiled-Pattern color-or-fn]."
  (let [comment-fg (:comment colors)
        string-fg  (:string colors)
        number-fg  (:number colors)]
    (vec
     (for [[regex paint]
           [;; comments: line, then block
            ["//.*" comment-fg]
            ["/\\*[\\s\\S]*?\\*/" comment-fg]
            ;; string and char literals
            ["\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["'(?:[^'\\\\]|\\\\.)*'" string-fg]
            ;; annotation
            ["@[a-zA-Z_][a-zA-Z0-9_]*" (:param colors)]
            ;; numbers with optional type suffix: hex, decimal, integer
            ["0[xX][0-9a-fA-F]+[lL]?" number-fg]
            ["\\d+\\.\\d+[fFdD]?" number-fg]
            ["\\d+[lLfFdD]?" number-fg]
            ;; identifier; color decided by java-classify
            ["[a-zA-Z_$][a-zA-Z0-9_$]*" java-classify]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language: Kotlin
|
|
;; ============================================================
|
|
|
|
(def ^:private kt-keywords
  "Kotlin words that `kt-classify` colors as keywords."
  (set ["abstract" "annotation" "as"
        "break" "by"
        "catch" "class" "companion" "const" "constructor" "continue"
        "crossinline"
        "data" "do"
        "else" "enum" "expect" "external"
        "final" "finally" "for" "fun"
        "get"
        "if" "import" "in" "infix" "init" "inline" "inner" "interface"
        "internal" "is"
        "lateinit"
        "noinline"
        "object" "open" "operator" "out" "override"
        "package" "private" "protected" "public"
        "reified" "return"
        "sealed" "set" "super" "suspend"
        "tailrec" "this" "throw" "try" "typealias"
        "val" "var" "vararg"
        "when" "where" "while"
        "yield"]))
|
|
|
|
(def ^:private kt-builtins
  "Kotlin names that `kt-classify` colors as builtins."
  (reduce into #{}
          [;; output
           ["println" "print"]
           ;; collection and array factories
           ["listOf" "mutableListOf" "mapOf" "mutableMapOf" "setOf"
            "mutableSetOf" "arrayOf" "intArrayOf" "emptyList" "emptyMap"]
           ;; checks
           ["require" "check" "error" "TODO"]
           ;; general-purpose functions
           ["repeat" "run" "with" "apply" "also" "let" "takeIf" "takeUnless"
            "lazy"]
           ;; coroutines
           ["coroutineScope" "launch" "async"]
           ;; basic types
           ["String" "Int" "Long" "Double" "Float" "Boolean" "Char" "Unit"
            "Any" "Nothing"]]))
|
|
|
|
(def ^:private kt-constants
  "Kotlin literal names that `kt-classify` colors as constants."
  (hash-set "true" "false" "null"))
|
|
|
|
(defn- kt-classify
  "Pick the color for a Kotlin identifier token.

  Constants win over keywords, keywords over builtins. Any other identifier
  of at least two characters that starts with an upper-case letter gets the
  type color. Returns nil when nothing applies."
  [text]
  (letfn [(capitalized? [^String s]
            (and (>= (count s) 2)
                 (Character/isUpperCase (.charAt s 0))))]
    (condp contains? text
      kt-constants (:constant colors)
      kt-keywords  (:keyword colors)
      kt-builtins  (:builtin colors)
      (when (capitalized? text)
        (:type colors)))))
|
|
|
|
(def ^:private kt-rules
  "Tokenizer rules for Kotlin, in priority order, as a vector of
  [compiled-Pattern color-or-fn]."
  (let [comment-fg (:comment colors)
        string-fg  (:string colors)
        number-fg  (:number colors)]
    (vec
     (for [[regex paint]
           [;; comments: line, then block
            ["//.*" comment-fg]
            ["/\\*[\\s\\S]*?\\*/" comment-fg]
            ;; raw (triple-quoted) string before the ordinary string
            ["\"\"\"[\\s\\S]*?\"\"\"" string-fg]
            ["\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["'(?:[^'\\\\]|\\\\.)*'" string-fg]
            ;; annotation
            ["@[a-zA-Z_][a-zA-Z0-9_]*" (:param colors)]
            ;; numbers with optional type suffix: hex, decimal, integer
            ["0[xX][0-9a-fA-F]+[lL]?" number-fg]
            ["\\d+\\.\\d+[fFdD]?" number-fg]
            ["\\d+[lLfFdD]?" number-fg]
            ;; identifier; color decided by kt-classify
            ["[a-zA-Z_][a-zA-Z0-9_]*" kt-classify]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language: Rust
|
|
;; ============================================================
|
|
|
|
(def ^:private rust-keywords
  "Rust words that `rust-classify` colors as keywords."
  (set ["as" "async" "await"
        "break"
        "const" "continue" "crate"
        "dyn"
        "else" "enum" "extern"
        "fn" "for"
        "if" "impl" "in"
        "let" "loop"
        "match" "mod" "move" "mut"
        "pub"
        "ref" "return"
        "self" "Self" "static" "struct" "super"
        "trait" "type"
        "unsafe" "use"
        "where" "while"
        "yield"
        "macro_rules"]))
|
|
|
|
(def ^:private rust-builtins
  "Rust names that `rust-classify` colors as builtins."
  (reduce into #{}
          [;; macro and attribute names
           ["println" "eprintln" "format" "vec" "panic" "assert" "assert_eq"
            "assert_ne" "debug_assert" "todo" "unimplemented" "unreachable"
            "cfg" "derive" "include" "include_str" "env" "concat" "stringify"]
           ;; enum variants and smart pointers
           ["Some" "None" "Ok" "Err" "Box" "Rc" "Arc"]
           ;; types and collections
           ["Vec" "String" "Option" "Result" "HashMap" "HashSet" "BTreeMap"
            "BTreeSet"]
           ;; traits
           ["Iterator" "IntoIterator" "From" "Into" "TryFrom" "TryInto"
            "Display" "Debug" "Clone" "Copy" "Default" "PartialEq" "Eq"
            "PartialOrd" "Ord" "Hash" "Send" "Sync" "Sized" "Drop"
            "Fn" "FnMut" "FnOnce"]]))
|
|
|
|
(def ^:private rust-constants
  "Rust literal names that `rust-classify` colors as constants."
  (hash-set "true" "false"))
|
|
|
|
(defn- rust-classify
  "Pick the color for a Rust identifier token.

  Tries the constant, keyword and builtin sets in that order. An identifier
  found in none of them gets the type color when it has at least two
  characters and starts with an upper-case letter, and nil otherwise."
  [text]
  (let [category (cond
                   (rust-constants text) :constant
                   (rust-keywords text)  :keyword
                   (rust-builtins text)  :builtin
                   (and (>= (count text) 2)
                        (Character/isUpperCase (.charAt ^String text 0))) :type)]
    (when category
      (get colors category))))
|
|
|
|
(def ^:private rust-rules
  "Tokenizer rules for Rust, in priority order, as a vector of
  [compiled-Pattern color-or-fn].

  Ordering notes:
  - The char-literal rule accepts exactly one character or escape between the
    quotes and precedes the lifetime rule, so 'a' is one literal while 'a in
    <'a> (no closing quote right after) falls through to the lifetime rule.
  - The macro rule refuses a `!` that is followed by `=`, so `x!=y` is not
    colored as a macro invocation."
  (mapv (fn [[re c]] [(Pattern/compile re) c])
        [;; comments: line, then block
         ["//.*" (:comment colors)]
         ["/\\*[\\s\\S]*?\\*/" (:comment colors)]
         ;; raw strings; the r#\"...\"# form may contain plain double quotes
         ["r#\"[\\s\\S]*?\"#" (:string colors)]
         ["r\"[^\"]*\"" (:string colors)]
         ;; ordinary string
         ["\"(?:[^\"\\\\]|\\\\.)*\"" (:string colors)]
         ;; char literal: one character, \u{...}, \xNN or a one-char escape
         ["'(?:[^'\\\\]|\\\\(?:u\\{[0-9a-fA-F_]+\\}|x[0-9a-fA-F]{2}|.))'"
          (:string colors)]
         ;; lifetime
         ["'[a-zA-Z_][a-zA-Z0-9_]*" (:param colors)]
         ;; macro invocation such as println!
         ["[a-zA-Z_][a-zA-Z0-9_]*!(?!=)" (:builtin colors)]
         ;; numbers: hex, binary, octal, float, integer with optional suffix
         ["0[xX][0-9a-fA-F_]+" (:number colors)]
         ["0[bB][01_]+" (:number colors)]
         ["0[oO][0-7_]+" (:number colors)]
         ["\\d[\\d_]*\\.\\d[\\d_]*(?:[eE][+-]?\\d+)?(?:f32|f64)?" (:number colors)]
         ["\\d[\\d_]*(?:u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize|f32|f64)?"
          (:number colors)]
         ;; identifier; color decided by rust-classify
         ["[a-zA-Z_][a-zA-Z0-9_]*" rust-classify]]))
|
|
|
|
;; ============================================================
|
|
;; Language: Bash
|
|
;; ============================================================
|
|
|
|
(def ^:private bash-keywords
  "Shell words that `bash-classify` colors as keywords."
  (reduce into #{}
          [;; compound commands
           ["if" "then" "else" "elif" "fi" "for" "while" "until" "do" "done"
            "case" "esac" "in" "function" "select" "time" "coproc"]
           ;; flow control
           ["return" "exit" "break" "continue" "shift" "trap"]
           ;; variable declarations
           ["local" "export" "declare" "typeset" "readonly" "unset"]]))
|
|
|
|
(def ^:private bash-builtins
  "Command names that `bash-classify` colors as builtins."
  (reduce into #{}
          [;; input, output and navigation
           ["echo" "printf" "read" "cd" "pwd" "ls"]
           ;; file management
           ["cp" "mv" "rm" "mkdir" "rmdir" "chmod" "chown"]
           ;; text processing
           ["cat" "grep" "sed" "awk" "find" "sort" "uniq" "wc" "head" "tail"]
           ;; network and archives
           ["curl" "wget" "tar" "gzip" "gunzip" "zip" "unzip"]
           ;; developer tools and remote access
           ["git" "docker" "make" "ssh" "scp" "rsync"]
           ;; shell builtins
           ["test" "true" "false" "source" "eval" "exec" "set" "env"]]))
|
|
|
|
(defn- bash-classify
  "Pick the color for a shell word token: the keyword color when `text` is in
  `bash-keywords`, else the builtin color when it is in `bash-builtins`, else
  nil."
  [text]
  (condp contains? text
    bash-keywords (:keyword colors)
    bash-builtins (:builtin colors)
    nil))
|
|
|
|
(def ^:private bash-rules
  "Tokenizer rules for shell scripts, in priority order, as a vector of
  [compiled-Pattern color-or-fn]."
  (let [string-fg (:string colors)
        param-fg  (:param colors)]
    (vec
     (for [[regex paint]
           [;; comment
            ["#.*" (:comment colors)]
            ;; double-quoted (with escapes) and single-quoted strings
            ["\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["'[^']*'" string-fg]
            ;; expansions: ${...}, $name, and single-character parameters
            ["\\$\\{[^}]+\\}" param-fg]
            ["\\$[a-zA-Z_][a-zA-Z0-9_]*" param-fg]
            ["\\$[0-9@#?!$*-]" param-fg]
            ;; integer
            ["\\d+" (:number colors)]
            ;; word; color decided by bash-classify
            ["[a-zA-Z_][a-zA-Z0-9_]*" bash-classify]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language: JSON
|
|
;; ============================================================
|
|
|
|
(def ^:private json-rules
  "Tokenizer rules for JSON, in priority order, as a vector of
  [compiled-Pattern color]."
  (let [number-fg   (:number colors)
        constant-fg (:constant colors)]
    (vec
     (for [[regex paint]
           [;; object key: a string followed by a colon (the colon is part of
            ;; the colored token)
            ["\"(?:[^\"\\\\]|\\\\.)*\"\\s*:" (:clj-kw colors)]
            ;; string value
            ["\"(?:[^\"\\\\]|\\\\.)*\"" (:string colors)]
            ;; numbers: decimal, then integer
            ["-?\\d+\\.\\d+(?:[eE][+-]?\\d+)?" number-fg]
            ["-?\\d+" number-fg]
            ;; literals
            ["\\b(?:true|false)\\b" constant-fg]
            ["\\bnull\\b" constant-fg]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language: Generic (Go, C, C++, Ruby, CSS, etc.)
|
|
;; ============================================================
|
|
|
|
(def ^:private generic-keywords
  "Words that `generic-classify` colors as keywords for languages without a
  dedicated rule set."
  (reduce into #{}
          [;; control flow
           ["if" "else" "for" "while" "do" "switch" "case" "default" "break"
            "continue" "return" "goto" "try" "catch" "throw" "finally"]
           ;; type declarations
           ["class" "struct" "enum" "interface" "extends" "implements"]
           ;; modifiers
           ["public" "private" "protected" "static" "const" "final" "abstract"
            "virtual" "override"]
           ;; objects
           ["new" "delete" "this" "self" "super"]
           ;; modules
           ["import" "export" "package" "module" "use" "require" "include"]
           ;; primitive type names
           ["void" "int" "long" "float" "double" "char" "bool" "string"]
           ;; variable and function declarations
           ["var" "let" "val" "def" "fn" "func" "fun" "function"]
           ;; type-level declarations
           ["type" "typedef" "namespace" "template" "typename"]
           ;; concurrency
           ["async" "await" "yield" "defer" "select" "chan" "go"]
           ;; block delimiters and error handling
           ["begin" "end" "then" "elsif" "unless" "rescue" "ensure" "raise"]]))
|
|
|
|
(def ^:private generic-constants
  "Literal names that `generic-classify` colors as constants."
  (hash-set
   ;; booleans
   "true" "false" "True" "False"
   ;; absent values
   "nil" "null" "none" "None" "NULL" "undefined"
   ;; special numbers
   "NaN" "Infinity"))
|
|
|
|
(defn- generic-classify
  "Pick the color for an identifier token in the generic rule set.

  Constants are checked first, then keywords. Any other identifier of at
  least two characters that starts with an upper-case letter gets the type
  color. Returns nil when nothing applies."
  [text]
  (if (contains? generic-constants text)
    (:constant colors)
    (if (contains? generic-keywords text)
      (:keyword colors)
      (when (and (>= (count text) 2)
                 (Character/isUpperCase (.charAt ^String text 0)))
        (:type colors)))))
|
|
|
|
(def ^:private generic-rules
  "Fallback tokenizer rules for languages without a dedicated rule set, in
  priority order, as a vector of [compiled-Pattern color-or-fn]."
  (let [comment-fg (:comment colors)
        string-fg  (:string colors)
        number-fg  (:number colors)]
    (vec
     (for [[regex paint]
           [;; comments: //, #, then block
            ["//.*" comment-fg]
            ["#.*" comment-fg]
            ["/\\*[\\s\\S]*?\\*/" comment-fg]
            ;; strings: double-quoted, single-quoted, backtick
            ["\"(?:[^\"\\\\]|\\\\.)*\"" string-fg]
            ["'(?:[^'\\\\]|\\\\.)*'" string-fg]
            ["`(?:[^`\\\\]|\\\\.)*`" string-fg]
            ;; @-prefixed name
            ["@[a-zA-Z_][a-zA-Z0-9_]*" (:param colors)]
            ;; numbers: hex, decimal, integer
            ["0[xX][0-9a-fA-F]+" number-fg]
            ["\\d+\\.\\d+(?:[eE][+-]?\\d+)?" number-fg]
            ["\\d+" number-fg]
            ;; identifier; color decided by generic-classify
            ["[a-zA-Z_][a-zA-Z0-9_]*" generic-classify]]]
       [(Pattern/compile regex) paint]))))
|
|
|
|
;; ============================================================
|
|
;; Language Registry
|
|
;; ============================================================
|
|
|
|
(def ^:private lang-rules
  "Language keyword -> tokenizer rule vector."
  (into {}
        [[:clojure    clj-rules]
         [:javascript js-rules]
         [:python     py-rules]
         [:java       java-rules]
         [:kotlin     kt-rules]
         [:rust       rust-rules]
         [:bash       bash-rules]
         [:json       json-rules]
         [:generic    generic-rules]]))
|
|
|
|
(def ^:private fence-tag->lang
  "Lower-case code-fence tag -> language keyword.

  Written grouped by language and flattened into a tag -> language map."
  (into {}
        (for [[lang tags]
              [[:clojure    ["clojure" "clj" "cljs" "edn"]]
               [:javascript ["javascript" "js" "typescript" "ts" "jsx" "tsx"]]
               [:python     ["python" "py"]]
               [:java       ["java"]]
               [:kotlin     ["kotlin" "kt"]]
               [:rust       ["rust" "rs"]]
               [:bash       ["bash" "sh" "shell" "zsh"]]
               [:json       ["json" "jsonc"]]
               [:generic    ["go" "c" "cpp" "c++"
                             "ruby" "rb"
                             "css" "scss" "less"
                             "html" "xml" "svg"
                             "yaml" "yml" "toml"
                             "sql" "graphql" "gql"
                             "lua" "perl" "r"
                             "swift" "scala" "groovy"
                             "haskell" "hs" "elixir" "ex"
                             "erlang" "erl"
                             "zig" "nim" "ocaml" "ml"
                             "dart" "php"
                             "dockerfile" "makefile"
                             "diff" "patch"]]]
              tag tags]
          [tag lang])))
|
|
|
|
(def ^:private ext->lang
  "Lower-case file extension (with leading dot) -> language keyword.

  Written grouped by language and flattened into an extension -> language
  map."
  (into {}
        (for [[lang exts]
              [[:clojure    [".clj" ".cljs" ".cljc" ".edn" ".bb"]]
               [:javascript [".js" ".jsx" ".ts" ".tsx" ".mjs"]]
               [:python     [".py" ".pyw"]]
               [:java       [".java"]]
               [:kotlin     [".kt" ".kts"]]
               [:rust       [".rs"]]
               [:bash       [".sh" ".bash" ".zsh"]]
               [:json       [".json" ".jsonc"]]
               [:generic    [".go" ".c" ".h" ".cpp" ".hpp" ".cc"
                             ".rb" ".css" ".scss" ".less"
                             ".html" ".xml" ".svg"
                             ".yaml" ".yml" ".toml"
                             ".sql" ".lua" ".pl" ".r"
                             ".swift" ".scala" ".groovy"
                             ".hs" ".ex" ".exs" ".erl"
                             ".zig" ".nim" ".ml"
                             ".dart" ".php"]]]
              ext exts]
          [ext lang])))
|
|
|
|
;; ============================================================
|
|
;; Public API
|
|
;; ============================================================
|
|
|
|
(defn lang-for-fence
  "Look up the language keyword for a code-fence tag such as \"clojure\" or
  \"js\". The tag is trimmed and lower-cased before the lookup. Returns nil
  for a nil tag or an unknown one."
  [tag]
  (if (not tag)
    nil
    (let [normalized (-> tag str/trim str/lower-case)]
      (fence-tag->lang normalized))))
|
|
|
|
(defn lang-for-ext
  "Look up the language keyword for a file extension such as \".clj\" or
  \".rs\". The extension is lower-cased before the lookup. Returns nil for a
  nil extension or an unknown one."
  [ext]
  (if (not ext)
    nil
    (let [normalized (str/lower-case ext)]
      (ext->lang normalized))))
|
|
|
|
(defn highlight-line
  "Syntax-highlight a single line of code with ANSI foreground codes.

  `line`       - the text to highlight. nil is returned unchanged.
  `lang`       - language keyword such as :clojure or :javascript. When it
                 has no entry in `lang-rules` (including nil), `line` is
                 returned as is, without any codes.
  `default-fg` - ANSI code for unhighlighted text; nil is treated as \"\"
                 (terminal default). It is emitted at the start of the result
                 and again after every colored token.

  The result does not end with a reset sequence; the caller should append
  \\033[0m after it."
  [line lang default-fg]
  ;; The nil check keeps a nil line from reaching the tokenizer, which calls
  ;; .length on it; nil is passed through exactly as for an unknown language.
  (if-let [rules (when (some? line) (get lang-rules lang))]
    (let [fg (or default-fg "")]
      (str fg (highlight-line* line rules fg)))
    line))
|