Bootstrap compiler (reader, analyzer, transformer, compiler, Mix plugin), core protocols (16 protocols for Map/List/Tuple/BitString), PersistentVector (bit-partitioned trie), domain tools (clojurify/elixirify), BEAM concurrency (receive, spawn, GenServer), control flow & macros (threading, try/catch, destructuring, defmacro with quasiquote/auto-gensym), and Malli schema adapter (m/=> specs, auto @type, recursive schemas, cross-references). 537 compiler tests + 55 Malli unit tests + 15 integration tests = 607 total. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
627 lines
16 KiB
Elixir
627 lines
16 KiB
Elixir
defmodule CljElixir.Analyzer do
|
|
@moduledoc """
|
|
AST analyzer and validator for CljElixir.
|
|
|
|
Performs lightweight static analysis on CljElixir AST forms (output of the Reader)
|
|
before they are passed to the Transformer. Catches common structural errors early
|
|
with clear diagnostic messages.
|
|
|
|
## Validations
|
|
|
|
1. **Special form arity** - `defmodule` needs name + body, `let` needs a vector
|
|
with an even number of binding pairs, `if` needs 2-3 args, `case` needs a
|
|
subject + even pattern/body pairs, `cond` needs even pairs, `loop` needs a
|
|
vector with even binding pairs.
|
|
|
|
2. **Map literal validation** - Maps must have an even number of forms (key-value pairs).
|
|
|
|
3. **`recur` position** - `recur` must appear in tail position. In `if`/`case`/`cond`,
|
|
the tail position is the last expression of each branch. In `let`/`do`, the tail
|
|
position is the last expression.
|
|
|
|
4. **Nested `recur`** - `recur` inside a nested `loop` should only refer to the
|
|
innermost loop, not an outer one.
|
|
|
|
## Return Value
|
|
|
|
Returns `{:ok, forms}` when the AST is valid (passes forms through unchanged),
|
|
or `{:error, diagnostics}` when errors are found.
|
|
|
|
Diagnostics are maps with keys: `:severity`, `:message`, `:line`, `:col`.
|
|
"""
|
|
|
|
# A single analyzer finding: how serious it is, what went wrong, and where
# (line/col are taken from the offending form's metadata, 0 when absent).
@type diagnostic :: %{
        severity: :error | :warning,
        message: String.t(),
        line: non_neg_integer(),
        col: non_neg_integer()
      }

@doc """
Validates CljElixir AST forms.

Accepts a list of forms; any other term is treated as a single form and
wrapped in a list first.

Returns `{:ok, forms}` when no collected diagnostic has `:error` severity
(diagnostics that are only warnings are not returned in that case).
Otherwise returns `{:error, diagnostics}` with every diagnostic collected,
warnings included.
"""
@spec analyze(list()) :: {:ok, list()} | {:error, [diagnostic()]}
def analyze(forms) when is_list(forms) do
  # Every top-level form starts in tail position, outside any loop or fn.
  top_level_ctx = %{tail: true, in_loop: false, in_fn: false}
  diagnostics = Enum.flat_map(forms, &validate_form(&1, top_level_ctx))

  if Enum.any?(diagnostics, &(&1.severity == :error)) do
    {:error, diagnostics}
  else
    {:ok, forms}
  end
end

def analyze(form), do: analyze(List.wrap(form))
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Form validation - dispatches on the head of each s-expression
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# A list form whose head is a special-form symbol is dispatched to its validator: (special-form ...)
|
|
# Recursive walk over one AST node, returning a (possibly empty) list of
# diagnostics.
#
# A list whose head is a symbol is checked against the known special forms;
# anything else headed by a symbol is treated as an ordinary call.
defp validate_form({:list, meta, [{:symbol, _, head} | args] = children}, ctx) do
  case head do
    "defmodule" -> validate_defmodule(args, meta, ctx)
    "defn" -> validate_defn(args, meta, ctx)
    "defn-" -> validate_defn(args, meta, ctx)
    "fn" -> validate_fn(args, meta, ctx)
    "let" -> validate_let(args, meta, ctx)
    "if" -> validate_if(args, meta, ctx)
    "case" -> validate_case(args, meta, ctx)
    "cond" -> validate_cond(args, meta, ctx)
    "loop" -> validate_loop(args, meta, ctx)
    "recur" -> validate_recur(meta, ctx)
    "do" -> validate_do(args, meta, ctx)
    _ordinary_call -> validate_children_non_tail(children, ctx)
  end
end

# Map literals have their own validator.
defp validate_form({:map, meta, elements}, ctx) do
  validate_map_literal(elements, meta, ctx)
end

# Remaining lists (calls), vectors and sets: walk every child. The call or
# collection itself may be in tail position, but none of its children are,
# which is what matters for `recur` analysis.
defp validate_form({tag, _meta, children}, ctx)
     when tag in [:list, :vector, :set] and is_list(children) do
  validate_children_non_tail(children, ctx)
end

# Everything else (atoms, numbers, strings, symbols, keywords, ...) is a leaf
# and always valid.
defp validate_form(_leaf, _ctx), do: []

# Validates each child with the tail flag cleared.
defp validate_children_non_tail(children, ctx) do
  non_tail_ctx = %{ctx | tail: false}
  Enum.flat_map(children, &validate_form(&1, non_tail_ctx))
end
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Special form validators
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Validates `(defmodule name body...)`: a name and at least one body form are
# required. Each body form is then validated as a fresh top-level form.
defp validate_defmodule([], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "defmodule requires a module name and at least one body expression",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_defmodule([_name], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "defmodule requires at least one body expression after the module name",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_defmodule([_name | body], _meta, ctx) do
  # Module body forms are top-level: tail position, outside any loop or fn.
  module_ctx = %{ctx | tail: true, in_loop: false, in_fn: false}
  Enum.flat_map(body, &validate_form(&1, module_ctx))
end
|
|
|
|
# Validates `defn` / `defn-`. Supported shapes:
#
#   (defn name [params] body...)
#   (defn name "docstring" [params] body...)
#   (defn name ([params1] body1) ([params2] body2))   ; multi-arity
#
# Function bodies are validated in a fresh function context: tail position,
# inside a fn, outside any loop.
defp validate_defn([], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "defn requires a function name, parameter vector, and body",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_defn([_name], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "defn requires a parameter vector and body after the function name",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_defn([_name, second | rest], meta, ctx) do
  fn_ctx = %{ctx | tail: true, in_fn: true, in_loop: false}

  case second do
    # Multi-arity: every remaining form is a clause.
    {:list, _, _} ->
      Enum.flat_map([second | rest], &validate_fn_clause(&1, fn_ctx))

    # Docstring: the real definition starts after it.
    {:string, _, _} ->
      validate_defn_body(rest, fn_ctx, meta_line(meta), meta_col(meta))

    # Parameter vector (or any other form): the rest is treated as the body.
    _params_or_other ->
      validate_fn_body(rest, fn_ctx)
  end
end
|
|
|
|
# Validates what follows a `defn` docstring: a parameter vector plus body, or
# a series of multi-arity clauses. `line`/`col` locate the enclosing `defn`
# for the error emitted when nothing follows the docstring. Any other shape
# yields no diagnostics.
defp validate_defn_body([], _ctx, line, col) do
  [
    %{
      severity: :error,
      message: "defn requires a parameter vector and body after docstring",
      line: line,
      col: col
    }
  ]
end

defp validate_defn_body([{:vector, _, _} | body], ctx, _line, _col) do
  validate_fn_body(body, ctx)
end

# Multi-arity after docstring.
defp validate_defn_body([{:list, _, _} | _] = clauses, ctx, _line, _col) do
  Enum.flat_map(clauses, &validate_fn_clause(&1, ctx))
end

defp validate_defn_body(_unrecognized, _ctx, _line, _col), do: []
|
|
|
|
# Validates an anonymous function. Supported shapes:
#
#   (fn [params] body...)
#   (fn name [params] body...)
#   (fn ([p1] b1) ([p2] b2) ...)
#   (fn name ([p1] b1) ([p2] b2) ...)
#
# Any other non-empty shape yields no diagnostics.
defp validate_fn(args, meta, ctx) do
  fn_ctx = %{ctx | tail: true, in_fn: true, in_loop: false}

  # Drop the optional name, but only when it is directly followed by a
  # parameter vector or a clause list.
  unnamed =
    case args do
      [{:symbol, _, _} | [{kind, _, _} | _] = rest] when kind in [:vector, :list] -> rest
      other -> other
    end

  case unnamed do
    [] ->
      [
        %{
          severity: :error,
          message: "fn requires a parameter vector and body",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]

    [{:vector, _, _} | body] ->
      validate_fn_body(body, fn_ctx)

    [{:list, _, _} | _] = clauses ->
      Enum.flat_map(clauses, &validate_fn_clause(&1, fn_ctx))

    _unrecognized ->
      []
  end
end
|
|
|
|
# Validates one multi-arity clause `([params] body...)`. Forms that do not
# have that shape produce no diagnostics.
defp validate_fn_clause(clause, ctx) do
  case clause do
    {:list, _meta, [{:vector, _, _} | body]} -> validate_fn_body(body, ctx)
    _malformed -> []
  end
end
|
|
|
|
# Validates a function body: only the final form keeps the caller's context
# (and thus its tail flag); every earlier form is validated as non-tail.
# Diagnostics are returned in source order.
defp validate_fn_body([], _ctx), do: []

defp validate_fn_body([final_form], ctx), do: validate_form(final_form, ctx)

defp validate_fn_body([form | remaining], ctx) do
  validate_form(form, %{ctx | tail: false}) ++ validate_fn_body(remaining, ctx)
end
|
|
|
|
# Validates `(let [name value ...] body...)`.
#
# Checks, in order:
#   * a binding vector is present as the first argument,
#   * the binding vector has an even number of forms,
#   * every binding *value* expression is itself valid (values are never in
#     tail position, so a `recur` there is reported),
#   * the body forms, where only the last one inherits the caller's tail flag.
#
# An empty body is reported as a warning, not an error.
defp validate_let(args, meta, ctx) do
  line = meta_line(meta)
  col = meta_col(meta)

  case args do
    [] ->
      [
        %{
          severity: :error,
          message: "let requires a binding vector and body",
          line: line,
          col: col
        }
      ]

    [{:vector, vmeta, bindings} | body] ->
      binding_diags = validate_binding_vector(bindings, vmeta, "let")

      # Binding targets (even positions) may be destructuring patterns and are
      # left alone; only the value expressions (odd positions) are walked.
      non_tail = %{ctx | tail: false}

      value_diags =
        bindings
        |> Enum.drop(1)
        |> Enum.take_every(2)
        |> Enum.flat_map(&validate_form(&1, non_tail))

      body_diags =
        case body do
          [] ->
            [
              %{
                severity: :warning,
                message: "let with no body expression always returns nil",
                line: line,
                col: col
              }
            ]

          _ ->
            validate_body_forms(body, ctx)
        end

      binding_diags ++ value_diags ++ body_diags

    _ ->
      [
        %{
          severity: :error,
          message: "let requires a binding vector as its first argument",
          line: line,
          col: col
        }
      ]
  end
end
|
|
|
|
# Validates `(if condition then [else])`. The condition is never in tail
# position; both branches inherit the caller's context. Any other argument
# count is an arity error and the arguments are not inspected further.
defp validate_if(args, meta, ctx) do
  case args do
    [condition, then_branch] ->
      validate_form(condition, %{ctx | tail: false}) ++ validate_form(then_branch, ctx)

    [condition, then_branch, else_branch] ->
      Enum.concat([
        validate_form(condition, %{ctx | tail: false}),
        validate_form(then_branch, ctx),
        validate_form(else_branch, ctx)
      ])

    _wrong_arity ->
      arg_count = length(args)

      message =
        if arg_count < 2 do
          "if requires a condition and at least a then branch (got #{arg_count} argument(s))"
        else
          "if accepts at most 3 arguments (condition, then, else), got #{arg_count}"
        end

      [%{severity: :error, message: message, line: meta_line(meta), col: meta_col(meta)}]
  end
end
|
|
|
|
# Validates `(case subject pattern body ...)`. A subject and at least one
# pattern/body pair are required, and the forms after the subject must pair
# up. The subject is non-tail; each clause body inherits the caller's
# context. Patterns themselves are not validated.
defp validate_case([], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "case requires a subject expression and at least one pattern/body pair",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_case([_subject], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "case requires at least one pattern/body pair after the subject",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_case([subject | pairs], meta, ctx) do
  subject_diags = validate_form(subject, %{ctx | tail: false})
  pair_form_count = length(pairs)

  clause_diags =
    if rem(pair_form_count, 2) == 0 do
      for [_pattern, body] <- Enum.chunk_every(pairs, 2),
          diag <- validate_form(body, ctx),
          do: diag
    else
      [
        %{
          severity: :error,
          message: "case requires an even number of pattern/body forms, got #{pair_form_count}",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]
    end

  subject_diags ++ clause_diags
end
|
|
|
|
# Validates `(cond test expr ...)`. Arguments must come in test/expression
# pairs. Tests are non-tail; each expression inherits the caller's context.
# On an odd argument count only the arity error is reported.
defp validate_cond(args, meta, ctx) do
  arg_count = length(args)

  if rem(arg_count, 2) == 0 do
    non_tail = %{ctx | tail: false}

    for [test, body] <- Enum.chunk_every(args, 2),
        diag <- validate_form(test, non_tail) ++ validate_form(body, ctx),
        do: diag
  else
    [
      %{
        severity: :error,
        message: "cond requires an even number of test/expression pairs, got #{arg_count}",
        line: meta_line(meta),
        col: meta_col(meta)
      }
    ]
  end
end
|
|
|
|
# Validates `(loop [name init ...] body...)`.
#
# Checks, in order:
#   * a binding vector is present as the first argument,
#   * the binding vector has an even number of forms,
#   * every initial-value expression is itself valid; these are validated in
#     the *enclosing* context with the tail flag cleared, so a `recur` inside
#     one is reported,
#   * the body forms, validated in a new loop context where the last form is
#     in tail position.
#
# An empty body is reported as a warning, not an error.
defp validate_loop(args, meta, ctx) do
  line = meta_line(meta)
  col = meta_col(meta)

  case args do
    [] ->
      [
        %{
          severity: :error,
          message: "loop requires a binding vector and body",
          line: line,
          col: col
        }
      ]

    [{:vector, vmeta, bindings} | body] ->
      binding_diags = validate_binding_vector(bindings, vmeta, "loop")

      # Binding targets (even positions) may be destructuring patterns and are
      # left alone; only the init expressions (odd positions) are walked.
      non_tail = %{ctx | tail: false}

      value_diags =
        bindings
        |> Enum.drop(1)
        |> Enum.take_every(2)
        |> Enum.flat_map(&validate_form(&1, non_tail))

      body_diags =
        case body do
          [] ->
            [
              %{
                severity: :warning,
                message: "loop with no body expression always returns nil",
                line: line,
                col: col
              }
            ]

          _ ->
            loop_ctx = %{ctx | tail: true, in_loop: true}
            validate_body_forms(body, loop_ctx)
        end

      binding_diags ++ value_diags ++ body_diags

    _ ->
      [
        %{
          severity: :error,
          message: "loop requires a binding vector as its first argument",
          line: line,
          col: col
        }
      ]
  end
end
|
|
|
|
# Validates a `recur` form against the current context. The tail-position
# check takes precedence; only a `recur` that is in tail position is then
# checked for an enclosing loop or function. At most one error is returned.
defp validate_recur(meta, ctx) do
  problem =
    cond do
      not ctx.tail -> "recur must be in tail position"
      not ctx.in_loop and not ctx.in_fn -> "recur must be inside a loop or function body"
      true -> nil
    end

  case problem do
    nil -> []
    message -> [%{severity: :error, message: message, line: meta_line(meta), col: meta_col(meta)}]
  end
end
|
|
|
|
# Validates `(do form...)`: its forms are handled as a body sequence, so only
# the last one inherits the caller's tail flag.
defp validate_do(args, _meta, ctx), do: validate_body_forms(args, ctx)
|
|
|
|
# Validates a map literal.
#
# Reports an error when the literal does not contain an even number of forms
# (key/value pairs), and then validates every element (keys and values
# alike) with the tail flag cleared, the same way vector and set elements are
# walked. Without the descent, structural errors or a misplaced `recur`
# nested inside a map literal would go unreported.
defp validate_map_literal(elements, meta, ctx) do
  element_count = length(elements)

  arity_diags =
    if rem(element_count, 2) != 0 do
      [
        %{
          severity: :error,
          message:
            "map literal requires an even number of forms (key-value pairs), got #{element_count}",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]
    else
      []
    end

  # Elements of a literal are never in tail position.
  non_tail = %{ctx | tail: false}
  element_diags = Enum.flat_map(elements, &validate_form(&1, non_tail))

  arity_diags ++ element_diags
end
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Checks that a binding vector holds name/value pairs, i.e. an even number of
# forms. `form_name` ("let", "loop", ...) prefixes the error message and
# `meta` (the vector's metadata) provides its location.
defp validate_binding_vector(bindings, meta, form_name) do
  case length(bindings) do
    count when rem(count, 2) == 0 ->
      []

    count ->
      [
        %{
          severity: :error,
          message:
            "#{form_name} binding vector requires an even number of forms (name/value pairs), got #{count}",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]
  end
end
|
|
|
|
# Validates a sequence of body forms. Only the final form is validated with
# the caller's context unchanged; all earlier forms are validated with the
# tail flag cleared. Diagnostics come back in source order.
defp validate_body_forms([], _ctx), do: []

defp validate_body_forms(forms, ctx) do
  final_index = length(forms) - 1
  non_tail = %{ctx | tail: false}

  forms
  |> Enum.with_index()
  |> Enum.flat_map(fn {form, index} ->
    form_ctx = if index == final_index, do: ctx, else: non_tail
    validate_form(form, form_ctx)
  end)
end
|
|
|
|
# Reads the `:line` entry from form metadata, which may be a map or a keyword
# list. Returns 0 when the entry is missing or the metadata is any other term.
defp meta_line(meta) do
  cond do
    is_map(meta) -> Map.get(meta, :line, 0)
    is_list(meta) -> Keyword.get(meta, :line, 0)
    true -> 0
  end
end
|
|
|
|
# Reads the `:col` entry from form metadata, which may be a map or a keyword
# list. Returns 0 when the entry is missing or the metadata is any other term.
defp meta_col(meta) do
  cond do
    is_map(meta) -> Map.get(meta, :col, 0)
    is_list(meta) -> Keyword.get(meta, :col, 0)
    true -> 0
  end
end
|
|
end
|