Files
CljElixir/lib/clj_elixir/analyzer.ex
Adam d8719b6d48 Phases 1-7: Complete CljElixir compiler through Malli schema adapter
Bootstrap compiler (reader, analyzer, transformer, compiler, Mix plugin),
core protocols (16 protocols for Map/List/Tuple/BitString), PersistentVector
(bit-partitioned trie), domain tools (clojurify/elixirify), BEAM concurrency
(receive, spawn, GenServer), control flow & macros (threading, try/catch,
destructuring, defmacro with quasiquote/auto-gensym), and Malli schema
adapter (m/=> specs, auto @type, recursive schemas, cross-references).

537 compiler tests + 55 Malli unit tests + 15 integration tests = 607 total.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 10:38:22 -04:00

627 lines
16 KiB
Elixir

defmodule CljElixir.Analyzer do
@moduledoc """
AST analyzer and validator for CljElixir.
Performs lightweight static analysis on CljElixir AST forms (output of the Reader)
before they are passed to the Transformer. Catches common structural errors early
with clear diagnostic messages.
## Validations
1. **Special form arity** - `defmodule` needs a name plus at least one body form, `let`
needs a binding vector with an even number of forms (name/value pairs), `if` needs
2-3 args, `case` needs a subject followed by an even number of pattern/body forms,
`cond` needs an even number of test/expression forms, `loop` needs a binding vector
with an even number of forms (name/value pairs).
2. **Map literal validation** - Maps must have an even number of forms (key-value pairs).
3. **`recur` position** - `recur` must appear in tail position. In `if`/`case`/`cond`,
the tail position is the last expression of each branch. In `let`/`do`, the tail
position is the last expression.
4. **Nested `recur`** - `recur` inside a nested `loop` should only refer to the
innermost loop, not an outer one.
## Return Value
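Returns `{:ok, forms}` when no error-severity diagnostic is produced (forms are passed
through unchanged and any warnings are dropped), or `{:error, diagnostics}` when at
least one error is found; that list also contains any warnings that were collected.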
Diagnostics are maps with keys: `:severity`, `:message`, `:line`, `:col`.
"""
# A single finding produced by the analyzer.
#
#   * `:severity` - `:error` fails the analysis; `:warning` alone does not
#   * `:message`  - human-readable description of the problem
#   * `:line` / `:col` - position read from the offending form's metadata;
#     both fall back to 0 when the metadata carries no position
@type diagnostic :: %{
severity: :error | :warning,
message: String.t(),
line: non_neg_integer(),
col: non_neg_integer()
}
@doc """
Analyze and validate CljElixir AST forms.

Accepts either a list of top-level forms or a single form; a single form is wrapped
in a list before analysis. Every top-level form is validated in tail position,
outside of any loop or function body.

Returns `{:ok, forms}` when no diagnostic has `:error` severity (warnings alone do
not fail the analysis and are not returned), or `{:error, diagnostics}` with every
collected diagnostic, warnings included, when at least one error is found.
"""
@spec analyze(term()) :: {:ok, list()} | {:error, [diagnostic()]}
def analyze(forms) when is_list(forms) do
  top_level_ctx = %{tail: true, in_loop: false, in_fn: false}
  diagnostics = Enum.flat_map(forms, &validate_form(&1, top_level_ctx))

  # Only errors fail the analysis; `Enum.any?/2` stops at the first one found.
  if Enum.any?(diagnostics, &(&1.severity == :error)) do
    {:error, diagnostics}
  else
    {:ok, forms}
  end
end

# A single (non-list) form is analyzed as a one-element list of forms.
def analyze(form), do: analyze(List.wrap(form))
# ---------------------------------------------------------------------------
# Form validation - dispatches on the head of each s-expression
# ---------------------------------------------------------------------------

# Validates one reader form and returns a (possibly empty) list of diagnostics.
#
# `ctx` is a map of position flags used for the `recur` checks:
#   * `:tail`    - the form sits in tail position
#   * `:in_loop` - the form is inside a `loop` body
#   * `:in_fn`   - the form is inside a function body
#
# A list whose head is a special-form symbol is handed to the matching
# validator; every other collection has its children validated recursively.
defp validate_form({:list, meta, [{:symbol, _, "defmodule"} | args]}, ctx) do
  validate_defmodule(args, meta, ctx)
end

# Public and private function definitions share the same structure.
defp validate_form({:list, meta, [{:symbol, _, name} | args]}, ctx)
     when name in ["defn", "defn-"] do
  validate_defn(args, meta, ctx)
end

defp validate_form({:list, meta, [{:symbol, _, "fn"} | args]}, ctx) do
  validate_fn(args, meta, ctx)
end

defp validate_form({:list, meta, [{:symbol, _, "let"} | args]}, ctx) do
  validate_let(args, meta, ctx)
end

defp validate_form({:list, meta, [{:symbol, _, "if"} | args]}, ctx) do
  validate_if(args, meta, ctx)
end

defp validate_form({:list, meta, [{:symbol, _, "case"} | args]}, ctx) do
  validate_case(args, meta, ctx)
end

defp validate_form({:list, meta, [{:symbol, _, "cond"} | args]}, ctx) do
  validate_cond(args, meta, ctx)
end

defp validate_form({:list, meta, [{:symbol, _, "loop"} | args]}, ctx) do
  validate_loop(args, meta, ctx)
end

# `recur` itself is checked against the current position; its arguments are
# ordinary expressions evaluated before the jump, so they are validated in
# non-tail position (a `recur` nested directly in an argument is an error).
defp validate_form({:list, meta, [{:symbol, _, "recur"} | args]}, ctx) do
  arg_ctx = %{ctx | tail: false}
  validate_recur(meta, ctx) ++ Enum.flat_map(args, &validate_form(&1, arg_ctx))
end

defp validate_form({:list, meta, [{:symbol, _, "do"} | args]}, ctx) do
  validate_do(args, meta, ctx)
end

defp validate_form({:map, meta, elements}, ctx) do
  validate_map_literal(elements, meta, ctx)
end

# Generic list form (function call or unrecognised head): validate children.
# The call itself may be in tail position, but its head and arguments never
# are, so every child is validated with `tail: false`.
defp validate_form({:list, _meta, children}, ctx) when is_list(children) do
  child_ctx = %{ctx | tail: false}
  Enum.flat_map(children, &validate_form(&1, child_ctx))
end

# Vectors: elements are never in tail position.
defp validate_form({:vector, _meta, elements}, ctx) when is_list(elements) do
  element_ctx = %{ctx | tail: false}
  Enum.flat_map(elements, &validate_form(&1, element_ctx))
end

# Sets: elements are never in tail position.
defp validate_form({:set, _meta, elements}, ctx) when is_list(elements) do
  element_ctx = %{ctx | tail: false}
  Enum.flat_map(elements, &validate_form(&1, element_ctx))
end

# Atoms, numbers, strings, symbols, keywords — always valid
defp validate_form(_leaf, _ctx), do: []
# ---------------------------------------------------------------------------
# Special form validators
# ---------------------------------------------------------------------------
# Checks `(defmodule Name body...)`: a name and at least one body form are
# required. Each body form is then validated as a fresh top-level form (tail
# position, outside any loop or function).
defp validate_defmodule(args, meta, ctx) do
  report = fn message ->
    [%{severity: :error, message: message, line: meta_line(meta), col: meta_col(meta)}]
  end

  case args do
    [_name, _first | _more] ->
      module_ctx = %{ctx | tail: true, in_loop: false, in_fn: false}

      args
      |> tl()
      |> Enum.flat_map(&validate_form(&1, module_ctx))

    [_name] ->
      report.("defmodule requires at least one body expression after the module name")

    [] ->
      report.("defmodule requires a module name and at least one body expression")
  end
end
# Checks a `defn` / `defn-` form. Accepted shapes after the name:
#   (defn name [params] body...)
#   (defn name "docstring" [params] body...)
#   (defn name ([params1] body1) ([params2] body2))   ; multi-arity
# Bodies are validated in tail position, inside a function, outside any loop.
defp validate_defn([], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "defn requires a function name, parameter vector, and body",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_defn([_name], meta, _ctx) do
  [
    %{
      severity: :error,
      message: "defn requires a parameter vector and body after the function name",
      line: meta_line(meta),
      col: meta_col(meta)
    }
  ]
end

defp validate_defn([_name, second | remaining], meta, ctx) do
  body_ctx = %{ctx | tail: true, in_fn: true, in_loop: false}

  case second do
    # A list right after the name means every remaining form is an arity clause.
    {:list, _, _} ->
      Enum.flat_map([second | remaining], &validate_fn_clause(&1, body_ctx))

    # A string right after the name is a docstring; params/clauses follow it.
    {:string, _, _} ->
      validate_defn_body(remaining, body_ctx, meta_line(meta), meta_col(meta))

    # A parameter vector (or anything else): the remaining forms are the body.
    _params_or_other ->
      validate_fn_body(remaining, body_ctx)
  end
end
# Validates what follows a `defn` docstring: either a parameter vector plus
# body, or a series of multi-arity clauses. `line`/`col` locate the enclosing
# `defn` form and are only used when nothing follows the docstring.
defp validate_defn_body([], _ctx, line, col) do
  [
    %{
      severity: :error,
      message: "defn requires a parameter vector and body after docstring",
      line: line,
      col: col
    }
  ]
end

defp validate_defn_body([{:vector, _, _} | body], ctx, _line, _col) do
  validate_fn_body(body, ctx)
end

# Multi-arity after docstring
defp validate_defn_body([{:list, _, _} | _] = clauses, ctx, _line, _col) do
  Enum.flat_map(clauses, &validate_fn_clause(&1, ctx))
end

# Any other shape is left unchecked.
defp validate_defn_body(_unrecognised, _ctx, _line, _col), do: []
# Checks an anonymous function form. Accepted shapes:
#   (fn [params] body...)                  ; single arity
#   (fn name [params] body...)             ; named
#   (fn (clause1) (clause2) ...)           ; multi-arity
#   (fn name (clause1) (clause2) ...)      ; named multi-arity
# Bodies are validated in tail position, inside a function, outside any loop.
# Any other non-empty shape produces no diagnostics.
defp validate_fn(args, meta, ctx) do
  inner_ctx = %{ctx | tail: true, in_fn: true, in_loop: false}
  check_clauses = fn clauses -> Enum.flat_map(clauses, &validate_fn_clause(&1, inner_ctx)) end

  case args do
    [] ->
      [
        %{
          severity: :error,
          message: "fn requires a parameter vector and body",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]

    [{:vector, _, _} | body] ->
      validate_fn_body(body, inner_ctx)

    [{:symbol, _, _}, {:vector, _, _} | body] ->
      validate_fn_body(body, inner_ctx)

    [{:list, _, _} | _] ->
      check_clauses.(args)

    [{:symbol, _, _}, {:list, _, _} | _] ->
      # drop the function name, keep only the clauses
      check_clauses.(tl(args))

    _ ->
      []
  end
end
# Validates one multi-arity clause `([params] body...)`. Anything that is not
# a list starting with a parameter vector yields no diagnostics.
defp validate_fn_clause(clause, ctx) do
  case clause do
    {:list, _meta, [{:vector, _, _} | body]} -> validate_fn_body(body, ctx)
    _not_a_clause -> []
  end
end
# Validates a function body: every form except the last is in non-tail
# position, the last form inherits `ctx`. An empty body yields no diagnostics.
# This is the same rule as for any other sequential body, so it delegates to
# the shared helper instead of duplicating it.
defp validate_fn_body(body, ctx), do: validate_body_forms(body, ctx)
# Checks `(let [name value ...] body...)`.
#
# Produces, in order: the binding-vector arity diagnostic, diagnostics for each
# bound value expression, then diagnostics for the body. Bound values are
# validated in non-tail position (a value is always followed by the body), so a
# `recur` or a malformed special form inside a binding is reported. Binding
# targets are patterns and are not validated as expressions. An empty body only
# produces a warning.
defp validate_let(args, meta, ctx) do
  line = meta_line(meta)
  col = meta_col(meta)

  case args do
    [] ->
      [
        %{
          severity: :error,
          message: "let requires a binding vector and body",
          line: line,
          col: col
        }
      ]

    [{:vector, vmeta, bindings} | body] ->
      binding_diags = validate_binding_vector(bindings, vmeta, "let")
      value_ctx = %{ctx | tail: false}

      value_diags =
        bindings
        |> Enum.chunk_every(2)
        |> Enum.flat_map(fn
          [_target, value] -> validate_form(value, value_ctx)
          # dangling target of an odd-length vector; already reported above
          _incomplete -> []
        end)

      body_diags =
        case body do
          [] ->
            [
              %{
                severity: :warning,
                message: "let with no body expression always returns nil",
                line: line,
                col: col
              }
            ]

          _ ->
            validate_body_forms(body, ctx)
        end

      binding_diags ++ value_diags ++ body_diags

    _ ->
      [
        %{
          severity: :error,
          message: "let requires a binding vector as its first argument",
          line: line,
          col: col
        }
      ]
  end
end
# Checks `(if condition then [else])`. The condition is never in tail
# position; both branches inherit the tail flag of the `if` itself.
defp validate_if(args, meta, ctx) do
  arity_error = fn message ->
    [%{severity: :error, message: message, line: meta_line(meta), col: meta_col(meta)}]
  end

  case args do
    [condition, then_branch] ->
      validate_form(condition, %{ctx | tail: false}) ++ validate_form(then_branch, ctx)

    [condition, then_branch, else_branch] ->
      validate_form(condition, %{ctx | tail: false}) ++
        validate_form(then_branch, ctx) ++
        validate_form(else_branch, ctx)

    _wrong_arity ->
      case length(args) do
        n when n < 2 ->
          arity_error.(
            "if requires a condition and at least a then branch (got #{n} argument(s))"
          )

        n ->
          arity_error.("if accepts at most 3 arguments (condition, then, else), got #{n}")
      end
  end
end
# Checks `(case subject pattern body ...)`. The subject is validated in
# non-tail position; each clause body inherits the tail flag of the `case`.
# Patterns are not validated. With an odd number of clause forms only the
# arity error is reported for the clauses.
defp validate_case(args, meta, ctx) do
  failure = fn message ->
    [%{severity: :error, message: message, line: meta_line(meta), col: meta_col(meta)}]
  end

  case args do
    [] ->
      failure.("case requires a subject expression and at least one pattern/body pair")

    [_subject] ->
      failure.("case requires at least one pattern/body pair after the subject")

    [subject | clause_forms] ->
      subject_diags = validate_form(subject, %{ctx | tail: false})
      form_count = length(clause_forms)

      clause_diags =
        if rem(form_count, 2) == 0 do
          for [_pattern, body] <- Enum.chunk_every(clause_forms, 2),
              diag <- validate_form(body, ctx),
              do: diag
        else
          failure.("case requires an even number of pattern/body forms, got #{form_count}")
        end

      subject_diags ++ clause_diags
  end
end
# Checks `(cond test expr ...)`. Tests are validated in non-tail position;
# each expression inherits the tail flag of the `cond`. An odd number of forms
# yields only the arity error.
defp validate_cond(args, meta, ctx) do
  form_count = length(args)

  if rem(form_count, 2) == 0 do
    test_ctx = %{ctx | tail: false}

    for [test, branch] <- Enum.chunk_every(args, 2),
        diag <- validate_form(test, test_ctx) ++ validate_form(branch, ctx),
        do: diag
  else
    [
      %{
        severity: :error,
        message: "cond requires an even number of test/expression pairs, got #{form_count}",
        line: meta_line(meta),
        col: meta_col(meta)
      }
    ]
  end
end
# Checks `(loop [name init ...] body...)`.
#
# Produces, in order: the binding-vector arity diagnostic, diagnostics for each
# init expression, then diagnostics for the body. Init expressions are
# validated with the enclosing context in non-tail position, because they sit
# outside the new loop body: a `recur` there is always reported. The body is
# validated in tail position with `in_loop: true`. An empty body only produces
# a warning.
defp validate_loop(args, meta, ctx) do
  line = meta_line(meta)
  col = meta_col(meta)

  case args do
    [] ->
      [
        %{
          severity: :error,
          message: "loop requires a binding vector and body",
          line: line,
          col: col
        }
      ]

    [{:vector, vmeta, bindings} | body] ->
      binding_diags = validate_binding_vector(bindings, vmeta, "loop")
      init_ctx = %{ctx | tail: false}

      init_diags =
        bindings
        |> Enum.chunk_every(2)
        |> Enum.flat_map(fn
          [_target, init] -> validate_form(init, init_ctx)
          # dangling target of an odd-length vector; already reported above
          _incomplete -> []
        end)

      body_diags =
        case body do
          [] ->
            [
              %{
                severity: :warning,
                message: "loop with no body expression always returns nil",
                line: line,
                col: col
              }
            ]

          _ ->
            loop_ctx = %{ctx | tail: true, in_loop: true}
            validate_body_forms(body, loop_ctx)
        end

      binding_diags ++ init_diags ++ body_diags

    _ ->
      [
        %{
          severity: :error,
          message: "loop requires a binding vector as its first argument",
          line: line,
          col: col
        }
      ]
  end
end
# Checks the placement of a `recur` form: it must be in tail position and
# inside a loop or function body. The tail-position check takes precedence, so
# at most one diagnostic is returned.
defp validate_recur(meta, ctx) do
  problem =
    cond do
      not ctx.tail -> "recur must be in tail position"
      not ctx.in_loop and not ctx.in_fn -> "recur must be inside a loop or function body"
      true -> nil
    end

  case problem do
    nil ->
      []

    message ->
      [%{severity: :error, message: message, line: meta_line(meta), col: meta_col(meta)}]
  end
end
# `(do form...)` is a plain sequential body: only its last form can be in
# tail position.
defp validate_do(args, _meta, ctx), do: validate_body_forms(args, ctx)
# Checks a map literal: it must hold an even number of forms (key/value
# pairs), and every key and value is validated recursively in non-tail
# position, the same way vector and set elements are. The arity diagnostic,
# if any, comes first.
defp validate_map_literal(elements, meta, ctx) do
  element_count = length(elements)

  arity_diags =
    if rem(element_count, 2) != 0 do
      [
        %{
          severity: :error,
          message:
            "map literal requires an even number of forms (key-value pairs), got #{element_count}",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]
    else
      []
    end

  element_ctx = %{ctx | tail: false}
  arity_diags ++ Enum.flat_map(elements, &validate_form(&1, element_ctx))
end
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Reports an error when a binding vector does not hold an even number of
# forms. `form_name` ("let", "loop", ...) prefixes the message; `meta` is the
# vector's own metadata and supplies the reported position.
defp validate_binding_vector(bindings, meta, form_name) do
  case length(bindings) do
    count when rem(count, 2) == 0 ->
      []

    count ->
      [
        %{
          severity: :error,
          message:
            "#{form_name} binding vector requires an even number of forms (name/value pairs), got #{count}",
          line: meta_line(meta),
          col: meta_col(meta)
        }
      ]
  end
end
# Validates a sequential body: all forms but the last are in non-tail
# position; the last one inherits `ctx`. Diagnostics keep source order.
defp validate_body_forms([], _ctx), do: []

defp validate_body_forms(forms, ctx) do
  [final | reversed_leading] = Enum.reverse(forms)
  leading_ctx = %{ctx | tail: false}

  leading_diags =
    reversed_leading
    |> Enum.reverse()
    |> Enum.flat_map(&validate_form(&1, leading_ctx))

  leading_diags ++ validate_form(final, ctx)
end
# Reads `:line` from form metadata given as a map or a keyword list; returns
# 0 when the key is missing or the metadata has any other shape.
defp meta_line(meta) do
  cond do
    is_map(meta) -> Map.get(meta, :line, 0)
    is_list(meta) -> Keyword.get(meta, :line, 0)
    true -> 0
  end
end
# Reads `:col` from form metadata given as a map or a keyword list; returns
# 0 when the key is missing or the metadata has any other shape.
defp meta_col(meta) do
  cond do
    is_map(meta) -> Map.get(meta, :col, 0)
    is_list(meta) -> Keyword.get(meta, :col, 0)
    true -> 0
  end
end
end