defmodule CljElixir.Analyzer do
  @moduledoc """
  AST analyzer and validator for CljElixir.

  Performs lightweight static analysis on CljElixir AST forms (output of the
  Reader) before they are passed to the Transformer. Catches common structural
  errors early with clear diagnostic messages.

  ## Validations

  1. **Special form arity** - `defmodule` needs name + body, `let` needs a
     vector with an even number of binding pairs, `if` needs 2-3 args, `case`
     needs a subject + even pattern/body pairs, `cond` needs even pairs,
     `loop` needs a vector with even binding pairs.

  2. **Map literal validation** - Maps must have an even number of forms
     (key-value pairs). The key and value forms themselves are validated too.

  3. **`recur` position** - `recur` must appear in tail position. In
     `if`/`case`/`cond`, the tail position is the last expression of each
     branch. In `let`/`do`, the tail position is the last expression. Binding
     values of `let`/`loop` and elements of collection literals are never in
     tail position.

  4. **Nested `recur`** - `recur` inside a nested `loop` should only refer to
     the innermost loop, not an outer one.

  ## Return Value

  Returns `{:ok, forms}` when the AST is valid (passes forms through
  unchanged), or `{:error, diagnostics}` when errors are found. Diagnostics
  are maps with keys: `:severity`, `:message`, `:line`, `:col`.
  """

  @type diagnostic :: %{
          severity: :error | :warning,
          message: String.t(),
          line: non_neg_integer(),
          col: non_neg_integer()
        }

  # Context used for every top-level form: it is in tail position of itself,
  # but not inside any loop or function that `recur` could target.
  @top_level_ctx %{tail: true, in_loop: false, in_fn: false}

  @doc """
  Analyze and validate a list of CljElixir AST forms.

  A single (non-list) form is also accepted and is wrapped in a list first.

  Returns `{:ok, forms}` if no diagnostic of severity `:error` was produced
  (warnings alone do not fail the analysis and are not returned), or
  `{:error, diagnostics}` with every diagnostic collected, warnings included.
  """
  @spec analyze(list()) :: {:ok, list()} | {:error, [diagnostic()]}
  def analyze(forms) when is_list(forms) do
    diagnostics = Enum.flat_map(forms, &validate_form(&1, @top_level_ctx))

    if Enum.any?(diagnostics, &(&1.severity == :error)) do
      {:error, diagnostics}
    else
      {:ok, forms}
    end
  end

  def analyze(form), do: analyze(List.wrap(form))

  # ---------------------------------------------------------------------------
  # Form validation - dispatches on the head of each s-expression
  # ---------------------------------------------------------------------------

  # A list form starting with a known symbol is a special form: (special-form ...)
  defp validate_form({:list, meta, [{:symbol, _, "defmodule"} | args]}, ctx),
    do: validate_defmodule(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, name} | args]}, ctx)
       when name in ["defn", "defn-"],
       do: validate_defn(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "fn"} | args]}, ctx),
    do: validate_fn(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "let"} | args]}, ctx),
    do: validate_let(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "if"} | args]}, ctx),
    do: validate_if(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "case"} | args]}, ctx),
    do: validate_case(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "cond"} | args]}, ctx),
    do: validate_cond(args, meta, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "loop"} | args]}, ctx),
    do: validate_loop(args, meta, ctx)

  defp validate_form({:list, _meta, [{:symbol, _, "receive"} | args]}, ctx),
    do: validate_receive_clauses(args, ctx)

  defp validate_form({:list, meta, [{:symbol, _, "recur"} | _args]}, ctx),
    do: validate_recur(meta, ctx)

  # `do` is a plain body: only its last expression inherits tail position.
  defp validate_form({:list, _meta, [{:symbol, _, "do"} | args]}, ctx),
    do: validate_body_forms(args, ctx)

  defp validate_form({:map, meta, elements}, ctx),
    do: validate_map_literal(elements, meta, ctx)

  # Generic list form (a call): the call itself may be in tail position, but
  # neither its head nor any of its arguments are, so a `recur` found in any
  # child is reported.
  defp validate_form({:list, _meta, children}, ctx) when is_list(children),
    do: validate_non_tail(children, ctx)

  # Vectors: elements are never in tail position
  defp validate_form({:vector, _meta, elements}, ctx) when is_list(elements),
    do: validate_non_tail(elements, ctx)

  # Sets: elements are never in tail position
  defp validate_form({:set, _meta, elements}, ctx) when is_list(elements),
    do: validate_non_tail(elements, ctx)

  # Atoms, numbers, strings, symbols, keywords — always valid
  defp validate_form(_leaf, _ctx), do: []

  # ---------------------------------------------------------------------------
  # Special form validators
  # ---------------------------------------------------------------------------

  defp validate_defmodule([], meta, _ctx) do
    [diag(:error, "defmodule requires a module name and at least one body expression", meta)]
  end

  defp validate_defmodule([_name], meta, _ctx) do
    [
      diag(
        :error,
        "defmodule requires at least one body expression after the module name",
        meta
      )
    ]
  end

  defp validate_defmodule([_name | body], _meta, ctx) do
    # Every body form is a top-level form of the module: it starts a fresh
    # tail position and is outside any enclosing loop/function.
    module_ctx = %{ctx | tail: true, in_loop: false, in_fn: false}
    Enum.flat_map(body, &validate_form(&1, module_ctx))
  end

  defp validate_defn([], meta, _ctx) do
    [diag(:error, "defn requires a function name, parameter vector, and body", meta)]
  end

  defp validate_defn([_name], meta, _ctx) do
    [diag(:error, "defn requires a parameter vector and body after the function name", meta)]
  end

  # Accepted shapes:
  #   (defn name [params] body...)
  #   (defn name "docstring" [params] body...)
  #   (defn name ([params1] body1) ([params2] body2))   -- multi-arity
  defp validate_defn([_name, head | rest], meta, ctx) do
    fn_ctx = function_ctx(ctx)

    case head do
      # Multi-arity: (defn name (clause1) (clause2) ...)
      {:list, _, _} -> validate_fn_clauses([head | rest], fn_ctx)
      # Docstring form: (defn name "doc" ...)
      {:string, _, _} -> validate_defn_after_doc(rest, fn_ctx, meta)
      # Param vector (or anything else in that slot): the rest is the body
      _ -> validate_body_forms(rest, fn_ctx)
    end
  end

  # Validates what follows a docstring in `defn`; `ctx` is already the
  # function-body context.
  defp validate_defn_after_doc([], _ctx, meta) do
    [diag(:error, "defn requires a parameter vector and body after docstring", meta)]
  end

  defp validate_defn_after_doc([{:vector, _, _} | body], ctx, _meta),
    do: validate_body_forms(body, ctx)

  # Multi-arity after docstring
  defp validate_defn_after_doc([{:list, _, _} | _] = clauses, ctx, _meta),
    do: validate_fn_clauses(clauses, ctx)

  defp validate_defn_after_doc(_other, _ctx, _meta), do: []

  defp validate_fn(args, meta, ctx) do
    fn_ctx = function_ctx(ctx)

    case args do
      [] ->
        [diag(:error, "fn requires a parameter vector and body", meta)]

      # (fn [params] body...) - single arity
      [{:vector, _, _} | body] ->
        validate_body_forms(body, fn_ctx)

      # (fn name [params] body...) - named fn
      [{:symbol, _, _}, {:vector, _, _} | body] ->
        validate_body_forms(body, fn_ctx)

      # (fn (clause1) (clause2) ...) - multi-arity
      [{:list, _, _} | _] = clauses ->
        validate_fn_clauses(clauses, fn_ctx)

      # (fn name (clause1) (clause2) ...) - named multi-arity
      [{:symbol, _, _} | [{:list, _, _} | _] = clauses] ->
        validate_fn_clauses(clauses, fn_ctx)

      _ ->
        []
    end
  end

  defp validate_fn_clauses(clauses, ctx),
    do: Enum.flat_map(clauses, &validate_fn_clause(&1, ctx))

  # A clause is `([params] body...)`; anything else is silently ignored.
  defp validate_fn_clause({:list, _meta, [{:vector, _, _} | body]}, ctx),
    do: validate_body_forms(body, ctx)

  defp validate_fn_clause(_other, _ctx), do: []

  defp validate_let([], meta, _ctx) do
    [diag(:error, "let requires a binding vector and body", meta)]
  end

  defp validate_let([{:vector, vmeta, bindings} | body], meta, ctx) do
    binding_diags = validate_binding_vector(bindings, vmeta, "let", ctx)

    body_diags =
      case body do
        [] -> [diag(:warning, "let with no body expression always returns nil", meta)]
        _ -> validate_body_forms(body, ctx)
      end

    binding_diags ++ body_diags
  end

  defp validate_let(_args, meta, _ctx) do
    [diag(:error, "let requires a binding vector as its first argument", meta)]
  end

  defp validate_if([condition, then_branch], _meta, ctx) do
    validate_form(condition, %{ctx | tail: false}) ++ validate_form(then_branch, ctx)
  end

  defp validate_if([condition, then_branch, else_branch], _meta, ctx) do
    validate_form(condition, %{ctx | tail: false}) ++
      validate_form(then_branch, ctx) ++
      validate_form(else_branch, ctx)
  end

  # Any other argument count is an arity error (fewer than 2 or more than 3).
  defp validate_if(args, meta, _ctx) do
    n = length(args)

    message =
      if n < 2 do
        "if requires a condition and at least a then branch (got #{n} argument(s))"
      else
        "if accepts at most 3 arguments (condition, then, else), got #{n}"
      end

    [diag(:error, message, meta)]
  end

  defp validate_case([], meta, _ctx) do
    [
      diag(
        :error,
        "case requires a subject expression and at least one pattern/body pair",
        meta
      )
    ]
  end

  defp validate_case([_subject], meta, _ctx) do
    [diag(:error, "case requires at least one pattern/body pair after the subject", meta)]
  end

  defp validate_case([subject | pairs], meta, ctx) do
    subject_diags = validate_form(subject, %{ctx | tail: false})
    count = length(pairs)

    pair_diags =
      if rem(count, 2) == 0 do
        # Patterns are not validated as expressions; only the branch bodies
        # are, and each body inherits the tail position of the `case`.
        pairs
        |> Enum.chunk_every(2)
        |> Enum.flat_map(fn [_pattern, body] -> validate_form(body, ctx) end)
      else
        [
          diag(
            :error,
            "case requires an even number of pattern/body forms, got #{count}",
            meta
          )
        ]
      end

    subject_diags ++ pair_diags
  end

  defp validate_cond(args, meta, ctx) do
    count = length(args)

    if rem(count, 2) == 0 do
      non_tail = %{ctx | tail: false}

      args
      |> Enum.chunk_every(2)
      |> Enum.flat_map(fn [test, body] ->
        validate_form(test, non_tail) ++ validate_form(body, ctx)
      end)
    else
      [
        diag(
          :error,
          "cond requires an even number of test/expression pairs, got #{count}",
          meta
        )
      ]
    end
  end

  defp validate_loop([], meta, _ctx) do
    [diag(:error, "loop requires a binding vector and body", meta)]
  end

  defp validate_loop([{:vector, vmeta, bindings} | body], meta, ctx) do
    # Initial binding values are evaluated with the enclosing context (before
    # the loop is entered), never in tail position.
    binding_diags = validate_binding_vector(bindings, vmeta, "loop", ctx)

    body_diags =
      case body do
        [] ->
          [diag(:warning, "loop with no body expression always returns nil", meta)]

        _ ->
          # The loop body starts a fresh tail position that `recur` targets.
          validate_body_forms(body, %{ctx | tail: true, in_loop: true})
      end

    binding_diags ++ body_diags
  end

  defp validate_loop(_args, meta, _ctx) do
    [diag(:error, "loop requires a binding vector as its first argument", meta)]
  end

  # receive propagates tail position into clause bodies. Clauses are consumed
  # positionally: `:after timeout body`, `pattern :guard guard body`, or
  # `pattern body`. Patterns, guards and timeouts are not validated.
  # NOTE(review): markers are matched as the bare atoms `:after` / `:guard`;
  # confirm this is how the Reader represents them.
  defp validate_receive_clauses([], _ctx), do: []

  defp validate_receive_clauses([:after, _timeout, body | rest], ctx),
    do: validate_form(body, ctx) ++ validate_receive_clauses(rest, ctx)

  defp validate_receive_clauses([_pattern, :guard, _guard, body | rest], ctx),
    do: validate_form(body, ctx) ++ validate_receive_clauses(rest, ctx)

  defp validate_receive_clauses([_pattern, body | rest], ctx),
    do: validate_form(body, ctx) ++ validate_receive_clauses(rest, ctx)

  # A dangling single form (pattern without body) is ignored.
  defp validate_receive_clauses([_], _ctx), do: []

  defp validate_recur(meta, ctx) do
    cond do
      not ctx.tail ->
        [diag(:error, "recur must be in tail position", meta)]

      not (ctx.in_loop or ctx.in_fn) ->
        [diag(:error, "recur must be inside a loop or function body", meta)]

      true ->
        []
    end
  end

  # Checks key/value parity of a map literal AND validates every key and value
  # form. Like vector and set elements, map elements are never in tail
  # position, so a `recur` inside a map is reported.
  defp validate_map_literal(elements, meta, ctx) do
    count = length(elements)

    parity_diags =
      if rem(count, 2) == 0 do
        []
      else
        [
          diag(
            :error,
            "map literal requires an even number of forms (key-value pairs), got #{count}",
            meta
          )
        ]
      end

    parity_diags ++ validate_non_tail(elements, ctx)
  end

  # ---------------------------------------------------------------------------
  # Helpers
  # ---------------------------------------------------------------------------

  # Checks name/value parity of a `let`/`loop` binding vector and validates
  # each bound value expression in non-tail position. Binding targets (names
  # or destructuring patterns) are not validated as expressions.
  defp validate_binding_vector(bindings, meta, form_name, ctx) do
    count = length(bindings)

    parity_diags =
      if rem(count, 2) == 0 do
        []
      else
        [
          diag(
            :error,
            "#{form_name} binding vector requires an even number of forms (name/value pairs), got #{count}",
            meta
          )
        ]
      end

    non_tail = %{ctx | tail: false}

    value_diags =
      bindings
      |> Enum.chunk_every(2)
      |> Enum.flat_map(fn
        [_target, value] -> validate_form(value, non_tail)
        # Trailing target without a value (odd count, already reported above)
        [_dangling] -> []
      end)

    parity_diags ++ value_diags
  end

  # Validates a sequence of body expressions: all but the last are in
  # non-tail position; the last inherits the tail flag of `ctx`.
  defp validate_body_forms([], _ctx), do: []

  defp validate_body_forms(forms, ctx) do
    {leading, [last]} = Enum.split(forms, -1)
    validate_non_tail(leading, ctx) ++ validate_form(last, ctx)
  end

  # Validates every form with the tail flag cleared.
  defp validate_non_tail(forms, ctx) do
    non_tail = %{ctx | tail: false}
    Enum.flat_map(forms, &validate_form(&1, non_tail))
  end

  # Context for a function body: a new recur target that hides any outer loop.
  defp function_ctx(ctx), do: %{ctx | tail: true, in_fn: true, in_loop: false}

  # Builds a diagnostic map, taking the position from the form's metadata.
  defp diag(severity, message, meta) do
    %{severity: severity, message: message, line: meta_line(meta), col: meta_col(meta)}
  end

  # Metadata may be a map or a keyword list; a missing position defaults to 0.
  defp meta_line(meta) when is_map(meta), do: Map.get(meta, :line, 0)
  defp meta_line(meta) when is_list(meta), do: Keyword.get(meta, :line, 0)
  defp meta_line(_), do: 0

  defp meta_col(meta) when is_map(meta), do: Map.get(meta, :col, 0)
  defp meta_col(meta) when is_list(meta), do: Keyword.get(meta, :col, 0)
  defp meta_col(_), do: 0
end