Phases 1-7: Complete CljElixir compiler through Malli schema adapter
Bootstrap compiler (reader, analyzer, transformer, compiler, Mix plugin), core protocols (16 protocols for Map/List/Tuple/BitString), PersistentVector (bit-partitioned trie), domain tools (clojurify/elixirify), BEAM concurrency (receive, spawn, GenServer), control flow & macros (threading, try/catch, destructuring, defmacro with quasiquote/auto-gensym), and Malli schema adapter (m/=> specs, auto @type, recursive schemas, cross-references). 537 compiler tests + 55 Malli unit tests + 15 integration tests = 607 total. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,647 @@
|
||||
defmodule CljElixir.Reader do
@moduledoc """
Reader for CljElixir: turns source text into CljElixir AST forms.

Reading happens in two phases:

1. Tokenizer — converts source text into a flat list of `Token` structs
2. Parser — recursive descent over the token list, producing CljElixir AST nodes

## AST representation

Literals represent themselves: integers, floats, strings, booleans, nil,
and atoms (keywords).

Compound forms are tagged tuples of `{tag, meta, payload}`, where `meta`
is a map carrying `:line` and `:col`:

    {:symbol, meta, name}
    {:list, meta, [elements]}
    {:vector, meta, [elements]}
    {:map, meta, [k1, v1, k2, v2, ...]}
    {:set, meta, [elements]}
    {:tuple, meta, [elements]}
    {:regex, meta, pattern}
    {:quote, meta, form}
    {:with_meta, meta, {metadata, target}}
    {:anon_fn, meta, body}
    {:quasiquote, meta, form}
    {:unquote, meta, form}
    {:splice_unquote, meta, form}
    {:deref, meta, form}
"""

alias CljElixir.Reader.Token

# ── Public API ──────────────────────────────────────────────────────
@doc """
Read a string of CljElixir source into a list of AST forms.

Returns `{:ok, [form]}` on success, `{:error, message}` on failure.
"""
@spec read_string(String.t()) :: {:ok, list()} | {:error, String.t()}
def read_string(source) when is_binary(source) do
  # A tokenizer {:error, _} does not match and falls through unchanged.
  with {:ok, tokens} <- tokenize(source) do
    parse_all(tokens, [])
  end
end
# ════════════════════════════════════════════════════════════════════
# TOKENIZER
# ════════════════════════════════════════════════════════════════════

@doc false
def tokenize(source) do
  source
  |> String.to_charlist()
  |> tokenize_loop(1, 1, [])
end

# The loop threads {line, col} (both 1-based) plus a reversed token
# accumulator. Clause order matters: multi-character sequences
# (#el[, #{, #(, #", ~@) must be tried before their one-character
# prefixes.

# ---------- end of input ----------
defp tokenize_loop([], _line, _col, acc), do: {:ok, Enum.reverse(acc)}

# ---------- line terminators: LF, CRLF, lone CR each count once ----------
defp tokenize_loop([?\n | rest], line, _col, acc),
  do: tokenize_loop(rest, line + 1, 1, acc)

defp tokenize_loop([?\r, ?\n | rest], line, _col, acc),
  do: tokenize_loop(rest, line + 1, 1, acc)

defp tokenize_loop([?\r | rest], line, _col, acc),
  do: tokenize_loop(rest, line + 1, 1, acc)

# ---------- whitespace / commas: insignificant separators ----------
defp tokenize_loop([c | rest], line, col, acc) when c in [?\s, ?\t, ?,],
  do: tokenize_loop(rest, line, col + 1, acc)

# ---------- comments ----------
# skip_comment stops at (but does not consume) the newline or EOF,
# so the line-terminator clauses above do the line bookkeeping.
defp tokenize_loop([?; | rest], line, _col, acc),
  do: tokenize_loop(skip_comment(rest), line, 1, acc)

# ---------- string literal "..." ----------
defp tokenize_loop([?" | rest], line, col, acc) do
  case read_string_literal(rest, line, col + 1, []) do
    {:ok, value, rest2, end_line, end_col} ->
      token = %Token{type: :string, value: value, line: line, col: col}
      tokenize_loop(rest2, end_line, end_col, [token | acc])

    {:error, _} = err ->
      err
  end
end

# ---------- dispatch sequences: #el[ (tuple), #{ (set), #( (fn), #" (regex) ----------
defp tokenize_loop([?#, ?e, ?l, ?[ | rest], line, col, acc) do
  token = %Token{type: :hash_el_lbracket, value: "#el[", line: line, col: col}
  tokenize_loop(rest, line, col + 4, [token | acc])
end

defp tokenize_loop([?#, ?{ | rest], line, col, acc) do
  token = %Token{type: :hash_lbrace, value: "\#{", line: line, col: col}
  tokenize_loop(rest, line, col + 2, [token | acc])
end

defp tokenize_loop([?#, ?( | rest], line, col, acc) do
  token = %Token{type: :hash_lparen, value: "#(", line: line, col: col}
  tokenize_loop(rest, line, col + 2, [token | acc])
end

defp tokenize_loop([?#, ?" | rest], line, col, acc) do
  case read_string_literal(rest, line, col + 2, []) do
    {:ok, value, rest2, end_line, end_col} ->
      token = %Token{type: :hash_string, value: value, line: line, col: col}
      tokenize_loop(rest2, end_line, end_col, [token | acc])

    {:error, _} = err ->
      err
  end
end

# ---------- splice-unquote ~@ (must come before plain ~) ----------
defp tokenize_loop([?~, ?@ | rest], line, col, acc) do
  token = %Token{type: :splice_unquote, value: "~@", line: line, col: col}
  tokenize_loop(rest, line, col + 2, [token | acc])
end

# ---------- unquote ~ ----------
defp tokenize_loop([?~ | rest], line, col, acc) do
  token = %Token{type: :unquote, value: "~", line: line, col: col}
  tokenize_loop(rest, line, col + 1, [token | acc])
end

# ---------- single-character punctuation: ( ) [ ] { } ' ` ^ @ ----------
# Consolidates ten formerly identical clauses; punct_type/1 maps the
# character to its token type. This clause sits after the #... and ~...
# clauses, so `@` here is always a bare deref, never part of `~@`.
defp tokenize_loop([c | rest], line, col, acc)
     when c in [?(, ?), ?[, ?], ?{, ?}, ?', ?`, ?^, ?@] do
  token = %Token{type: punct_type(c), value: <<c::utf8>>, line: line, col: col}
  tokenize_loop(rest, line, col + 1, [token | acc])
end

# ---------- keywords :name / :"name" ----------
defp tokenize_loop([?: | rest], line, col, acc) do
  case read_keyword(rest, line, col) do
    {:ok, kw_value, rest2, end_col} ->
      token = %Token{type: :keyword, value: kw_value, line: line, col: col}
      tokenize_loop(rest2, line, end_col, [token | acc])

    {:error, _} = err ->
      err
  end
end

# ---------- leading `-`: negative number or a symbol like `->` ----------
# Whitespace is always consumed before reaching this loop, so a
# standalone `-` followed by a digit is a negative literal; a `-` inside
# a symbol name (my-func) is consumed by the symbol reader and never
# reaches this clause on its own.
defp tokenize_loop([?- | rest], line, col, acc) do
  if starts_with_digit?(rest) do
    {:ok, token, rest2, end_col} = read_number(rest, line, col + 1, [?-])
    # Re-anchor the token at the `-` column.
    tokenize_loop(rest2, line, end_col, [%{token | line: line, col: col} | acc])
  else
    # read_symbol always succeeds, so match the :ok tuple assertively.
    {:ok, token, rest2, end_col} = read_symbol([?- | rest], line, col)
    tokenize_loop(rest2, line, end_col, [token | acc])
  end
end

# ---------- numbers ----------
defp tokenize_loop([c | _] = chars, line, col, acc) when c in ?0..?9 do
  {:ok, token, rest2, end_col} = read_number(chars, line, col, [])
  tokenize_loop(rest2, line, end_col, [token | acc])
end

# ---------- symbols (also yields the true/false/nil literal tokens) ----------
defp tokenize_loop([c | _] = chars, line, col, acc)
     when c in ?a..?z or c in ?A..?Z or
            c in [?_, ?*, ?!, ??, ?<, ?>, ?=, ?+, ?., ?&, ?%] do
  {:ok, token, rest, end_col} = read_symbol(chars, line, col)
  tokenize_loop(rest, line, end_col, [token | acc])
end

# ---------- catch-all: unexpected character ----------
defp tokenize_loop([c | _], line, col, _acc) do
  {:error, "Unexpected character '#{<<c::utf8>>}' at line #{line}, col #{col}"}
end

# Token type for each single-character punctuation token.
defp punct_type(c) do
  Map.fetch!(
    %{
      ?( => :lparen,
      ?) => :rparen,
      ?[ => :lbracket,
      ?] => :rbracket,
      ?{ => :lbrace,
      ?} => :rbrace,
      ?' => :quote,
      ?` => :quasiquote,
      ?^ => :meta,
      ?@ => :deref
    },
    c
  )
end
# ── Tokenizer helpers ───────────────────────────────────────────────

# Characters that may continue a symbol (any position after the first).
defp symbol_continue_char?(c) do
  c in ?a..?z or c in ?A..?Z or c in ?0..?9 or
    c in [?_, ?*, ?!, ??, ?<, ?>, ?=, ?+, ?-, ?/, ?., ?%, ?&, ?#]
end

# True when the input starts with an ASCII digit.
defp starts_with_digit?(chars), do: match?([d | _] when d in ?0..?9, chars)

# Advance past a comment body, stopping at (not consuming) the line
# terminator or end of input.
defp skip_comment([c | _] = rest) when c in [?\n, ?\r], do: rest
defp skip_comment([]), do: []
defp skip_comment([_ | rest]), do: skip_comment(rest)
# ── String literal reader ──────────────────────────────────────────

# Escape sequences recognized inside string literals.
@string_escapes %{?" => ?", ?\\ => ?\\, ?n => ?\n, ?t => ?\t, ?r => ?\r}

# Accumulates characters (reversed in `buf`) until the closing quote,
# translating escapes and tracking line/col across embedded newlines.
# Returns {:ok, string, rest, end_line, end_col} | {:error, message}.
defp read_string_literal([], line, _col, _buf),
  do: {:error, "Unterminated string starting at line #{line}"}

defp read_string_literal([?" | rest], line, col, buf),
  do: {:ok, IO.chardata_to_string(Enum.reverse(buf)), rest, line, col + 1}

defp read_string_literal([?\\, esc | rest], line, col, buf)
     when is_map_key(@string_escapes, esc) do
  read_string_literal(rest, line, col + 2, [Map.fetch!(@string_escapes, esc) | buf])
end

defp read_string_literal([?\n | rest], line, _col, buf),
  do: read_string_literal(rest, line + 1, 1, [?\n | buf])

# Any other character — including a backslash that starts an unknown
# escape — is kept verbatim, one character at a time.
defp read_string_literal([ch | rest], line, col, buf),
  do: read_string_literal(rest, line, col + 1, [ch | buf])
# ── Keyword reader ─────────────────────────────────────────────────

# Quoted keyword: :"some-name" (the name may contain arbitrary characters).
defp read_keyword([?" | rest], line, col) do
  case read_string_literal(rest, line, col + 2, []) do
    {:ok, value, rest2, _end_line, end_col} ->
      # NOTE(review): String.to_atom/1 interns one atom per keyword read
      # from source — fine for trusted source files, unsafe for
      # unbounded untrusted input.
      {:ok, String.to_atom(value), rest2, end_col}

    {:error, _} = err ->
      err
  end
end

# Bare keyword: :name, :my-key, :ok
defp read_keyword(chars, _line, col) do
  case take_keyword_chars(chars, []) do
    {[], _rest} ->
      {:error, "Expected keyword name after ':'"}

    {name_chars, rest} ->
      name = name_chars |> Enum.reverse() |> IO.chardata_to_string()
      {:ok, String.to_atom(name), rest, col + 1 + length(name_chars)}
  end
end

# Characters permitted in a bare keyword name; accumulates reversed.
defp take_keyword_chars([c | rest], acc)
     when c in ?a..?z or c in ?A..?Z or c in ?0..?9 or
            c in [?_, ?-, ?!, ??, ?., ?/, ?*, ?+, ?>, ?<, ?=, ?&, ?#],
     do: take_keyword_chars(rest, [c | acc])

defp take_keyword_chars(rest, acc), do: {acc, rest}
# ── Number reader ──────────────────────────────────────────────────

# Reads an integer or float literal. `prefix` carries already-consumed
# characters (the leading `-` of a negative literal, reversed order);
# `col` points at the first unread character, so the reported end
# column is adjusted by the prefix length. Always returns
# {:ok, token, rest, end_col}.
defp read_number(chars, line, col, prefix) do
  {int_digits, rest} = take_digits(chars, prefix)

  case rest do
    # A dot followed by a digit continues the literal as a float.
    [?., d | _] when d in ?0..?9 ->
      [?. | after_dot] = rest
      {all_digits, rest2} = take_digits(after_dot, [?. | int_digits])
      str = IO.chardata_to_string(Enum.reverse(all_digits))
      # Assertive: the accumulated characters always form a valid float.
      {float_val, ""} = Float.parse(str)
      token = %Token{type: :float, value: float_val, line: line, col: col}
      {:ok, token, rest2, col + String.length(str) - length(prefix)}

    # Otherwise emit an integer; a trailing bare dot (not followed by a
    # digit) is left in the stream for the next token.
    _ ->
      emit_integer_token(int_digits, rest, line, col, prefix)
  end
end

# Builds the :integer token shared by the plain-integer and
# dot-not-followed-by-digit cases (previously duplicated inline).
defp emit_integer_token(digit_chars, rest, line, col, prefix) do
  str = IO.chardata_to_string(Enum.reverse(digit_chars))
  {int_val, ""} = Integer.parse(str)
  token = %Token{type: :integer, value: int_val, line: line, col: col}
  {:ok, token, rest, col + String.length(str) - length(prefix)}
end

# Accumulate consecutive ASCII digits onto `acc` (reversed).
defp take_digits([c | rest], acc) when c in ?0..?9,
  do: take_digits(rest, [c | acc])

defp take_digits(rest, acc), do: {acc, rest}
# ── Symbol reader ──────────────────────────────────────────────────

# Reads a symbol starting at `chars`. The reserved words true/false/nil
# become literal tokens rather than symbols. Always returns
# {:ok, token, rest, end_col}.
defp read_symbol(chars, line, col) do
  {sym_chars, rest} = take_symbol_chars(chars, [])
  name = sym_chars |> Enum.reverse() |> IO.chardata_to_string()

  token =
    case name do
      "true" -> %Token{type: :boolean, value: true, line: line, col: col}
      "false" -> %Token{type: :boolean, value: false, line: line, col: col}
      "nil" -> %Token{type: :nil, value: nil, line: line, col: col}
      _ -> %Token{type: :symbol, value: name, line: line, col: col}
    end

  {:ok, token, rest, col + String.length(name)}
end

# The first character uses the stricter start set; subsequent characters
# use the continue set (which additionally allows digits, `/`, `#`, ...).
defp take_symbol_chars([c | rest] = chars, acc) do
  allowed? =
    case acc do
      [] -> symbol_start_char?(c)
      _ -> symbol_continue_char?(c)
    end

  if allowed?, do: take_symbol_chars(rest, [c | acc]), else: {acc, chars}
end

defp take_symbol_chars([], acc), do: {acc, []}

# Characters that may begin a symbol.
defp symbol_start_char?(c) do
  c in ?a..?z or c in ?A..?Z or
    c in [?_, ?*, ?!, ??, ?<, ?>, ?=, ?+, ?-, ?., ?&, ?%]
end
# ════════════════════════════════════════════════════════════════════
# PARSER — Recursive Descent
# ════════════════════════════════════════════════════════════════════

# Parse every top-level form until the token stream is exhausted.
# A parse_form {:error, _} falls through the `with` unchanged.
defp parse_all([], forms), do: {:ok, Enum.reverse(forms)}

defp parse_all(tokens, forms) do
  with {:ok, form, rest} <- parse_form(tokens) do
    parse_all(rest, [form | forms])
  end
end
# ── Parse a single form ────────────────────────────────────────────
# Returns {:ok, ast, remaining_tokens} | {:error, message}.

# Self-evaluating literals pass straight through.
defp parse_form([%Token{type: :integer, value: v} | rest]), do: {:ok, v, rest}
defp parse_form([%Token{type: :float, value: v} | rest]), do: {:ok, v, rest}
defp parse_form([%Token{type: :string, value: v} | rest]), do: {:ok, v, rest}
defp parse_form([%Token{type: :keyword, value: v} | rest]), do: {:ok, v, rest}
defp parse_form([%Token{type: :boolean, value: v} | rest]), do: {:ok, v, rest}
defp parse_form([%Token{type: :nil} | rest]), do: {:ok, nil, rest}

# Symbol
defp parse_form([%Token{type: :symbol, value: name, line: l, col: c} | rest]),
  do: {:ok, {:symbol, %{line: l, col: c}, name}, rest}

# Collections: each opener reads forms up to its closer and wraps them
# in the corresponding tagged node. parse_collection/5 replaces five
# formerly duplicated case expressions.
defp parse_form([%Token{type: :lparen, line: l, col: c} | rest]),
  do: parse_collection(rest, :rparen, :list, l, c)

defp parse_form([%Token{type: :lbracket, line: l, col: c} | rest]),
  do: parse_collection(rest, :rbracket, :vector, l, c)

defp parse_form([%Token{type: :lbrace, line: l, col: c} | rest]),
  do: parse_collection(rest, :rbrace, :map, l, c)

defp parse_form([%Token{type: :hash_lbrace, line: l, col: c} | rest]),
  do: parse_collection(rest, :rbrace, :set, l, c)

defp parse_form([%Token{type: :hash_el_lbracket, line: l, col: c} | rest]),
  do: parse_collection(rest, :rbracket, :tuple, l, c)

# Anonymous function #( ... ): the payload is the enclosed list form.
defp parse_form([%Token{type: :hash_lparen, line: l, col: c} | rest]) do
  with {:ok, elements, rest2} <- parse_until(rest, :rparen) do
    meta = %{line: l, col: c}
    {:ok, {:anon_fn, meta, {:list, meta, elements}}, rest2}
  end
end

# Regex #"..."
defp parse_form([%Token{type: :hash_string, value: pattern, line: l, col: c} | rest]),
  do: {:ok, {:regex, %{line: l, col: c}, pattern}, rest}

# Reader prefixes that wrap the next form: ' ` ~ ~@ @
defp parse_form([%Token{type: :quote, line: l, col: c} | rest]),
  do: parse_prefixed(rest, :quote, l, c)

defp parse_form([%Token{type: :quasiquote, line: l, col: c} | rest]),
  do: parse_prefixed(rest, :quasiquote, l, c)

defp parse_form([%Token{type: :unquote, line: l, col: c} | rest]),
  do: parse_prefixed(rest, :unquote, l, c)

defp parse_form([%Token{type: :splice_unquote, line: l, col: c} | rest]),
  do: parse_prefixed(rest, :splice_unquote, l, c)

defp parse_form([%Token{type: :deref, line: l, col: c} | rest]),
  do: parse_prefixed(rest, :deref, l, c)

# Metadata: ^meta target
defp parse_form([%Token{type: :meta, line: l, col: c} | rest]) do
  with {:ok, meta_form, rest2} <- parse_meta_value(rest, l, c),
       {:ok, target, rest3} <- parse_form(rest2) do
    {:ok, {:with_meta, %{line: l, col: c}, {meta_form, target}}, rest3}
  end
end

# Anything else is a parse error.
defp parse_form([%Token{type: type, line: l, col: c} | _]),
  do: {:error, "Unexpected token #{type} at line #{l}, col #{c}"}

defp parse_form([]), do: {:error, "Unexpected end of input"}

# Reads forms up to `closer` and wraps them as {tag, meta, elements};
# errors from parse_until fall through unchanged.
defp parse_collection(rest, closer, tag, l, c) do
  with {:ok, elements, rest2} <- parse_until(rest, closer) do
    {:ok, {tag, %{line: l, col: c}, elements}, rest2}
  end
end

# Parses the next form and wraps it as {tag, meta, form}; errors from
# parse_form fall through unchanged.
defp parse_prefixed(rest, tag, l, c) do
  with {:ok, form, rest2} <- parse_form(rest) do
    {:ok, {tag, %{line: l, col: c}, form}, rest2}
  end
end
# ── Parse helpers ──────────────────────────────────────────────────

# Parse forms until a token of type `closer` is consumed; returns
# {:ok, elements, rest} or {:error, message} on premature EOF.
defp parse_until(tokens, closer), do: parse_until_loop(tokens, closer, [])

defp parse_until_loop([], closer, _acc),
  do: {:error, "Unexpected end of input, expected '#{delimiter_name(closer)}'"}

# The repeated `closer` variable makes the head match only when the
# token's type equals the expected closing delimiter.
defp parse_until_loop([%Token{type: closer} | rest], closer, acc),
  do: {:ok, Enum.reverse(acc), rest}

defp parse_until_loop(tokens, closer, acc) do
  with {:ok, form, rest} <- parse_form(tokens) do
    parse_until_loop(rest, closer, [form | acc])
  end
end
# Parse the metadata value that follows ^.

# ^{...} — explicit metadata map.
defp parse_meta_value([%Token{type: :lbrace, line: l, col: c} | rest], _ml, _mc) do
  with {:ok, elements, rest2} <- parse_until(rest, :rbrace) do
    {:ok, {:map, %{line: l, col: c}, elements}, rest2}
  end
end

# ^:keyword — shorthand for ^{:keyword true}.
defp parse_meta_value([%Token{type: :keyword, value: kw, line: l, col: c} | rest], _ml, _mc),
  do: {:ok, {:map, %{line: l, col: c}, [kw, true]}, rest}

# ^symbol — the symbol form is returned as-is here.
# NOTE(review): the classic ^sym sugar expands to a {:tag sym} map; this
# stage does not perform that expansion — presumably a later pass does.
# Confirm against the analyzer.
defp parse_meta_value([%Token{type: :symbol} | _] = tokens, _ml, _mc),
  do: parse_form(tokens)

defp parse_meta_value(_tokens, ml, mc),
  do: {:error, "Expected metadata value (map, keyword, or symbol) at line #{ml}, col #{mc}"}

# Printable name of a closing delimiter, for error messages.
defp delimiter_name(:rparen), do: ")"
defp delimiter_name(:rbracket), do: "]"
defp delimiter_name(:rbrace), do: "}"
end
|
||||
Reference in New Issue
Block a user