Files
2026-03-09 23:09:46 -04:00

243 lines
6.3 KiB
Clojure

(ns String
"Elixir String module — UTF-8 string operations.
In CljElixir: (String/split s \" \"), (String/trim s), etc.
Strings in Elixir are UTF-8 encoded binaries.
The defns in this namespace carry only a docstring and an argument list
(no body); presumably calls are compiled to the matching functions of
Elixir's `String` module — confirm against the CljElixir toolchain.")
;; --- Searching ---
(defn contains?
"Returns true if `string` contains `pattern`.
`pattern` is either a single string or a list of strings; with a list the
result is true as soon as any one element is found.
(String/contains? \"hello world\" \"world\") ;=> true
(String/contains? \"hello\" [\"x\" \"e\"]) ;=> true (any match)"
[string pattern])
(defn starts-with?
"Returns true if `string` starts with `prefix`, false otherwise.
(String/starts-with? \"hello\" \"he\") ;=> true
(String/starts-with? \"hello\" \"lo\") ;=> false"
[string prefix])
(defn ends-with?
"Returns true if `string` ends with `suffix`, false otherwise.
(String/ends-with? \"hello\" \"lo\") ;=> true
(String/ends-with? \"hello\" \"he\") ;=> false"
[string suffix])
(defn match?
"Returns true if `string` matches the regex `pattern`.
`pattern` is a regular expression (e.g. written with the `~r/.../` sigil),
not a plain string; use contains? to test for a literal substring.
(String/match? \"hello123\" ~r/\\d+/) ;=> true"
[string pattern])
;; --- Splitting & Joining ---
(defn split
"Splits `string` by `pattern` and returns the list of pieces.
Without a pattern splits on whitespace, ignoring leading and trailing
whitespace.
(String/split \"a b  c\") ;=> [\"a\" \"b\" \"c\"]
(String/split \"a,b,c\" \",\") ;=> [\"a\" \"b\" \"c\"]
(String/split \"a,b,c\" \",\" 2) ;=> [\"a\" \"b,c\"]
NOTE(review): Elixir's String.split/3 takes an options keyword list
(`parts: 2`) as its third argument rather than a bare integer — confirm
how CljElixir passes `parts` through before relying on the last example."
([string])
([string pattern])
([string pattern parts]))
(defn split-at
"Splits `string` in two at grapheme offset `position` and returns both
halves as a two-element tuple. A negative `position` counts from the end.
(String/split-at \"hello\" 3) ;=> {\"hel\" \"lo\"}
(String/split-at \"hello\" -1) ;=> {\"hell\" \"o\"}"
[string position])
;; --- Transformation ---
(defn replace
"Replaces occurrences of `pattern` in `string` with `replacement`.
`pattern` may be a string or a regex; every occurrence is replaced, not
just the first.
(String/replace \"hello world\" \"world\" \"elixir\") ;=> \"hello elixir\"
(String/replace \"aabba\" ~r/a/ \"x\") ;=> \"xxbbx\""
[string pattern replacement])
(defn replace-prefix
"Replaces the prefix `match` with `replacement` if `string` starts with it;
otherwise returns `string` unchanged.
(String/replace-prefix \"hello\" \"he\" \"HE\") ;=> \"HEllo\"
(String/replace-prefix \"hello\" \"lo\" \"LO\") ;=> \"hello\""
[string match replacement])
(defn replace-suffix
"Replaces the suffix `match` with `replacement` if `string` ends with it;
otherwise returns `string` unchanged.
(String/replace-suffix \"hello\" \"lo\" \"LO\") ;=> \"helLO\"
(String/replace-suffix \"hello\" \"he\" \"HE\") ;=> \"hello\""
[string match replacement])
(defn replace-leading
"Replaces all leading occurrences of `match` with `replacement`.
Occurrences that are not part of the leading run are left alone.
(String/replace-leading \"aaab\" \"a\" \"x\") ;=> \"xxxb\""
[string match replacement])
(defn replace-trailing
"Replaces all trailing occurrences of `match` with `replacement`.
Occurrences that are not part of the trailing run are left alone.
(String/replace-trailing \"baaa\" \"a\" \"x\") ;=> \"bxxx\""
[string match replacement])
(defn upcase
"Returns `string` with every character converted to uppercase.
(String/upcase \"hello\") ;=> \"HELLO\""
[string])
(defn downcase
"Returns `string` with every character converted to lowercase.
(String/downcase \"HELLO\") ;=> \"hello\""
[string])
(defn capitalize
"Capitalizes the first character and downcases the rest of `string`.
Only the first character of the whole string is affected, not each word.
(String/capitalize \"hello world\") ;=> \"Hello world\"
(String/capitalize \"HELLO\") ;=> \"Hello\""
[string])
(defn reverse
"Returns `string` with its graphemes in reverse order, so multi-codepoint
characters are kept intact rather than being reversed byte by byte.
(String/reverse \"hello\") ;=> \"olleh\""
[string])
(defn duplicate
"Returns `string` repeated `n` times; `n` of 0 yields the empty string.
(String/duplicate \"ha\" 3) ;=> \"hahaha\"
(String/duplicate \"ha\" 0) ;=> \"\""
[string n])
(defn pad-leading
"Pads `string` on the left until it is `count` characters long.
`padding` defaults to a single space when omitted. A string that is already
`count` characters or longer is returned unchanged.
(String/pad-leading \"13\" 5 \"0\") ;=> \"00013\"
(String/pad-leading \"13\" 5) ;=> \"   13\""
([string count])
([string count padding]))
(defn pad-trailing
"Pads `string` on the right until it is `count` characters long.
`padding` defaults to a single space when omitted. A string that is already
`count` characters or longer is returned unchanged.
(String/pad-trailing \"hi\" 5) ;=> \"hi   \" (three spaces appended)
(String/pad-trailing \"hi\" 5 \".\") ;=> \"hi...\""
([string count])
([string count padding]))
;; --- Trimming ---
(defn trim
"Removes leading and trailing whitespace from `string`.
With `to-trim`, removes leading and trailing occurrences of that string
instead of whitespace.
(String/trim \" hello \") ;=> \"hello\"
(String/trim \"--hello--\" \"-\") ;=> \"hello\""
([string])
([string to-trim]))
(defn trim-leading
"Removes leading whitespace from `string`; the end is left untouched.
With `to-trim`, removes leading occurrences of that string instead.
(String/trim-leading \" hello\") ;=> \"hello\"
(String/trim-leading \"--hello--\" \"-\") ;=> \"hello--\""
([string])
([string to-trim]))
(defn trim-trailing
"Removes trailing whitespace from `string`; the start is left untouched.
With `to-trim`, removes trailing occurrences of that string instead.
(String/trim-trailing \"hello \") ;=> \"hello\"
(String/trim-trailing \"--hello--\" \"-\") ;=> \"--hello\""
([string])
([string to-trim]))
;; --- Slicing & Access ---
(defn slice
"Returns a substring of `string`, measured in graphemes.
Two-argument form: `range` is an inclusive index range.
Three-argument form: starts at `start` and takes up to `length` characters.
(String/slice \"hello\" 1 3) ;=> \"ell\"
(String/slice \"hello\" 1..3) ;=> \"ell\""
([string range])
([string start length]))
(defn at
"Returns the grapheme at zero-based `position`, or nil when `position` is
outside the string. Negative indices count from the end.
(String/at \"hello\" 1) ;=> \"e\"
(String/at \"hello\" -1) ;=> \"o\"
(String/at \"hello\" 10) ;=> nil"
[string position])
(defn first
"Returns the first grapheme of `string`, or nil if the string is empty.
(String/first \"hello\") ;=> \"h\"
(String/first \"\") ;=> nil"
[string])
(defn last
"Returns the last grapheme of `string`, or nil if the string is empty.
(String/last \"hello\") ;=> \"o\"
(String/last \"\") ;=> nil"
[string])
(defn length
"Returns the number of Unicode graphemes in `string`. This is the count of
user-visible characters, which can be smaller than the number of bytes.
(String/length \"héllo\") ;=> 5"
[string])
(defn byte-size
"Returns the number of bytes in the string.
(String/byte-size \"héllo\") ;=> 6 (é is 2 bytes in UTF-8)
NOTE(review): in Elixir this operation is `Kernel.byte_size/1`; the `String`
module itself does not export a byte_size function — confirm how CljElixir
resolves this call."
[string])
(defn graphemes
"Returns a list of the grapheme clusters (user-perceived characters) of
`string`, each as its own string.
(String/graphemes \"hello\") ;=> [\"h\" \"e\" \"l\" \"l\" \"o\"]"
[string])
(defn codepoints
"Returns a list of the Unicode codepoints of `string`, each as its own
string. For text with combining marks this can be longer than the
graphemes list, because one grapheme may span several codepoints.
(String/codepoints \"hello\") ;=> [\"h\" \"e\" \"l\" \"l\" \"o\"]"
[string])
(defn next-grapheme
"Returns the tuple {grapheme rest} holding the first grapheme of `string`
and the remainder, or nil when the string is empty.
(String/next-grapheme \"abc\") ;=> {\"a\" \"bc\"}
(String/next-grapheme \"\") ;=> nil"
[string])
(defn next-codepoint
"Returns the tuple {codepoint rest} holding the first codepoint of `string`
and the remainder, or nil when the string is empty.
(String/next-codepoint \"abc\") ;=> {\"a\" \"bc\"}
(String/next-codepoint \"\") ;=> nil"
[string])
;; --- Conversion ---
(defn to-integer
"Converts `string` to an integer, reading it in `base` (10 when omitted).
Raises if the string is not a valid integer representation.
(String/to-integer \"123\") ;=> 123
(String/to-integer \"FF\" 16) ;=> 255"
([string])
([string base]))
(defn to-float
"Converts `string` to a float.
The text must be a float representation including a decimal point; a bare
integer such as \"3\" raises instead of being widened.
(String/to-float \"3.14\") ;=> 3.14"
[string])
(defn to-atom
"Converts string to an atom, creating the atom if it does not exist yet.
Atoms are never garbage-collected, so do not call this on untrusted or
unbounded input; prefer to-existing-atom there.
(String/to-atom \"hello\") ;=> :hello"
[string])
(defn to-existing-atom
"Converts string to an atom that already exists; never creates a new one.
Raises if the atom doesn't exist, which makes it the safe choice for
externally supplied input.
(String/to-existing-atom \"hello\") ;=> :hello"
[string])
(defn to-charlist
"Converts string to a charlist (a list of integer codepoints), shown here
in Elixir's single-quote notation.
(String/to-charlist \"hello\") ;=> 'hello'"
[string])
;; --- Comparison & Inspection ---
(defn myers-difference
"Returns a keyword list of edit steps to transform string1 into string2.
Each step is tagged :eq (unchanged text), :del (text to remove from
string1) or :ins (text to insert), in the order they apply.
(String/myers-difference \"abc\" \"adc\") ;=> [[:eq \"a\"] [:del \"b\"] [:ins \"d\"] [:eq \"c\"]]"
[string1 string2])
(defn valid?
"Returns true if `string` is a valid UTF-8 string, false if it contains
byte sequences that are not valid UTF-8.
(String/valid? \"hello\") ;=> true"
[string])
(defn printable?
"Returns true if `string` consists only of printable characters, false as
soon as it contains a non-printable one.
(String/printable? \"hello\") ;=> true"
[string])
(defn equivalent?
"Returns true if two strings are equivalent ignoring Unicode normalization
differences, i.e. a precomposed character and its decomposed form (base
letter plus combining mark) compare as equal.
(String/equivalent? \"abc\" \"abc\") ;=> true"
[string1 string2])
(defn bag-distance
"Returns the bag distance between two strings (simple edit distance metric).
The result is a float between 0.0 and 1.0.
NOTE(review): Elixir reports 1.0 for identical strings, so the value reads
as a similarity score rather than a raw distance — confirm before
thresholding on it."
[string1 string2])
(defn jaro-distance
"Returns the Jaro distance between two strings (0.0 to 1.0).
0.0 means no similarity and 1.0 means an exact match, so higher values
indicate more similar strings.
(String/jaro-distance \"Dwayne\" \"Duane\") ;=> 0.822..."
[string1 string2])
(defn chunk
"Splits string into chunks of consecutive characters that share the trait
named by `mode`, which is :valid (valid vs. invalid character sequences)
or :printable (printable vs. non-printable characters).
(String/chunk \"abc123def\" :valid) ;=> [\"abc123def\"]"
[string mode])