;; 243 lines · 6.3 KiB · Clojure
(ns String
  "Elixir String module — UTF-8 string operations.

  In CljElixir: (String/split s \" \"), (String/trim s), etc.
  Strings in Elixir are UTF-8 encoded binaries.")

;; --- Searching ---

(defn contains?
  "Returns true if `string` contains `pattern`.

  `pattern` may be a single string or a collection of strings; with a
  collection, any one match is enough.

    (String/contains? \"hello world\" \"world\") ;=> true
    (String/contains? \"hello\" [\"x\" \"e\"]) ;=> true (any match)"
  [string pattern])

(defn starts-with?
  "Returns true if `string` starts with `prefix`.

    (String/starts-with? \"hello\" \"he\") ;=> true"
  [string prefix])

(defn ends-with?
  "Returns true if `string` ends with `suffix`.

    (String/ends-with? \"hello\" \"lo\") ;=> true"
  [string suffix])

(defn match?
  "Returns true if `string` matches the regex `pattern`.

    (String/match? \"hello123\" ~r/\\d+/) ;=> true"
  [string pattern])

;; --- Splitting & Joining ---

(defn split
  "Splits `string` by `pattern`. Without a pattern splits on whitespace.

  The three-argument form limits the result to `parts` pieces; the final
  piece keeps the unsplit remainder.

    (String/split \"a,b,c\" \",\") ;=> [\"a\" \"b\" \"c\"]
    (String/split \"a,b,c\" \",\" 2) ;=> [\"a\" \"b,c\"]"
  ([string])
  ([string pattern])
  ([string pattern parts]))

(defn split-at
  "Splits `string` in two at `position`, returning both halves as a tuple.

    (String/split-at \"hello\" 3) ;=> {\"hel\" \"lo\"}"
  [string position])

;; --- Transformation ---

(defn replace
  "Replaces occurrences of `pattern` in `string` with `replacement`.

  `pattern` may be a plain string or a regex.

    (String/replace \"hello world\" \"world\" \"elixir\") ;=> \"hello elixir\"
    (String/replace \"aabba\" ~r/a/ \"x\") ;=> \"xxbbx\""
  [string pattern replacement])

(defn replace-prefix
  "Replaces the prefix `match` with `replacement` if `string` starts with it.

    (String/replace-prefix \"hello\" \"he\" \"HE\") ;=> \"HEllo\""
  [string match replacement])

(defn replace-suffix
  "Replaces the suffix `match` with `replacement` if `string` ends with it.

    (String/replace-suffix \"hello\" \"lo\" \"LO\") ;=> \"helLO\""
  [string match replacement])

(defn replace-leading
  "Replaces all leading occurrences of `match` with `replacement`."
  [string match replacement])

(defn replace-trailing
  "Replaces all trailing occurrences of `match` with `replacement`."
  [string match replacement])

(defn upcase
  "Converts `string` to uppercase.

    (String/upcase \"hello\") ;=> \"HELLO\""
  [string])

(defn downcase
  "Converts `string` to lowercase.

    (String/downcase \"HELLO\") ;=> \"hello\""
  [string])

(defn capitalize
  "Capitalizes the first character of `string` and downcases the rest.

    (String/capitalize \"hello world\") ;=> \"Hello world\""
  [string])

(defn reverse
  "Reverses `string` (grapheme-aware for Unicode).

    (String/reverse \"hello\") ;=> \"olleh\""
  [string])

(defn duplicate
  "Repeats `string` `n` times.

    (String/duplicate \"ha\" 3) ;=> \"hahaha\""
  [string n])

(defn pad-leading
  "Pads `string` on the left to `count` characters.

  The optional `padding` is the text used to fill; without it the string
  is padded with spaces.

    (String/pad-leading \"13\" 5 \"0\") ;=> \"00013\""
  ([string count])
  ([string count padding]))

(defn pad-trailing
  "Pads `string` on the right to `count` characters.

  The optional `padding` is the text used to fill; without it the string
  is padded with spaces.

    (String/pad-trailing \"hi\" 5) ;=> \"hi   \""
  ([string count])
  ([string count padding]))

;; --- Trimming ---

(defn trim
  "Removes leading and trailing whitespace (or specified characters).

  With `to-trim`, that text is stripped from both ends instead of
  whitespace.

    (String/trim \" hello \") ;=> \"hello\""
  ([string])
  ([string to-trim]))

(defn trim-leading
  "Removes leading whitespace.

  With `to-trim`, that text is stripped from the start instead of
  whitespace.

    (String/trim-leading \" hello\") ;=> \"hello\""
  ([string])
  ([string to-trim]))

(defn trim-trailing
  "Removes trailing whitespace.

  With `to-trim`, that text is stripped from the end instead of
  whitespace.

    (String/trim-trailing \"hello \") ;=> \"hello\""
  ([string])
  ([string to-trim]))

;; --- Slicing & Access ---

(defn slice
  "Returns a substring of `string`.

  The three-argument form starts at `start` and takes `length` characters;
  the two-argument form takes the characters covered by `range`.

    (String/slice \"hello\" 1 3) ;=> \"ell\"
    (String/slice \"hello\" 1..3) ;=> \"ell\""
  ([string range])
  ([string start length]))

(defn at
  "Returns the grapheme at `position`. Negative indices count from the end.

    (String/at \"hello\" 1) ;=> \"e\"
    (String/at \"hello\" -1) ;=> \"o\""
  [string position])

(defn first
  "Returns the first grapheme of `string`.

    (String/first \"hello\") ;=> \"h\""
  [string])

(defn last
  "Returns the last grapheme of `string`.

    (String/last \"hello\") ;=> \"o\""
  [string])

(defn length
  "Returns the number of Unicode graphemes in `string`.

    (String/length \"héllo\") ;=> 5"
  [string])

(defn byte-size
  "Returns the number of bytes in `string`.

  This counts bytes, not graphemes, so multi-byte characters add more
  than one.

    (String/byte-size \"héllo\") ;=> 6 (é is 2 bytes in UTF-8)"
  [string])

(defn graphemes
  "Returns a list of the grapheme clusters in `string`.

    (String/graphemes \"hello\") ;=> [\"h\" \"e\" \"l\" \"l\" \"o\"]"
  [string])

(defn codepoints
  "Returns a list of the codepoints in `string`.

    (String/codepoints \"hello\") ;=> [\"h\" \"e\" \"l\" \"l\" \"o\"]"
  [string])

(defn next-grapheme
  "Returns tuple {grapheme rest} or nil.

    (String/next-grapheme \"abc\") ;=> {\"a\" \"bc\"}"
  [string])

(defn next-codepoint
  "Returns tuple {codepoint rest} or nil."
  [string])

;; --- Conversion ---

(defn to-integer
  "Converts `string` to an integer.

  The optional `base` gives the radix the text is written in.

    (String/to-integer \"123\") ;=> 123
    (String/to-integer \"FF\" 16) ;=> 255"
  ([string])
  ([string base]))

(defn to-float
  "Converts `string` to a float.

    (String/to-float \"3.14\") ;=> 3.14"
  [string])

(defn to-atom
  "Converts `string` to an atom, creating the atom if it does not exist yet.

  Atoms are never garbage-collected, so do not call this on untrusted
  input; use `to-existing-atom` when the atom should already be defined.

    (String/to-atom \"hello\") ;=> :hello"
  [string])

(defn to-existing-atom
  "Converts `string` to an existing atom. Raises if the atom doesn't exist.

    (String/to-existing-atom \"hello\") ;=> :hello"
  [string])

(defn to-charlist
  "Converts `string` to a charlist.

    (String/to-charlist \"hello\") ;=> 'hello'"
  [string])

(defn myers-difference
  "Returns a keyword list of edit steps to transform `string1` into `string2`.

  Each step is tagged :eq (unchanged), :del (removed from `string1`) or
  :ins (added from `string2`).

    (String/myers-difference \"abc\" \"adc\") ;=> [[:eq \"a\"] [:del \"b\"] [:ins \"d\"] [:eq \"c\"]]"
  [string1 string2])

(defn valid?
  "Returns true if `string` is a valid UTF-8 string.

    (String/valid? \"hello\") ;=> true"
  [string])

(defn printable?
  "Returns true if `string` consists only of printable characters.

    (String/printable? \"hello\") ;=> true"
  [string])

(defn equivalent?
  "Returns true if `string1` and `string2` are equivalent ignoring Unicode
  normalization differences."
  [string1 string2])

(defn bag-distance
  "Returns the bag distance between `string1` and `string2` (a simple edit
  distance metric)."
  [string1 string2])

(defn jaro-distance
  "Returns the Jaro distance between `string1` and `string2` (0.0 to 1.0).

  The value is a similarity score: 0.0 means nothing in common, 1.0 means
  an exact match.

    (String/jaro-distance \"Dwayne\" \"Duane\") ;=> 0.822..."
  [string1 string2])

(defn chunk
  "Splits `string` into chunks by character type.

  `mode` names the trait used to group consecutive characters, e.g.
  `:valid` or `:printable`.

    (String/chunk \"abc123def\" :valid) ;=> [\"abc123def\"]"
  [string mode])