Files
2026-02-08 11:20:43 -10:00

239 lines
5.1 KiB
Clojure
Vendored

;; # Tablecloth Column Exploration
^{:kind/hidden true}
(ns intro
(:require [tablecloth.api :as tc]
[scicloj.clay.v2.api :as clay]
[scicloj.kindly.v3.api :as kindly]
[scicloj.kindly.v3.kind :as kind]))
^{:kind/hidden true}
(clay/start!)
^{:kind/hidden true}
(comment
;; REPL helpers: being inside `comment`, these forms are never evaluated
;; when the file is loaded; evaluate them by hand.
;; NOTE(review): judging by the names, the first shows this notebook via Clay
;; and the second writes it out as HTML -- confirm against the Clay API.
(clay/show-doc! "docs/column_exploration.clj" {:hide-doc? true})
(clay/write-html! "docs/column_exploration.html")
,)
;; ## What is this exploration?
;;
;; We want to add a `column` entity to tablecloth that parallels `dataset`. It will make
;; the column a first-class entity within tablecloth.
;; ## Usage
(require '[tablecloth.column.api :refer [column] :as col])
;; ### Column creation
;; We can create an empty column like this:
(column)
;; We can check if it's a column.
(col/column? (column))
;; We can create a column with data in a number of ways:
(column [1 2 3 4])
(column (range 10))
;; When you do this, the type of the resulting column is determined
;; automatically from the items provided.
(let [int-column (column (range 10))]
(col/typeof int-column))
(let [string-column (column ["foo" "bar"])]
(col/typeof string-column))
;; ### Basic Operations
;; Operations currently live in their own namespace:
(require '[tablecloth.column.api.operators :as ops])
;; With that imported we can perform a large number of operations:
(def a (column [20 30 40 50]))
(def b (column (range 4)))
(ops/- a b)
(ops/pow a 2)
(ops/* 10 (ops/sin a))
(ops/< a 35)
;; Each of these operations returns a column, so when a column is
;; passed as the first argument (as `->` does below) they can be
;; chained easily.
(-> a
(ops/* b)
(ops/< 70))
;; ### Subsetting and accessing
;; You can access an element in a column in the same ways you would
;; access an element of a Clojure vector: by calling the column as a
;; function, or with `nth` or `get`.
(def myclm (column (range 5)))
myclm
(myclm 2)
(nth myclm 2)
(get myclm 2)
;; #### Selecting multiple elements
;; There are two ways to select multiple elements from a column:
;;
;; * If you need to select a contiguous subset, you can use `slice`;
;; * if you need to select elements at arbitrary positions, use `select`.
;;
;; **Slice**
;;
;; The `slice` function allows you to use indexes to specify a portion
;; of the column to extract.
(def myclm
(column (repeatedly 10 #(rand-int 10))))
myclm
(col/slice myclm 3 5)
;; It also supports negative indexing, making it possible to slice
;; from the end of the column:
(col/slice myclm -7 -5)
;; It's also possible to slice from a given position to the end, or
;; from the start up to a given position, using `:end` and `:start`:
(col/slice myclm 7 :end)
(col/slice myclm -3 :end)
(col/slice myclm :start 7)
(col/slice myclm :start -3)
;; **Select**
;;
;; The `select` fn works by taking a list of index positions:
(col/select myclm [1 3 5 8])
;; We can combine this type of selection with the operations just
;; demonstrated to select certain values.
myclm
;; Let's see which positions hold values greater than 5.
(ops/> myclm 5)
;; We can also pass a column of boolean values, like the one above, to
;; `select`. It will choose all the positions that are true, just as if
;; we had supplied a list of the index positions holding true values.
(col/select myclm (ops/> myclm 5))
;; ### Iterating over a column
;; Many operations that you might want to perform on a column are
;; available in the `tablecloth.column.api.operators` namespace.
;; However, when there is a need to do something custom, you can also
;; iterate over the column with `column-map`.
(defn calc-percent
  "Divides `x` by 100.0, e.g. 50 -> 0.5. Dividing by a floating-point
  literal means the result is a double even for integer input."
  [x]
  (/ x 100.0))
(col/column-map myclm calc-percent)
;; It's also possible to iterate over multiple columns by supplying a
;; vector of columns. Here `*` is applied across the two columns:
(-> [(column [5 6 7 8 9])
     (column [1 2 3 4 5])]
    ;; `*` is passed directly: `(partial *)` with no bound arguments is
    ;; just `*` itself, so the wrapper added nothing.
    (col/column-map *))
(comment
;; Scratch experiments with columns that contain nil; being inside
;; `comment`, none of these forms is evaluated when the file is loaded.
(-> (column [1 nil 2 3 nil 0])
(ops/* 10))
(-> (column [1 nil 2 3 nil 0])
(ops/max [10 10 10 10 10 10]))
;; NOTE(review): called with no arguments, and `tech.v3.dataset.column` is
;; not among the requires in the visible `ns` form -- looks like an
;; unfinished experiment; confirm or remove.
(tech.v3.dataset.column/missing))
;; ### Sorting a column
;; You can use `sort-column` to sort a column:
(def myclm (column (repeatedly 10 #(rand-int 100))))
myclm
(col/sort-column myclm)
;; As you can see, `sort-column` sorts in ascending order by default,
;; but you can also specify a different order using the ordering keywords
;; `:asc` and `:desc`:
(col/sort-column myclm :desc)
;; Finally, `sort-column` can also accept a `comparator-fn`:
(let [numeric-strings (column ["1" "100" "4" "-10"])
      ;; Orders the strings by their parsed integer value rather than
      ;; comparing them as text.
      by-parsed-value (fn [left right]
                        (< (parse-long left) (parse-long right)))]
  (col/sort-column numeric-strings by-parsed-value))
;; ### Missing values
;; The column has built-in support for basic awareness and handling of
;; missing values. Columns will be scanned for missing values when
;; created.
(def myclm (column [10 nil -4 20 1000 nil -233]))
;; You can identify the set of index positions of missing values, and
;; count them:
(col/missing myclm)
(col/count-missing myclm)
;; You can remove missing values:
(col/drop-missing myclm)
;; Or you can replace them:
(col/replace-missing myclm)
;; There is a range of built-in strategies, for example `:midpoint`:
(col/replace-missing myclm :midpoint)
;; And you can provide your own replacement with the `:value` strategy,
;; either as a specific value or as a fn:
(col/replace-missing myclm :value 555)
(col/replace-missing myclm :value (fn [col-without-missing]
(ops/mean col-without-missing)))