Files
2026-02-08 11:20:43 -10:00

1896 lines
71 KiB
Clojure
Vendored

(ns tech.v3.dataset-test
(:require [tech.v3.datatype :as dtype]
[tech.v3.datatype.functional :as dfn]
[tech.v3.datatype.datetime :as dtype-dt]
[tech.v3.datatype.struct :as dt-struct]
[tech.v3.datatype.argops :as argops]
[tech.v3.datatype.bitmap :as bitmap]
[tech.v3.tensor :as dtt]
[tech.v3.dataset :as ds]
[tech.v3.dataset-api :as ds-api]
[tech.v3.dataset.protocols :as ds-proto]
[tech.v3.dataset.base :as ds-base]
[tech.v3.dataset.column :as ds-col]
[tech.v3.dataset.tensor :as ds-tens]
[tech.v3.dataset.string-table :as str-table]
[tech.v3.dataset.join :as ds-join]
[tech.v3.datatype.rolling :as rolling]
[tech.v3.dataset.test-utils :as test-utils]
[tech.v3.dataset.rolling :as ds-roll]
[tech.v3.dataset.column-filters :as cf]
[tech.v3.dataset.impl.column :as col-impl]
[tech.v3.dataset.print :as ds-print]
;;Loading multimethods required to load the files
[tech.v3.libs.poi]
[tech.v3.libs.fastexcel]
[tech.v3.io :as tech-io]
[taoensso.nippy :as nippy]
[clojure.test :refer [deftest is]]
[ham-fisted.api :as hamf]
[ham-fisted.lazy-noncaching :as lznc])
(:import [java.util List HashSet UUID Random BitSet Map]
[java.util.concurrent ConcurrentHashMap]
[java.time LocalDate]
[java.io File ByteArrayInputStream]
[tech.v3 TMD]
[ham_fisted IFnDef$OD IMutList]))
;; Verifies datatype inference for test/data/datatype_parser.csv: once with the
;; default parser and once with "boolean" and "boolstr" forced to :boolean through
;; the :parser-fn option.
(deftest datatype-parser
  (let [ds (ds/->dataset "test/data/datatype_parser.csv")]
    (is (= :int16 (dtype/get-datatype (ds/column ds "id"))))
    (is (= [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] (ds/column ds "id")))
    (is (= :string (dtype/get-datatype (ds/column ds "char"))))
    (is (= ["t", "f", "y", "n", "T", "F", "Y", "N", "A", "z"]
           (ds/column ds "char")))
    (is (= :string (dtype/get-datatype (ds/column ds "word"))))
    (is (= ["true", "False", "YES", "NO", "positive", "negative", "yep", "not", "pos", "neg"]
           (ds/column ds "word")))
    (is (= :boolean (dtype/get-datatype (ds/column ds "bool"))))
    (is (= [true, true, false, false, true, false, true, false, false, false]
           (ds/column ds "bool")))
    (is (= :string (dtype/get-datatype (ds/column ds "boolstr"))))
    (is (= ["true", "true", "false", "false", "true", "false", "true", "false", "False", "false"]
           (ds/column ds "boolstr")))
    (is (= :string (dtype/get-datatype (ds/column ds "boolean"))))
    (is (= ["t", "y", "n", "f", "true", "false", "positive", "negative", "negative", "negative"]
           (ds/column ds "boolean"))))
  (let [ds (ds/->dataset "test/data/datatype_parser.csv"
                         {:parser-fn {"boolean" :boolean
                                      "boolstr" :boolean}})]
    (is (= :int16 (dtype/get-datatype (ds/column ds "id"))))
    (is (= [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] (ds/column ds "id")))
    (is (= :string (dtype/get-datatype (ds/column ds "char"))))
    (is (= ["t", "f", "y", "n", "T", "F", "Y", "N", "A", "z"]
           (ds/column ds "char")))
    (is (= :string (dtype/get-datatype (ds/column ds "word"))))
    (is (= ["true", "False", "YES", "NO", "positive", "negative", "yep", "not", "pos", "neg"]
           (ds/column ds "word")))
    ;; The "bool" datatype check is paired with the "bool" values; the "boolean"
    ;; column has its own datatype/value pair at the end of this block.
    (is (= :boolean (dtype/get-datatype (ds/column ds "bool"))))
    (is (= [true, true, false, false, true, false, true, false, false, false]
           (ds/column ds "bool")))
    (is (= :boolean (dtype/get-datatype (ds/column ds "boolstr"))))
    (is (= [true, true, false, false, true, false, true, false, false, false]
           (ds/column ds "boolstr")))
    (is (= :boolean (dtype/get-datatype (ds/column ds "boolean"))))
    (is (= [true, true, false, false, true, false, true, false, false, false]
           (ds/column ds "boolean")))))
;; The names of the columns returned by `vals` must equal `ds/column-names`.
(deftest iterable
  (let [fruits (ds/->dataset (test-utils/mapseq-fruit-dataset))]
    (is (= (ds/column-names fruits)
           (map ds-col/column-name (vals fruits))))))
;; Rewrites :fruit-name into a :string column by appending "-fn" to every name and
;; checks the first ten values.
(deftest string-column-add-or-update
  (let [add-suffix #(str (name %) "-fn")
        fruits (ds/update-column (ds/->dataset (test-utils/mapseq-fruit-dataset))
                                 :fruit-name
                                 (partial dtype/emap add-suffix :string))]
    (is (= ["apple-fn" "apple-fn" "apple-fn" "mandarin-fn" "mandarin-fn"
            "mandarin-fn" "mandarin-fn" "mandarin-fn" "apple-fn" "apple-fn"]
           (vec (take 10 (fruits :fruit-name)))))))
;; Builds datasets from maps of column name -> values and checks the resulting rows,
;; plus the missing index reported when :b is one element shorter than :a.
(deftest name-values-seq->dataset-test
  (is (= [{:a 0.0, :b "a"} {:a 1.0, :b "b"} {:a 2.0, :b "c"}
          {:a 3.0, :b "d"} {:a 4.0, :b "e"}]
         (ds/mapseq-reader
          (ds/->dataset {:a (double-array (range 5))
                         :b ["a" "b" "c" "d" "e"]}))))
  (is (= #{4}
         (set (ds/missing
               (ds/->dataset {:a (double-array (range 5))
                              :b ["a" "b" "c" "d"]})))))
  (is (= [{:a 0, :b "a"} {:a 1, :b "b"} {:a 2, :b "c"}
          {:a 3, :b "d"} {:a 4, :b "e"}]
         (ds/mapseq-reader
          (ds/->dataset {:a (long-array (range 5))
                         :b ["a" "b" "c" "d" "e"]})))))
;; Checks the shape and contents produced by `unique-by` / `unique-by-column` on the
;; fruit data, keyed first by :fruit-name and then by :width.
(deftest unique-by-test
  (let [fruits (test-utils/mapseq-fruit-dataset)]
    (is (= [7 4]
           (dtype/shape (ds/unique-by fruits :fruit-name))))
    (is (= [7 4]
           (dtype/shape (ds/unique-by-column fruits :fruit-name))))
    (is (= #{:apple :orange :lemon :mandarin}
           (set (ds/column (ds/unique-by-column fruits :fruit-name)
                           :fruit-name))))
    (is (= [7 24]
           (dtype/shape (ds/unique-by fruits :width))))
    (is (= [7 24]
           (dtype/shape (ds/unique-by-column fruits :width))))
    (is (dfn/equals [5.8 5.9 6.0 6.1 6.2 6.3 6.5
                     6.7 6.8 6.9 7.0 7.1 7.2 7.3
                     7.4 7.5 7.6 7.7 7.8 8.0 8.4
                     9.0 9.2 9.6]
                    (vec (sort (ds/column (ds/unique-by-column fruits :width)
                                          :width)))))))
;; `ds/concat` ignores nil arguments and returns nil when every argument is nil.
(deftest ds-concat-nil-pun
  (let [first-ten (ds/select (ds/->dataset (test-utils/mapseq-fruit-dataset))
                             :all (range 10))
        nil-leading (ds/concat nil first-ten)
        nil-trailing (ds/concat first-ten nil nil)
        all-nil (ds/concat nil nil nil)]
    (is (= (vec (first-ten :fruit-name))
           (vec (nil-leading :fruit-name))))
    (is (= (vec (first-ten :fruit-name))
           (vec (nil-trailing :fruit-name))))
    (is (nil? all-nil))))
;; `ds/concat-copying` ignores nil arguments and returns nil when every argument is
;; nil. Every case here goes through `concat-copying`, so the all-nil path of the
;; copying variant is the one under test.
(deftest ds-concat-copying-nil-pun
  (let [ds (-> (ds/->dataset (test-utils/mapseq-fruit-dataset))
               (ds/select :all (range 10)))
        d1 (ds/concat-copying nil ds)
        d2 (ds/concat-copying ds nil nil)
        nothing (ds/concat-copying nil nil nil)]
    (is (= (vec (ds :fruit-name))
           (vec (d1 :fruit-name))))
    (is (= (vec (ds :fruit-name))
           (vec (d2 :fruit-name))))
    (is (nil? nothing))))
;; Missing indexes set on a column must carry over (offset by row count) when the
;; dataset is concatenated with itself via `ds/concat`.
(deftest ds-concat-missing
  (let [with-gaps (-> (ds/->dataset (test-utils/mapseq-fruit-dataset))
                      (ds/select [:fruit-name] (range 10))
                      (ds/update-column :fruit-name
                                        (fn [col] (ds-col/set-missing col [3 6]))))
        doubled (ds/concat with-gaps with-gaps)]
    (is (= #{3 6 13 16} (set (ds-col/missing (doubled :fruit-name)))))
    (is (= [:apple :apple :apple nil :mandarin
            :mandarin nil :mandarin :apple :apple
            :apple :apple :apple nil :mandarin
            :mandarin nil :mandarin :apple :apple]
           (vec (doubled :fruit-name))))))
;; Missing indexes set on a column must carry over (offset by row count) when the
;; dataset is concatenated with itself via `ds/concat-copying`.
(deftest concat-copying-missing
  (let [with-gaps (-> (ds/->dataset (test-utils/mapseq-fruit-dataset))
                      (ds/select [:fruit-name] (range 10))
                      (ds/update-column :fruit-name
                                        (fn [col] (ds-col/set-missing col [3 6]))))
        doubled (ds/concat-copying with-gaps with-gaps)]
    (is (= #{3 6 13 16} (set (ds-col/missing (doubled :fruit-name)))))
    (is (= [:apple :apple :apple nil :mandarin
            :mandarin nil :mandarin :apple :apple
            :apple :apple :apple nil :mandarin
            :mandarin nil :mandarin :apple :apple]
           (vec (doubled :fruit-name))))))
;; A lazy seq produced by `map` can be used as new column data both through
;; `update-column` and `add-or-update-column`.
(deftest update-column-datatype-detect
  (let [times-ten (fn [data] (* 10 data))
        fruits (-> (ds/->dataset (test-utils/mapseq-fruit-dataset))
                   (ds/select :all (range 10)))
        updated (ds/update-column fruits :width #(map times-ten %))
        add-or-updated (ds/add-or-update-column
                        fruits :width (map times-ten (fruits :width)))
        width-answer (mapv times-ten (fruits :width))]
    (is (dfn/equals width-answer
                    (updated :width)))
    (is (dfn/equals width-answer
                    (add-or-updated :width)))))
;; A sub-buffer of the :fruit-name column (offset 4, length 4) reads back as keywords.
(deftest filter-fail-regression
  (let [fruits (ds/->dataset (test-utils/mapseq-fruit-dataset))
        window (dtype/sub-buffer (fruits :fruit-name) 4 4)]
    (is (= [:mandarin :mandarin :mandarin :mandarin]
           (vec window)))))
;; Selecting rows 5..9 of a column yields the expected five values.
(deftest simple-select-test
  (let [fruits (ds/->dataset (test-utils/mapseq-fruit-dataset))
        picked (ds-col/select (fruits :fruit-name) (range 5 10))]
    (is (= [:mandarin :mandarin :mandarin :apple :apple]
           (vec picked)))
    (is (= 5 (dtype/ecount picked)))))
;; Sorting :mass with `>` gives the same leading values as reversing the default sort.
(deftest generic-sort-numbers
  (let [descending (ds/sort-by-column (ds/->dataset (test-utils/mapseq-fruit-dataset))
                                      :mass >)
        ascending (ds/sort-by-column (ds/->dataset (test-utils/mapseq-fruit-dataset))
                                     :mass)]
    (is (= (vec (take 10 (descending :mass)))
           (vec (take 10 (reverse (ascending :mass))))))))
;; Selecting columns with a map renames them; `select-columns` is expected to follow
;; the map's order while `unordered-select` is expected not to.
(deftest selection-map
  (let [fruits (ds/->dataset (test-utils/mapseq-fruit-dataset))
        rename-map (into {}
                         (for [cname (take 3 (ds/column-names fruits))]
                           [cname (keyword (str (name cname) "-selected"))]))
        ;; Result order is dictated by the map.
        in-order (ds/select-columns fruits rename-map)
        ;; Same selection, but starting from a dataset whose columns were shuffled.
        from-shuffled (ds/select-columns
                       (ds/select-columns fruits (shuffle (ds/column-names fruits)))
                       rename-map)
        from-reversed (ds/unordered-select
                       (ds/select-columns fruits (reverse (ds/column-names fruits)))
                       rename-map :all)
        expected-names (vec (vals rename-map))]
    (is (= expected-names
           (vec (ds/column-names in-order))))
    (is (= expected-names
           (vec (ds/column-names from-shuffled))))
    (is (not= expected-names
              (vec (ds/column-names from-reversed))))))
;; A boolean column converts to a double array of 1.0 / 0.0.
(deftest boolean-double-arrays
  (let [flags (ds/->dataset [{:a true} {:a true} {:a false}])
        as-doubles (ds-col/to-double-array (flags :a))]
    (is (= [1.0 1.0 0.0]
           (vec as-doubles)))))
;; Removing rows 0..4 leaves the same values as dropping the first five entries.
(deftest remove-rows
  (let [fruits (ds/->dataset (test-utils/mapseq-fruit-dataset))
        trimmed (ds/remove-rows fruits (range 5))]
    (is (= (vec (drop 5 (fruits :fruit-name)))
           (vec (trimmed :fruit-name))))))
;; Columns mixing integer and floating point values are all expected to be :float64.
(deftest long-double-promotion
  (is (= #{:float64}
         (set (map dtype/get-datatype
                   (vals (ds/->dataset [{:a 1 :b (float 2.2)} {:a 1.2 :b 2}])))))))
;; Passing the infinite `(range)` to `set-missing` marks exactly the rows that exist.
(deftest set-missing-range
  (let [fruits (ds/update-column (ds/->dataset (test-utils/mapseq-fruit-dataset))
                                 :fruit-name
                                 (fn [col] (ds-col/set-missing col (range))))]
    (is (= (vec (range (ds/row-count fruits)))
           (vec (dtype/->reader (ds-col/missing (fruits :fruit-name))))))))
;; `columnwise-concat` over [:c :a :b] produces :column / :value columns and repeats
;; the untouched :d column.
(deftest columnwise-concat
  (let [stacked (ds/columnwise-concat
                 (ds/->dataset [{:a 1 :b 2 :c 3 :d 1} {:a 4 :b 5 :c 6 :d 2}])
                 [:c :a :b])]
    (is (= [:c :c :a :a :b :b]
           (vec (stacked :column))))
    (is (= [3 6 1 4 2 5]
           (vec (stacked :value))))
    (is (= [1 2 1 2 1 2]
           (vec (stacked :d))))))
;; A dataset loaded from a path is named after that path.
(deftest default-names
  (let [csv-ds (ds/->dataset "test/data/stocks.csv")]
    (is (= "test/data/stocks.csv"
           (ds/dataset-name csv-ds))))
  (let [xlsx-ds (ds/->dataset "test/data/stocks.xlsx")]
    (is (= "test/data/stocks.xlsx"
           (ds/dataset-name xlsx-ds)))))
;; Exercises `ds/unroll-column` on a column mixing sequences and scalars: plain,
;; with an explicit :datatype, and with index tracking under the default and a
;; custom column name.
(deftest unroll
  (let [flat (ds/unroll-column (ds/->dataset [{:a 1 :b [2 3]}
                                              {:a 2 :b [4 5]}
                                              {:a 3 :b :a}])
                               :b)]
    (is (= [1 1 2 2 3]
           (vec (flat :a))))
    (is (= [2 3 4 5 :a]
           (vec (flat :b)))))
  (let [rows (flatten (repeat 20
                              [{:a 1 :b [:a :b]}
                               {:a 2 :b [:c :d]}
                               {:a 3 :b :a}]))
        flat (ds/unroll-column (ds/->dataset rows) :b {:datatype :keyword})]
    (is (= (flatten (repeat 20 [1 1 2 2 3]))
           (vec (flat :a))))
    (is (= (flatten (repeat 20 [:a :b :c :d :a]))
           (vec (flat :b)))))
  (let [flat (ds/unroll-column (ds/->dataset [{:a 1 :b [2 3]}
                                              {:a 2 :b [4 5]}
                                              {:a 3 :b :a}])
                               :b {:indexes? true})]
    (is (= [1 1 2 2 3]
           (vec (flat :a))))
    (is (= [2 3 4 5 :a]
           (vec (flat :b))))
    (is (= [0 1 0 1 0]
           (vec (flat :indexes)))))
  (let [flat (ds/unroll-column (ds/->dataset [{:a 1 :b (int-array [2 3])}
                                              {:a 2 :b [4 5]}
                                              {:a 3 :b :a}])
                               :b {:indexes? :unroll-indexes})]
    (is (= [1 1 2 2 3]
           (vec (flat :a))))
    (is (= [2 3 4 5 :a]
           (vec (flat :b))))
    (is (= [0 1 0 1 0]
           (vec (flat :unroll-indexes))))))
;; Selecting rows by the dataset's missing set yields 0 rows when nothing is missing
;; and 1 row when a single row has a missing value.
(deftest empty-bitmap
  (let [complete (ds/->dataset [{:a 1 :b 1} {:a 2 :b 2}])
        missing-rows (ds/select-rows complete (ds/missing complete))]
    (is (= 0 (ds/row-count missing-rows))))
  (let [partial-ds (ds/->dataset [{:a 1 :b 1} {:b 2}])
        missing-rows (ds/select-rows partial-ds (ds/missing partial-ds))]
    (is (= 1 (ds/row-count missing-rows)))))
;; `ds/concat` of datasets whose columns have different numeric types yields
;; :int64 / :float64 columns regardless of argument order, and keeps missing values.
(deftest concat-columns-widening
  (let [wide (ds/->dataset [{:a (int 1) :b (float 1)}])
        narrow (ds/->dataset [{:a (byte 2) :b 2}])
        wide-first (ds/concat wide narrow)
        narrow-first (ds/concat narrow wide)]
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals wide-first)))))
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals narrow-first))))))
  (let [wide (ds/->dataset [{:a (int 1) :b (float 1)}
                            {:b (float 2)}])
        narrow (ds/->dataset [{:a (byte 2) :b 2}])
        wide-first (ds/concat wide narrow)
        narrow-first (ds/concat narrow wide)]
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals wide-first)))))
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals narrow-first)))))
    (is (= [1 nil 2]
           (vec (wide-first :a))))))
;; `ds/concat-copying` of datasets whose columns have different numeric types yields
;; :int64 / :float64 columns regardless of argument order, and keeps missing values.
;; Both scenarios go through `concat-copying`, so this test does not duplicate
;; `concat-columns-widening`.
(deftest concat-copying-columns-widening
  (let [ds (ds/->dataset [{:a (int 1) :b (float 1)}])
        ds2 (ds/->dataset [{:a (byte 2) :b 2}])
        cds1 (ds/concat-copying ds ds2)
        cds2 (ds/concat-copying ds2 ds)]
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals cds1)))))
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals cds2))))))
  (let [ds (ds/->dataset [{:a (int 1) :b (float 1)}
                          {:b (float 2)}])
        ds2 (ds/->dataset [{:a (byte 2) :b 2}])
        cds1 (ds/concat-copying ds ds2)
        cds2 (ds/concat-copying ds2 ds)]
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals cds1)))))
    (is (= #{:int64 :float64}
           (set (map dtype/get-datatype (vals cds2)))))
    (is (= [1 nil 2]
           (vec (cds1 :a))))))
;; `ds/concat` keeps :packed-local-date and :string column datatypes.
(deftest concat-columns-various-datatypes
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        head-rows (ds/select-rows stocks (range 10))
        next-rows (ds/select-rows stocks (range 10 20))
        joined (ds/concat head-rows next-rows)]
    (is (= :packed-local-date
           (dtype/get-datatype (joined "date")))))
  (let [single (ds/->dataset [{:a "a" :b 0}])
        joined (ds/concat single single)]
    (is (= :string (dtype/get-datatype (joined :a))))))
;; `ds/concat-copying` keeps :packed-local-date and :string column datatypes.
;; Both scenarios go through `concat-copying`, so this test does not duplicate
;; `concat-columns-various-datatypes`.
(deftest concat-copying-columns-various-datatypes
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        ds1 (ds/select-rows stocks (range 10))
        ds2 (ds/select-rows stocks (range 10 20))
        res (ds/concat-copying ds1 ds2)]
    (is (= :packed-local-date
           (dtype/get-datatype (res "date")))))
  (let [ds (ds/->dataset [{:a "a" :b 0}])
        res (ds/concat-copying ds ds)]
    (is (= :string (dtype/get-datatype (res :a))))))
;; Changing a column's datatype with `dtype/set-datatype` must keep its missing set.
(deftest set-datatype-lose-missing
  (let [recast (ds/update-column (ds/->dataset [{:a 1 :b 1} {:b 2}])
                                 :a
                                 (fn [col] (dtype/set-datatype col :int32)))]
    (is (== 1 (dtype/ecount (ds-col/missing (recast :a)))))
    (is (= :int32 (dtype/get-datatype (recast :a))))
    (is (= [1 nil]
           (vec (recast :a))))))
;; Rebuilding a column through `ds-col/new-column` with an explicit missing set
;; yields an :int32 column that still reports the missing row.
(deftest set-datatype-with-new-column
  (let [rebuild (fn [col]
                  (ds-col/new-column (ds-col/column-name col)
                                     (dtype/emap int :int32 col)
                                     {}
                                     (ds-col/missing col)))
        recast (ds/update-column (ds/->dataset [{:a 1 :b 1} {:b 2}]) :a rebuild)]
    (is (== 1 (dtype/ecount (ds-col/missing (recast :a)))))
    (is (= :int32 (dtype/get-datatype (recast :a))))
    (is (= [1 nil]
           (vec (recast :a))))))
;; Mapping a primitive double->double fn over a column with a nil result datatype
;; produces a :float64 column.
(deftest typed-column-map
  (let [negate-below-two (fn ^double [^double in]
                           (if (< in 2.0) (- in) in))
        mapped (ds/update-column (ds/->dataset [{:a 1.0} {:a 2.0}])
                                 :a
                                 (fn [col] (dtype/emap negate-below-two nil col)))]
    (is (= :float64 (dtype/get-datatype (mapped :a))))
    (is (= [-1.0 2.0]
           (vec (mapped :a))))))
;; Exercises `ds-col/column-map` and the `ds/column-map-m` macro on columns with
;; missing values, checking the result datatype, which rows are finite and (where
;; asserted) the resulting missing set.
(deftest typed-column-map-missing
  ;; Mapping fn returns nil for rows where either input is missing.
  (let [summed (ds/bind-> (ds/->dataset [{:a 1} {:b 2.0} {:a 2 :b 3.0}]) d
                 (assoc :a (ds-col/column-map (fn [x y]
                                                (when (and x y)
                                                  (+ (double x)
                                                     (double y))))
                                              nil
                                              (d :a) (d :b))))]
    (is (= :float64 (dtype/get-datatype (summed :a))))
    (is (= [false false true]
           (vec (dfn/finite? (summed :a)))))
    (is (= #{0 1}
           (set (ds/missing (summed :a))))))
  ;; Mapping fn returns NaN instead of nil and the datatype is given explicitly.
  (let [summed (ds/bind-> (ds/->dataset [{:a 1} {:b 2.0} {:a 2 :b 3.0}]) d
                 (assoc :a (ds-col/column-map (fn [x y]
                                                (if (and x y)
                                                  (+ (double x)
                                                     (double y))
                                                  Double/NaN))
                                              :float64
                                              (d :a) (d :b))))]
    (is (= :float64 (dtype/get-datatype (summed :a))))
    (is (= [false false true]
           (vec (dfn/finite? (summed :a))))))
  ;; Never remove these tests.  Actual users are relying on this behavior to simplify
  ;; their processing chains.
  (let [summed (ds/bind-> (ds/->dataset [{:a 1} {:b 2.0} {:a 2 :b 3.0}]) d
                 (assoc :a (ds-col/column-map (fn [^double x ^double y]
                                                (+ (double x)
                                                   (double y)))
                                              {:missing-fn ds-col/union-missing-sets
                                               :datatype :float64}
                                              (d :a) (d :b))))]
    (is (= :float64 (dtype/get-datatype (summed :a))))
    (is (= [false false true]
           (vec (dfn/finite? (summed :a))))))
  ;; Macro form: the body refers to the source columns by symbol.
  (let [summed (-> (ds/->dataset [{:a 1} {:b 2.0} {:a 2 :b 3.0}])
                   (ds/column-map-m :a [:a :b]
                                    (when (and a b)
                                      (+ (double a) (double b)))))]
    (is (= :float64 (dtype/get-datatype (summed :a))))
    (is (= [false false true]
           (vec (dfn/finite? (summed :a)))))
    (is (= #{0 1}
           (set (ds/missing (summed :a))))))
  ;; Macro form with a dotted column name, referenced in the body as `a-a`.
  (let [summed (-> (ds/->dataset [{:a.a 1} {:b 2.0} {:a.a 2 :b 3.0}])
                   (ds/column-map-m :a [:a.a :b]
                                    (when (and a-a b)
                                      (+ (double a-a) (double b)))))]
    (is (= :float64 (dtype/get-datatype (summed :a))))
    (is (= [false false true]
           (vec (dfn/finite? (summed :a)))))
    (is (= #{0 1}
           (set (ds/missing (summed :a)))))))
;; A column added from a lazy seq of longs is detected as :int64 and supports mean.
(deftest mean-object-column
  (let [doubled (map (fn [arg] (* 2 arg)) (range 9))
        with-col (ds/add-or-update-column (ds/->dataset []) :a doubled)]
    (is (= :int64 (dtype/get-datatype (with-col :a))))
    (is (= 8.0 (dfn/mean (with-col :a))))))
;; Round-trips the :price and :date columns through :string with `ds/column-cast`,
;; then checks a custom [datatype cast-fn] cast and the :unparsed-indexes metadata
;; with and without :track-parse-errors.
(deftest column-cast-test
  (let [stocks (ds/->dataset "test/data/stocks.csv" {:key-fn keyword})
        round-price [:int32 #(Math/round (double %))]
        price-dtype (dtype/get-datatype (stocks :price))]
    (is (dfn/equals (stocks :price)
                    (ds/column (ds/column-cast (ds/column-cast stocks :price :string)
                                               :price price-dtype)
                               :price)))
    (let [date-dtype (dtype/get-datatype (stocks :date))]
      (is (dfn/equals (dtype-dt/datetime->milliseconds (stocks :date))
                      (dtype-dt/datetime->milliseconds
                       (ds/column (ds/column-cast (ds/column-cast stocks :date :string)
                                                  :date date-dtype)
                                  :date)))))
    ;; Custom cast fn
    (is (= [40 36 43 28 25]
           (vec (take 5 (ds/column (ds/column-cast stocks :price round-price)
                                   :price)))))
    (is (nil? (:unparsed-indexes
               (meta (ds/column (ds/column-cast stocks :price round-price)
                                :price)))))
    (is (not
         (nil? (:unparsed-indexes
                (meta (ds/column (ds/column-cast stocks :price round-price
                                                 {:track-parse-errors true})
                                 :price))))))))
;; Cloning a reader must read each index at most once: the reader below throws if
;; any index is requested a second time.
(deftest column-clone-double-read
  (let [stocks (ds/->dataset "test/data/stocks.csv" {:key-fn keyword})
        seen (HashSet.)
        with-clone (assoc stocks
                          :price-2
                          (dtype/clone
                           (dtype/make-reader
                            :boolean
                            (ds/row-count stocks)
                            (do
                              ;; `idx` is the index symbol used inside make-reader.
                              (locking seen
                                (when (.contains seen idx)
                                  (throw (Exception. "Double read!!")))
                                (.add seen idx))
                              true))))]
    (is (= [true true true true true]
           (vec (take 5 (with-clone :price-2)))))))
;; `ds-col/stats` returns a positive :mean for columns that contain missing values.
(deftest stats-with-missing
  (let [sparse (ds/->dataset {:a [nil nil nil 1 2 nil 3
                                  4 nil nil nil 11 nil]
                              :b [nil 2 2 2 2 3 nil 3 nil
                                  3 nil 4 nil]})
        mean-of (fn [col] (:mean (ds-col/stats col #{:mean})))]
    (is (> (mean-of (sparse :a)) 0.0))
    (is (> (mean-of (sparse :b)) 0.0))))
;; UUID values are detected as :uuid columns and survive a CSV write/read cycle.
(deftest uuids-test
  (let [ids (repeatedly 5 #(UUID/randomUUID))
        id-ds (ds/->dataset
               (map-indexed (fn [idx uuid]
                              {:a uuid
                               :b uuid
                               :c idx})
                            ids))]
    (is (= :uuid (dtype/get-datatype (id-ds :a))))
    (is (= :uuid (dtype/get-datatype (id-ds :b))))
    (is (= (vec ids)
           (vec (id-ds :a))))
    (let [csv-path (str (UUID/randomUUID) ".csv")
          _ (ds/write! id-ds csv-path)
          ;; The temp file is removed whether or not loading succeeds.
          reloaded (try
                     (ds/->dataset csv-path {:key-fn keyword})
                     (finally
                       (.delete (File. csv-path))))]
      (is (= (vec (id-ds :a))
             (vec (reloaded :a)))))))
;; Filtering everything out leaves zero rows, keeps all columns, and the result
;; can still be converted to a string.
(deftest filter-empty
  (let [source (ds/->dataset {:V1 (take 9 (cycle [1 2]))
                              :V2 (range 1 10)
                              :V3 (take 9 (cycle [0.5 1.0 1.5]))
                              :V4 (take 9 (cycle [\A \B \C]))})
        none (ds/filter source (constantly false))]
    (is (= 0 (ds/row-count none)))
    (is (= (ds/column-count source)
           (ds/column-count none)))
    (is (string? (.toString ^Object none)))))
;; Both an explicit nil value and an absent key count as missing in a mapseq.
(deftest nil-mapseq-values
  (let [sparse (ds/->dataset [{:a nil} {:a 1} {}])]
    (is (= #{0 2}
           (set (ds/missing sparse))))
    (is (= [nil 1 nil]
           (vec (dtype/->reader (sparse :a)))))))
;; `ds/select-rows` accepts either a single index or a vector of indexes.
(deftest select-row
  (let [grid (ds/->dataset {:V1 (take 9 (cycle [1 2]))
                            :V2 (range 1 10)
                            :V3 (take 9 (cycle [0.5 1.0 1.5]))
                            :V4 (take 9 (cycle [\A \B \C]))})
        first-row (fn [selected] (vec (first (ds/value-reader selected))))]
    (is (= [2 6 1.5 \C]
           (first-row (ds/select-rows grid 5))))
    (is (= [2 6 1.5 \C]
           (first-row (ds/select-rows grid [5]))))))
;; Index-based selection of rows and columns accepts scalars, vectors and negative
;; indexes; each group below lists selections that are expected to be equal.
(deftest select-by-index
  (let [grid (ds/->dataset {:V1 (take 9 (cycle [1 2]))
                            :V2 (range 1 10)
                            :V3 (take 9 (cycle [0.5 1.0 1.5]))
                            :V4 (take 9 (cycle [\A \B \C]))})
        first-row (fn [selected] (vec (first (ds/value-reader selected))))
        v4-values (fn [selected] (vec (selected :V4)))]
    (is (= [1 \A]
           (first-row (ds/select-by-index grid [0 3] [0 8]))
           (first-row (ds/select-by-index grid [-4 -1] [-9 -1]))))
    (is (= [\C]
           (first-row (ds/select-by-index grid 3 8))
           (first-row (ds/select-by-index grid -1 -1))
           (first-row (ds/select-by-index grid [3] [8]))
           (first-row (ds/select-by-index grid [-1] [-1]))))
    (is (= [\A \B \C \A \B \C \A \B \C]
           (v4-values (ds/select-columns-by-index grid 3))
           (v4-values (ds/select-columns-by-index grid [3]))
           (v4-values (ds/select-columns-by-index grid -1))
           (v4-values (ds/select-columns-by-index grid [-1]))))
    (is (= [2 6 1.5 \C]
           (first-row (ds/select-rows grid -4))
           (first-row (ds/select-rows grid [-4]))))))
;; `false` is usable as a column name: at creation, via rename, and via a
;; selection map.
(deftest columns-named-false
  (let [created (ds/->dataset [{false 1} {false 2}])]
    (is (= [1 2]
           (vec (created false)))))
  (let [source (ds/->dataset [{:a 1} {:a 2}])]
    (is (= [1 2]
           (vec (ds/column (ds/rename-columns source {:a false}) false)))))
  (let [source (ds/->dataset [{:a 1} {:a 2}])]
    (is (= [1 2]
           (vec (ds/column (ds/select-columns source {:a false}) false))))))
;; `ds/rename-columns` accepts a sequence of new names covering every column, and
;; throws for a sequence of the wrong length or for a set.
(deftest positional-column-rename
  (let [csv-stream (ByteArrayInputStream. (.getBytes "id,a,ab\n0,aa,bb\n1,cc,dd"))
        parsed (ds/->dataset csv-stream {:file-type :csv})
        too-few-names [:a1 :a2]
        all-names [:id :a1 :a2]]
    (is (= all-names
           (ds/column-names (ds/rename-columns parsed all-names))))
    (is (thrown? Throwable
                 (ds/rename-columns parsed too-few-names)))
    (is (thrown? Throwable
                 (ds/rename-columns parsed (set all-names))))))
;; Missing entries read back as nil when a column is turned into a vector.
(deftest column-sequences-use-nil-missing
  (let [sparse (ds/->dataset [{:a 1} {:b 2}])]
    (is (= [1 nil] (vec (sparse :a))))
    (is (= [nil 2] (vec (sparse :b))))))
;; A map of column name -> vector becomes a dataset with those column values.
(deftest ->dataset-nvs-parse-test
  (let [parsed (ds/->dataset {:a [1 2 3]
                              :b [4 5 6]})]
    (is (= [1 2 3]
           (vec (parsed :a))))
    (is (= [4 5 6]
           (vec (parsed :b))))))
;; Both a column and its reader can be invoked through `apply` with an index.
(deftest apply-works-with-columns-and-vectors
  (let [parsed (ds/->dataset {:a [1 2 3]
                              :b [4 5 6]})
        col-a (parsed :a)]
    (is (= 2 (apply col-a [1])))
    (is (= 2 (apply (dtype/->reader col-a) [1])))))
;; `vector-of` inputs keep their primitive datatypes, and a cloned dataset's columns
;; all convert to non-nil arrays.
(deftest vector-of-test
  (let [typed (ds/->dataset {:a (vector-of :float 1 2 3 4)
                             :b (vector-of :short 1 2 3 4)})]
    (is (= #{:float32 :int16}
           (set (map dtype/get-datatype (vals typed)))))
    (let [cloned (dtype/clone typed)
          arrays (map dtype/->array (vals cloned))]
      (is (every? #(not (nil? %))
                  arrays)))))
;; Writes the stocks dataset to a gzipped TSV and reads it back, checking that row
;; count, column count and the set of column datatypes survive the round trip.
(deftest serialize-datetime
  (let [out-path "test.tsv.gz"
        fdata (java.io.File. out-path)]
    (try
      (let [ds (ds/->dataset "test/data/stocks.csv")
            _ (ds/write! ds out-path)
            save-ds (ds/->dataset out-path)]
        (is (= (ds/row-count ds) (ds/row-count save-ds)))
        (is (= (ds/column-count ds) (ds/column-count save-ds)))
        ;; Compare the datatypes of the columns themselves (`vals`), as the other
        ;; tests in this file do, rather than mapping over the dataset directly.
        (is (= (set (map dtype/get-datatype (vals ds)))
               (set (map dtype/get-datatype (vals save-ds))))))
      (finally
        ;; Always remove the temp file, even when writing or re-reading throws.
        (when (.exists fdata)
          (.delete fdata))))))
;; Parsing "date" with an explicit [:packed-local-date format-string] parser still
;; loads all 560 rows.
(deftest custom-packed-local-date-parser
  (let [date-parser [:packed-local-date "MMM d yyyy"]
        stocks (ds/->dataset "test/data/stocks.csv"
                             {:parser-fn {"date" date-parser}})]
    (is (= 560 (ds/row-count stocks)))))
;; Round-trips the stocks dataset through a nippy file and compares counts, the
;; "date" values and every column's metadata.
(deftest stocks-to-from-nippy
  (let [nippy-path (str (java.util.UUID/randomUUID) ".nippy")]
    (try
      (let [stocks (ds/->dataset "test/data/stocks.csv")
            _ (tech-io/put-nippy! nippy-path stocks)
            restored (tech-io/get-nippy nippy-path)]
        (is (= (ds/row-count stocks) (ds/row-count restored)))
        (is (= (ds/column-count stocks) (ds/column-count restored)))
        (is (= (vec (stocks "date"))
               (vec (restored "date"))))
        (is (= (mapv meta (vals stocks))
               (mapv meta (vals restored)))))
      (finally
        ;; Clean up the temp file if it was created.
        (let [tmp (java.io.File. nippy-path)]
          (when (.exists tmp)
            (.delete tmp)))))))
;; The hashCode of an empty dataset is 0.
(deftest empty-dataset-hasheq
  (let [empty-ds (ds/->dataset [])]
    (is (== 0 (.hashCode empty-ds)))))
;; Compares four datasets pairwise against other datasets, persistent maps and
;; java.util.HashMap instances; only identical contents (ds0/ds3) may be equal.
(deftest dataset-equality
  (let [ds0 (ds/->dataset {:foo "foo" :bar "bar"}) ;; same contents as ds3
        ds1 (ds/->dataset {:foo "foo" :bar "bar" :baz "baz"})
        ds2 (ds/->dataset {:foo "foo" :bar "beer"})
        ds3 (ds/->dataset {:foo "foo" :bar "bar"}) ;; same contents as ds0
        datasets [ds0 ds1 ds2 ds3]
        hashmaps (mapv #(into {} %) datasets)
        ;; Copies the dataset's entries into a mutable java.util.HashMap.
        ->mutable-map (fn [entries]
                        (let [m (java.util.HashMap.)]
                          (doseq [[k v] entries]
                            (.put m k v))
                          m))
        mutmaps (mapv ->mutable-map datasets)
        xs (range (count datasets))
        ;; Set of [i j] index pairs where dataset i equals element j of `others`.
        equal-pairs (fn [others]
                      (set (for [i xs
                                 j xs
                                 :when (= (nth datasets i) (nth others j))]
                             [i j])))
        dsresults (equal-pairs datasets)
        hashresults (equal-pairs hashmaps)
        mapresults (equal-pairs mutmaps)
        expected #{[0 0] [1 1] [2 2] [3 3] [3 0] [0 3]}]
    (is (= dsresults expected)
        "Datasets should obey map equivalence when compared to datasets.")
    (is (= hashresults expected)
        "Datasets should obey map equivalence when compared to IPersistentMap.")
    (is (= mapresults expected)
        "Datasets should obey map equivalence when compared to java.util.Map
like HashMap.")))
;; A column compares equal to a plain vector of its values.
(deftest columns-are-persistent-vectors
  (let [head-rows (ds/head (ds/->dataset "test/data/stocks.csv"))
        symbols (vec (head-rows "symbol"))]
    ;; Columns use an APersistentVector-based implementation, so `=` works directly.
    (is (= symbols (head-rows "symbol")))))
;; Checks column :a after `ds/replace-missing` with each strategy used below:
;; :down, :downup, :up, :updown, :mid and :value, plus fallback values.
(deftest replace-missing-test
  (let [gappy (ds/->dataset {:a [nil nil nil 1.0 2 nil nil nil
                                 nil nil 4 nil 11 nil nil]
                             :b [2 2 2 nil nil nil nil nil
                                 nil 13 nil 3 4 5 5]})
        a-values (fn [filled] (vec (filled :a)))]
    (is (= [nil nil nil 1.0 2.0 2.0 2.0 2.0 2.0 2.0 4.0 4.0 11.0 11.0 11.0]
           (a-values (ds/replace-missing gappy :down))))
    (is (= [555.0 555.0 555.0 1.0 2.0 2.0 2.0 2.0 2.0 2.0 4.0 4.0 11.0 11.0 11.0]
           (a-values (ds/replace-missing gappy :all :down 555))))
    (is (= [1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0 2.0 2.0 4.0 4.0 11.0 11.0 11.0]
           (a-values (ds/replace-missing gappy :all :downup))))
    (is (= [1.0 1.0 1.0 1.0 2.0 4.0 4.0 4.0 4.0 4.0 4.0 11.0 11.0 nil nil]
           (a-values (ds/replace-missing gappy :up))))
    (is (= [1.0 1.0 1.0 1.0 2.0 4.0 4.0 4.0 4.0 4.0 4.0 11.0 11.0 11.0 11.0]
           (a-values (ds/replace-missing gappy :updown))))
    (is (= [1.0 1.0 1.0 1.0 2.0 4.0 4.0 4.0 4.0 4.0 4.0 11.0 11.0 555.0 555.0]
           (a-values (ds/replace-missing gappy :all :up 555))))
    (is (= [1.0 1.0 1.0 1.0 2.0 2.0 2.0 2.0 4.0 4.0 4.0 4.0 11.0 11.0 11.0]
           (a-values (ds/replace-missing gappy :mid))))
    (is (= [5.0 5.0 5.0 1.0 2.0 5.0 5.0 5.0 5.0 5.0 4.0 5.0 11.0 5.0 5.0]
           (a-values (ds/replace-missing gappy :all :value 5.0))))))
;; `replace-missing-value` fills the gap in a string column with the given string.
(deftest replace-missing-string-table
  (is (= ["one" "two" "three"]
         (ds/column (ds/replace-missing-value (ds/->dataset {:a ["one" nil "three"]})
                                              "two")
                    :a))))
;; With every value missing, replacing by `dfn/mean` leaves both rows missing.
(deftest replace-missing-all-values-missing
  (let [all-nil (ds/->dataset {:a [nil nil]})]
    (is (= 2 (dtype/ecount
              (ds/missing
               (ds/replace-missing all-nil [:a] :value dfn/mean)))))))
;; Column-filter functions can select which columns `replace-missing` applies to:
;; numeric columns get their mean, categorical columns get "C".
(deftest replace-missing-selector-fn
  (let [sparse (ds/->dataset {:a [nil nil 2 4]
                              :b [nil nil 4 6]
                              :c [nil nil "A" "B"]})
        numeric-filled (ds/replace-missing sparse cf/numeric :value dfn/mean)
        filled (ds/replace-missing numeric-filled cf/categorical :value "C")]
    (is (= [3 3 2 4] (vec (filled :a))))
    (is (= [5 5 4 6] (vec (filled :b))))
    (is (= ["C" "C" "A" "B"] (vec (filled :c))))))
;; The :lerp strategy interpolates missing LocalDateTime values between the two
;; known endpoints.
(deftest replace-missing-ldt
  (let [start (java.time.LocalDateTime/of 2020 1 1 1 1 1)
        end (java.time.LocalDateTime/of 2020 10 1 1 1 1)
        sparse (ds/->dataset {:dt [start nil nil nil end]})]
    (is (= (seq ((ds/replace-missing sparse :lerp) :dt))
           [(java.time.LocalDateTime/of 2020 1 1 1 1 1)
            (java.time.LocalDateTime/of 2020 3 9 13 1 1)
            (java.time.LocalDateTime/of 2020 5 17 1 1 1)
            (java.time.LocalDateTime/of 2020 7 24 13 1 1)
            (java.time.LocalDateTime/of 2020 10 1 1 1 1)]))))
;; The :abb strategy leaves no missing values in the dataset.
(deftest replace-missing-abb
  (let [sparse (ds/->dataset {:a [nil nil nil 1.0 2 nil nil nil
                                  nil nil 4 nil 11 nil nil]
                              :b [2 2 2 nil nil nil nil nil
                                  nil 13 nil 3 4 5 5]})
        filled (ds/replace-missing sparse :abb)]
    (is (= 0 (dtype/ecount (ds/missing filled))))))
;; A dataset holding datasets in one of its columns survives nippy freeze/thaw,
;; including column metadata.
(deftest dataset-column-nippy
  (let [nested (ds/->dataset {:a [1 2]
                              :datasets [(ds/->dataset [{:a 1}])
                                         (ds/->dataset [{:b 2}])]})
        frozen (nippy/freeze nested)
        thawed (nippy/thaw frozen)]
    (is (= (map meta (vals nested))
           (map meta (vals thawed))))
    (is (= nested thawed))))
;; Smoke test with no assertions: `unique-by-column` is simply called on a column
;; containing only nils.
(deftest unique-by-nil-regression
  (let [all-nil (ds/add-column (ds/->dataset [])
                               (ds-col/new-column :abc [nil nil]))]
    (ds/unique-by-column all-nil :abc)))
;; Converting a dataset with one fully-missing row to a :float64 tensor yields one
;; NaN per column (three in total).
(deftest missing-values-and-tensors
  (let [sparse (ds/->dataset {:a [1 nil 2]
                              :b [1.0 nil 2.0]
                              :c [5 nil 6]})
        tensor-values (dtype/->reader (ds-tens/dataset->tensor sparse :float64))]
    (is (= 3
           (count (filter #(Double/isNaN %) tensor-values))))))
;; `ds/bind->` threads a value through forms like `->` while also binding it to a
;; symbol the forms may reference.
(deftest bind->-test
  (is (= 42
         (ds/bind-> 41 v inc)))
  (is (= 82
         (ds/bind-> 41 v
           (+ v))))
  (is (= 31
         (ds/bind-> 41 v
           (- 10))))
  (is (dfn/equals
       [39.81 3.709 7.418]
       (ds/bind-> (ds/->dataset "test/data/stocks.csv") stocks
         (assoc :logprice2 (dfn/log1p (stocks "price")))
         (assoc :logp3 (dfn/* 2 (stocks :logprice2)))
         (ds/select-columns ["price" :logprice2 :logp3])
         (ds-tens/dataset->tensor)
         (first)))))
;; All-nil input must parse the same way from a column map and from a mapseq:
;; same row count, and both rows reported missing in each dataset.
(deftest parse-nils
  (let [ds-a (ds/->dataset {:a [nil nil]})
        ds-b (ds/->dataset [{:a nil} {:a nil}])]
    (is (= (ds/row-count ds-a)
           (ds/row-count ds-b)))
    ;; Two separate assertions: a second form inside a single `is` is treated as
    ;; the failure message and is never checked.
    (is (= 2 (dtype/ecount (ds/missing ds-a))))
    (is (= 2 (dtype/ecount (ds/missing ds-b))))))
;; A [:string fn] parser is applied to raw CSV entries; here it keeps the first
;; five characters of each date.
(deftest parser-fn-failing-on-csv-entries
  (let [first-five #(subs % 0 5)
        stocks (ds/->dataset "test/data/stocks.csv"
                             {:key-fn keyword
                              :parser-fn {:date [:string first-five]}})]
    (is (= "Jan 1"
           (first (stocks :date))))))
;; One-hot encoding names the new columns "<column>-<value>", as strings for string
;; column names and as keywords for keyword column names.
(deftest one-hot-failing
  (let [string-keyed (ds/categorical->one-hot
                      (ds/->dataset [{"a" 1 "b" "AA"}
                                     {"a" 2 "b" "AA"}
                                     {"a" 3 "b" "BB"}
                                     {"a" 4 "b" "BB"}])
                      ["b"])
        keyword-keyed (ds/categorical->one-hot
                       (ds/->dataset [{:a 1 :b "AA"}
                                      {:a 2 :b "AA"}
                                      {:a 3 :b "BB"}
                                      {:a 4 :b "BB"}])
                       [:b])]
    (is (= #{"a" "b-AA" "b-BB"} (set (ds/column-names string-keyed))))
    (is (= #{:a :b-AA :b-BB} (set (ds/column-names keyword-keyed))))))
;; After selecting the first four rows, index 4 is out of range and reading it throws.
(deftest select-memory
  (let [five-rows (ds/->dataset [{:a 0} {:a 1} {:a 2} {:a 3} {:a 4}])
        four-rows (ds/select-rows five-rows (range 4))]
    (is (= (vec (range 4)) (vec (four-rows :a))))
    (is (thrown? Throwable (vec (:a (ds/select-rows four-rows 4)))))))
;; Sorting a column with `compare` matches `sort` on the same values.
(deftest custom-sort-by-column
  (let [sorted-ds (ds/sort-by-column (ds/->dataset {:a [5 4 3 2 8 7 6]})
                                     :a compare)]
    (is (= (vec (sort [5 4 3 2 8 7 6]))
           (vec (sorted-ds :a))))))
;; Indexes passed as the missing argument of `new-column` read back as nil.
(deftest set-missing-new-column
  (let [ones-with-gaps (ds-col/new-column "abc" (repeat 10 1) nil [1 2 3])]
    (is (= [1 nil nil nil 1 1 1 1 1 1] (vec ones-with-gaps)))))
;; Smoke test with no assertions: a left join keyed on a LocalDate column is simply
;; executed.
(deftest join-on-date
  (let [left (ds/->dataset {:a [(java.time.LocalDate/of 2001 1 1)]
                            :b [11]})
        right (ds/->dataset {:a [(java.time.LocalDate/of 2001 1 1)]
                             :c [22]})]
    (ds-join/left-join :a left right)))
;; Sampling twice with the same :seed returns the same rows.
(deftest sample-repeatable-seed
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        seeded {:seed 20}]
    (is (= (vec (get (ds/sample stocks 5 seeded) "symbol"))
           (vec (get (ds/sample stocks 5 seeded) "symbol"))))))
;; The one-argument arity of `ds/sample` returns as many rows as asking for 5.
(deftest sample-arities
  (let [stocks (ds/->dataset "test/data/stocks.csv")]
    (is (= (dtype/ecount (get (ds/sample stocks) "symbol"))
           (dtype/ecount (get (ds/sample stocks 5) "symbol"))))))
;; `.addAll` on a string table appends every string in order.
(deftest string-table-addall
  (let [words ["one" "two" "three"]
        table (str-table/make-string-table 0)]
    (.addAll table words)
    (is (= (vec table)
           words))))
;; `concat-copying` works for columns whose values are vectors.
(deftest concat-copying-object-fail
  (let [first-half (ds/->dataset {:a [["A" 1] ["B" 1]]})
        second-half (ds/->dataset {:a [["A" 2] ["B" 2]]})
        joined (ds/concat-copying first-half second-half)]
    (is (= [["A" 1] ["B" 1] ["A" 2] ["B" 2]]
           (vec (joined :a))))))
;; Descriptive stats computed on the result of `ds/concat` report the expected means.
(deftest concat-inplace-desc-stats
  (let [small (ds/->dataset [{"A" 1 "B" 2} {"A" 2 "B" 3}])]
    (is (dfn/equals [1.5 2.5]
                    (:mean (ds/descriptive-stats (ds/concat small small)))))))
;; Regression test: replacing missing values with `dfn/mean` on an integer column
;; yields integer values. `deftest` takes no argument vector, so the body starts
;; directly with the `let`.
(deftest replace-missing-regression-181
  (let [ds (ds/->dataset {:a [nil nil 2 2]})]
    (is (= [2 2 2 2]
           (-> (ds/replace-missing ds :all :value dfn/mean)
               :a
               vec)))))
;; Checks the :midpoint, :nearest and :mid strategies on integer, double, string
;; and local-date columns.
(deftest replace-missing-regression-184
  (let [base-date (java.time.LocalDate/parse "2020-12-11")
        sparse (ds/->dataset {:a [nil 2 nil nil 4 nil 6 nil]
                              :b [3. nil nil 6. nil 9. nil 12.]
                              :c [nil "A" nil nil "B" nil "C" nil]
                              :d ["A" nil nil "B" nil "C" nil "D"]
                              ;; base date shifted by 0, 10, 20 ... days per row
                              :e (dtype-dt/plus-temporal-amount
                                  (dtype/make-container
                                   :local-date
                                   [nil base-date nil nil base-date nil base-date nil])
                                  (dfn/* 10 (range 8))
                                  :days)})
        midpoint-filled (ds/replace-missing sparse :midpoint)]
    (is (= [2.0 2.0 3.0 3.0 4.0 5.0 6.0 6.0] (vec (midpoint-filled :a))))
    (is (= [3.0 4.5 4.5 6.0 7.5 9.0 10.5 12.0] (vec (midpoint-filled :b))))
    (is (= [nil "A" "A" "A" "B" "B" "C" "C"] (vec (midpoint-filled :c))))
    (is (= ["A" "A" "A" "B" "B" "C" "C" "D"] (vec (midpoint-filled :d))))
    (is (= ["2020-12-21" "2020-12-21" "2021-01-05" "2021-01-05" "2021-01-20"
            "2021-01-30" "2021-02-09" "2021-02-09"]
           (mapv str (:e midpoint-filled))))
    (let [wider-gaps (ds/->dataset {:a [nil 2 nil nil nil 4 nil 6 nil]
                                    :b [3. nil nil nil 6. nil 9. nil 12.]
                                    :c [nil "A" nil nil "B" nil nil "C" nil]
                                    :d ["A" nil nil "B" nil nil "C" nil "D"]
                                    :e (dtype-dt/plus-temporal-amount
                                        (dtype/make-container
                                         :local-date
                                         [nil base-date nil nil nil base-date nil
                                          base-date nil])
                                        (dfn/* 10 (range 9))
                                        :days)})
          nearest-filled (ds/replace-missing wider-gaps :nearest)
          mid-filled (ds/replace-missing wider-gaps :mid)]
      (is (= [2 2 2 2 4 4 4 6 6] (vec (nearest-filled :a))))
      (is (= [2 2 2 2 4 4 4 6 6] (vec (mid-filled :a))))
      (is (= [3.0 3.0 3.0 6.0 6.0 6.0 9.0 9.0 12.0] (vec (nearest-filled :b)))))))
;; `to-double-array` works for columns built from a vector and from an int array.
(deftest column-to-double-regression-187
  (let [from-vector (ds-col/new-column :col1 [1 2 3])]
    (is (dfn/equals [1 2 3]
                    (ds-col/to-double-array from-vector))))
  (let [from-ints (ds-col/new-column :col1 (int-array [1 2 3]))]
    (is (dfn/equals (ds-col/to-double-array from-ints) [1 2 3]))))
;; A column named `false` is written to CSV and read back under the name "false".
(deftest boolean-csv-column-names
  (try
    (ds/write! (ds/->dataset {false [1]}) "test/out.csv")
    (is (= ["false"]
           (ds/column-names (ds/->dataset "test/out.csv"))))
    (finally
      ;; Remove the temp file regardless of the outcome.
      (.delete (java.io.File. "test/out.csv")))))
;; `to-double-array` returns a primitive double[] and its contents pass the
;; element-wise `dfn/eq` check against [2.0 NaN] for the column with a missing row.
(deftest to-double-array-returns-double-array
  (let [rows (ds/->dataset [{:a 1.0 :b 2.0}
                            {:a 3.0}])
        double-array-class (Class/forName "[D")]
    (is (instance? double-array-class (ds-col/to-double-array (rows :a))))
    (is (every? identity
                (dfn/eq [2.0 Double/NaN]
                        (ds-col/to-double-array (rows :b)))))))
;; Smoke test with no assertions: writes a dataset whose :name metadata is nil.
(deftest write-with-nil-name
  (let [unnamed (vary-meta (ds/->dataset [{:a 1.0 :b 2.0}
                                          {:a 3.0}])
                           assoc :name nil)]
    (try
      (ds/write! unnamed "test/data/nil-name.csv")
      (finally
        ;; Remove the output file regardless of the outcome.
        (.delete (java.io.File. "test/data/nil-name.csv"))))))
;; Scalar values and an infinite range are accepted as column data next to a
;; four-element vector; the scalar string is repeated for every row.
(deftest create-dataset-scalars
  (let [mixed (ds/->dataset {:a [1 2 3 4]
                             :b "hey"
                             :c (range)
                             :d 1})
        column-dtypes (mapv (comp :datatype meta) (vals mixed))]
    (is (= ["hey" "hey" "hey" "hey"]
           (vec (mixed :b))))
    (is (= [:int64 :string :int64 :int64]
           column-dtypes))))
;; An infinite `cycle` column next to two four-element lists gives four rows.
(deftest create-dataset-seq
  (let [from-seqs (ds/->dataset {:calendar-year '(2020 2021 2020 2021)
                                 :setting '("A" "A" "B" "B")
                                 :bigdata (cycle [1 2 3 4])})]
    (is (= 4 (ds/row-count from-seqs)))))
;; Selecting with nil gives zero rows; selecting nil rows keeps every column.
(deftest empty-dataset-on-select-nothing
  (let [stocks (ds/->dataset "test/data/stocks.csv")]
    (is (= 0 (ds/row-count (ds/select-columns stocks nil))))
    (is (= 0 (ds/row-count (ds/select-rows stocks nil))))
    (is (= (ds/column-count stocks)
           (ds/column-count (ds/select-rows stocks nil))))))
;; Casts a :string column containing "NaN" to :float64 with the :relaxed? parser and
;; checks the first three values.
(deftest column-cast-test-cce-fail
  (let [strings (ds/->dataset {:col1 [1 2 3 "NaN"]} {:parser-fn :string})
        as-doubles (ds/column-cast strings :col1 [:float64 :relaxed?])]
    (is (= [1.0 2.0 3.0]
           (vec (take 3 (ds/column as-doubles :col1)))))))
;; Smoke test: `ds/brief` must not throw on a dataset with no columns.
;; `is` takes (form, message); the call is placed inside the asserted form so a
;; thrown exception is reported through the assertion itself.
(deftest desc-stats-ok
  (let [ds (ds/->dataset [])]
    (is (do (ds/brief ds) true)
        "ds/brief should not throw on an empty dataset")))
;; Smoke test: `ds/brief` must not throw when one column is empty and another has
;; a value.
;; `is` takes (form, message); the call is placed inside the asserted form so a
;; thrown exception is reported through the assertion itself.
(deftest desc-stats-also-ok
  (let [ds (ds/->dataset {"col1" [] "col2" [1]})]
    (is (do (ds/brief ds) true)
        "ds/brief should not throw when one column is empty")))
;; Smoke test: `ds/brief` must not throw on a dataset whose only column is empty.
;; `is` takes (form, message); the call is placed inside the asserted form so a
;; thrown exception is reported through the assertion itself.
(deftest desc-stats-oob
  (let [ds (ds/->dataset {"col1" []})]
    (is (do (ds/brief ds) true)
        "ds/brief should not throw when the only column is empty")))
;; `ds/column-map` over a column with a missing value, with progressively more
;; information supplied up front (nothing, :datatype, :datatype + :missing-fn).
(deftest column-map-regression-1
  (let [sparse (ds/->dataset [{:a 1.0 :b 2.0} {:a 3.0 :b 5.0} {:a 4.0 :b nil}])
        nil-safe-inc #(when % (inc %))]
    ;; Result is scanned for both datatype and missing set.
    (is (= [3.0 6.0 nil]
           (:b2 (ds/column-map sparse :b2 nil-safe-inc [:b]))))
    ;; Result is scanned for the missing set only and used in-place.
    (is (= [3.0 6.0 nil]
           (:b2 (ds/column-map sparse :b2 nil-safe-inc
                               {:datatype :float64} [:b]))))
    ;; Nothing is scanned at all.
    (is (= [3.0 6.0 nil]
           (:b2 (ds/column-map sparse :b2 #(inc %)
                               {:datatype :float64
                                :missing-fn ds-col/union-missing-sets} [:b]))))
    ;; Scanning for missing values used to cause an NPE at `inc`.
    ;; Now the data is cast to Double/NaN, so this check is disabled.
    #_(is (thrown? Throwable
                   (ds/column-map sparse :b2 #(inc %)
                                  {:datatype :float64}
                                  [:b])))))
;; Dropping :b must leave the remaining column names, including the string
;; name "c", in their original order.
(deftest remove-columns-issue-242
  (is (= [:a "c" :d :e]
         (vec (ds/column-names
               (ds/drop-columns (ds/->dataset {:a [1] :b [2] "c" [3]
                                               :d [4] :e [5]})
                                [:b]))))))
;; Casting a string column to :packed-local-date: reads yield LocalDate
;; instances, and the nil source value stays nil.
(deftest column-cast-packed-date
  (let [raw-ds (ds/->dataset [{:a 0 :b "2020-03-05"} {:a 1 :b nil}])
        cast-ds (ds/column-cast raw-ds :b :packed-local-date)]
    (is (instance? LocalDate ((cast-ds :b) 0)))
    (is (nil? ((cast-ds :b) 1)))))
;; Round-trips a concatenated dataset (whose :x column is shorter than :y in
;; the first input) through dataset->data / data->dataset and compares both
;; the :x values and the missing sets.
(deftest dataset->data-regression-249
  (let [ragged (ds/->dataset {:x ["1"]
                              :y ["2" "3"]})
        even (ds/->dataset {:x ["4"]
                            :y ["5"]})
        src-ds (ds/concat ragged even)
        rehydrated (-> src-ds
                       (ds/dataset->data)
                       (ds/data->dataset))]
    (is (= (vec (src-ds :x))
           (vec (rehydrated :x))))
    (is (= (ds/missing src-ds)
           (ds/missing rehydrated)))))
;; A column holding a nested vector must survive a nippy freeze/thaw cycle.
(deftest dataset->data-regression-250
  (let [src-ds (ds/->dataset {:x [1]
                              :y [[3 4]]})
        thawed-ds (nippy/thaw (nippy/freeze src-ds))]
    (is (= (vec (src-ds :y))
           (vec (thawed-ds :y))))))
;; Individual columns (not whole datasets) are frozen and thawed with nippy,
;; then reassembled into a dataset and compared with the originals.
(deftest freeze-thaw-column
  (let [stocks (ds/->dataset "test/data/stocks.csv" {:key-fn keyword})
        date-col (:date stocks)
        sym-col (:symbol stocks)
        frozen-date (nippy/freeze date-col)
        frozen-sym (nippy/freeze sym-col)
        thawed-ds (ds/new-dataset [(nippy/thaw frozen-date)
                                   (nippy/thaw frozen-sym)])]
    (is (= (vec date-col)
           (thawed-ds :date)))
    (is (= (vec sym-col)
           (thawed-ds :symbol)))))
;; Invoking a column with -1 must return the same value as invoking it with
;; the last valid index.
(deftest negative-index-on-columns-gets-last
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        final-idx (dec (ds/row-count stocks))
        sym-col (stocks "symbol")]
    (is (= (sym-col final-idx) (sym-col -1)))))
;; Requiring identical columns for concatenation was a bad idea. Concatenating
;; datasets, just like concatenating sequences of maps, should not require the
;; same columns across all datasets; that requirement leads to extremely
;; error-prone code.
;; Concatenating datasets with disjoint columns gives 20 rows, with ten
;; missing entries in each of :a and :b.
(deftest concat-doesnt-require-same-columns
  (let [with-a-c (ds/->dataset {:a (range 10)
                                :c (repeat 10 (dtype-dt/local-date))})
        with-b (ds/->dataset {:b (range 10)})
        combined (ds/concat-copying with-a-c with-b)
        missing-count (fn [colname]
                        (dtype/ecount (ds/missing (combined colname))))]
    (is (= 20 (ds/row-count combined)))
    (is (= 10 (missing-count :a)))
    (is (= 10 (missing-count :b)))))
;;It is way too confusing for users to have to navigate pack/unpack code in any
;;normal situation.
;; A value read from the "date" column can be used directly in a filter
;; predicate on that column, and sorting by the column keeps every row.
(deftest filter-sort-columns-uses-unpacked-datatypes
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        probe-date (second (stocks "date"))
        matches-probe? (fn [d] (= d probe-date))]
    (is (not= 0 (-> stocks
                    (ds/filter-column "date" matches-probe?)
                    (ds/row-count))))
    ;; Make sure sorting still works.
    (is (= (ds/row-count stocks)
           (-> stocks
               (ds/sort-by-column "date")
               (ds/row-count))))))
;; Adding 1 to an integer column that has a missing entry: the result column
;; keeps exactly one missing value and the other entries read back as doubles.
(deftest binary-ops-on-integer-missing-results-in-nan
  (let [src-ds (ds/->dataset {:a [1 2 nil 4]})
        incremented (dfn/+ (:a src-ds) 1)
        dst-ds (assoc src-ds :b incremented)]
    (is (= 1 (-> (dst-ds :b) (ds/missing) (dtype/ecount))))
    (is (= [2.0 3.0 nil 5.0]
           (vec (dst-ds :b))))))
;; The :nan-strategy option of sort-by-column: :first and :last place missing
;; values at the respective end, :exception throws.
(deftest sort-works-with-nan
  (let [gappy (ds/->dataset {:a [1 nil 2 nil nil 4]})
        sort-with (fn [strategy]
                    (ds/sort-by-column gappy :a nil {:nan-strategy strategy}))
        nils-first (sort-with :first)
        nils-last (sort-with :last)]
    (is (= [nil nil nil 1 2 4] (vec (nils-first :a))))
    (is (= [1 2 4 nil nil nil] (vec (nils-last :a))))
    (is (thrown? Exception (sort-with :exception)))))
;; Concatenating a :local-date column with a :packed-local-date column, both
;; in place and copying, must yield one of those two datatypes.
(deftest concat-packed-date-with-date-results-in-local-date-or-packed-local-date
  (let [unpacked-ds (ds/->dataset (repeat 10 {:a (dtype-dt/local-date)})
                                  {:parser-fn {:a :local-date}})
        packed-ds (ds/->dataset {:a (repeat 10 (dtype-dt/local-date))}
                                {:parser-fn {:a :packed-local-date}})
        inplace-res (ds/concat-inplace unpacked-ds packed-ds)
        copying-res (ds/concat-copying unpacked-ds packed-ds)
        date-dtype? #{:local-date :packed-local-date}]
    (is (date-dtype? (dtype/elemwise-datatype (inplace-res :a))))
    (is (date-dtype? (dtype/elemwise-datatype (copying-res :a))))))
;; row-map over the stocks data. Looking the price up by the keyword :price is
;; expected to throw (the working lookup below uses the string "price"), while
;; the string lookup must produce the squared prices in :price2.
(deftest row-map-test
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        square-of (fn [colname]
                    (fn [row]
                      (hash-map :price2 (* (row colname) (row colname)))))]
    (is (thrown? Exception (ds/row-map stocks (square-of :price))))
    (is (dfn/equals (dfn/sq (stocks "price"))
                    (ds/column (ds/row-map stocks (square-of "price"))
                               :price2)))))
;; Merging a 10-row packed-local-date dataset into a 20-row dataset: the
;; resulting :a column must still be printable via .toString.
(deftest extend-packed-date-with-empty
  (let [longer-ds (ds/->dataset {:b (range 20)})
        dates-ds (ds/->dataset (repeat 10 {:a (dtype-dt/local-date)})
                               {:parser-fn {:a :packed-local-date}})
        merged (merge longer-ds dates-ds)]
    (is (some? (.toString (merged :a))))))
;; Descriptive stats on a date column with one value and one nil: min, mean
;; and max must all equal the single present date.
(deftest desc-stats-date-col
  (let [src-ds (ds/->dataset
                {:date-time-with-nil ["Jul 1, 2011" nil]}
                {:parser-fn :local-date})
        stats (ds/descriptive-stats src-ds)
        min-col (get stats :min)
        mean-col (get stats :mean)
        max-col (get stats :max)
        only-date ((src-ds :date-time-with-nil) 0)]
    (is (every? (fn [stat] (= stat only-date))
                [(min-col 0) (mean-col 0) (max-col 0)]))))
;; `nth` on a 10-element column: out-of-range indexes throw without a default
;; and return the default when one is given; -10 addresses the first element.
(deftest nth-col-neg-indexes
  (let [ten-col (-> (ds/->dataset {:a (range 10)})
                    (get :a))]
    ;; One past the end.
    (is (thrown? Throwable (nth ten-col 10)))
    (is (= :a (nth ten-col 10 :a)))
    ;; One before the most negative valid index.
    (is (thrown? Throwable (nth ten-col -11)))
    (is (= :a (nth ten-col -11 :a)))
    ;; Most negative valid index.
    (is (= 0 (nth ten-col -10 :a)))))
;; A size-2 rolling mean over three column sources (dataset column with NaN,
;; new column with nil, new column from a double array) must give the same
;; result under `dfn/eq`.
(deftest column-rolling-regression
  (let [rolling-mean-eq? (fn [col]
                           (every? identity
                                   (dfn/eq [##NaN 2.0 2.5 3.5]
                                           (rolling/fixed-rolling-window
                                            col 2 dfn/mean))))]
    (is (rolling-mean-eq? ((ds/->dataset {:a [##NaN 2 3 4]}) :a)))
    (is (rolling-mean-eq? (ds-col/new-column [nil 2 3 4])))
    (is (rolling-mean-eq? (ds-col/new-column (double-array [##NaN 2 3 4]))))))
;; Each concat variant applied to no datasets at all must return nil.
(deftest concat-nil-is-nil
  (is (nil? (apply ds/concat nil)))
  (is (nil? (apply ds/concat-copying nil)))
  (is (nil? (apply ds/concat-inplace nil))))
;; User metadata attached to a column must survive both replace-missing-value
;; and replace-missing.
(deftest replace-missing-whacks-metadata-274
  (let [tagged-ds (ds/update-column (ds/->dataset {:a [0 nil 1 nil 2]})
                                    :a
                                    #(with-meta % {:a :b}))
        value-filled (ds/replace-missing-value tagged-ds [:a] 10)
        down-filled (ds/replace-missing tagged-ds [:a] :down)
        user-meta (fn [dataset]
                    (select-keys (meta (dataset :a)) [:a]))]
    (is (= {:a :b} (user-meta tagged-ds)))
    (is (= {:a :b} (user-meta value-filled)))
    (is (= {:a :b} (user-meta down-filled)))))
;; `ds/induction` builds new columns row by row; the callback receives the
;; dataset accumulated so far. The expected vectors below pin the running
;; sums it must produce.
(deftest induction-test
  (let [step (fn [so-far]
               {:sum-of-previous-row (dfn/sum (ds/rowvec-at so-far -1))
                :sum-a (dfn/sum (so-far :a))
                :sum-b (dfn/sum (so-far :b))})
        result (ds/induction (ds/->dataset {:a [0 1 2 3] :b [1 2 3 4]})
                             step)]
    (is (= [0.0 1.0 3.0 6.0]
           (result :sum-b)))
    (is (= [0.0 0.0 1.0 3.0]
           (result :sum-a)))
    (is (= [0.0 1.0 5.0 14.0]
           (result :sum-of-previous-row)))))
;; Each source row expands into (row :data) output rows, so the output row
;; count must equal the sum of the random :data column.
(deftest row-mapcat
  (let [src (ds/->dataset {:rid (range 10)
                           :data (repeatedly 10 (fn [] (rand-int 3)))})
        expand-row (fn [row]
                     (map (fn [idx] {:idx idx})
                          (range (row :data))))
        expanded (ds/row-mapcat src expand-row)
        expected-rows (long (dfn/sum (src :data)))]
    (is (= expected-rows (ds/row-count expanded)))))
;; Defines a struct with one field per numeric datatype, fills an array of ten
;; such structs column-wise with 0..9, and checks that a dataset built from the
;; column map reports the same datatype and values for every field.
(deftest array-of-structs-all-dtypes
  (let [fields (mapv (fn [[field-name field-type]]
                       {:name field-name :datatype field-type})
                     [[:i8 :int8]
                      [:u8 :uint8]
                      [:i16 :int16]
                      [:u16 :uint16]
                      [:i32 :int32]
                      [:u32 :uint32]
                      [:i64 :int64]
                      [:u64 :uint64]
                      [:f32 :float32]
                      [:f64 :float64]])
        sdef (dt-struct/define-datatype! :alldtypes fields)
        structs (dt-struct/new-array-of-structs :alldtypes 10)
        cmap (dt-struct/column-map structs)]
    ;; Write 0..9 into every struct field column.
    (doseq [field-col (vals cmap)]
      (dtype/copy! (range 10) field-col))
    (let [struct-ds (ds/->dataset cmap)]
      (doseq [prop (sdef :data-layout)]
        (let [ds-col (ds/column struct-ds (:name prop))]
          (is (= (:datatype (meta ds-col)) (:datatype prop)) (str prop))
          (is (= (vec (cmap (:name prop)))
                 (vec ds-col))
              (str prop)))))))
;; Filling missing dates with a fixed value must leave no missing entries and
;; five identical dates.
(deftest replace-missing-packed-local-date
  (let [today (dtype-dt/local-date)
        filled (ds/replace-missing (ds/->dataset {:a [today nil nil today nil]})
                                   :all :value today)]
    (is (== 0 (dtype/ecount (ds/missing filled))))
    (is (= (vec (repeat 5 today))
           (vec (filled :a))))))
;; Variable-width rolling mean of :b keyed on the double column :a, checked on
;; the head rows for two window sizes and for the :left and :center relative
;; window positions.
(deftest variable-rolling-window-doubles
  (let [ds (ds/->dataset {:a (double-array (range 100))
                          :b (range 100)})
        ;; Head rows of the rolled dataset for the given extra window options.
        rolled-head (fn [extra-opts]
                      (ds/head (ds-roll/rolling ds
                                                (merge {:window-type :variable
                                                        :column-name :a}
                                                       extra-opts)
                                                {:b-mean (ds-roll/mean :b)})))
        small-win (rolled-head {:window-size 10})
        big-win (rolled-head {:window-size 20})]
    (is (dfn/equals [4.5 5.5 6.5 7.5 8.5] (vec (small-win :b-mean))))
    (is (dfn/equals [0.0 0.5 1.0 1.5 2.0]
                    (vec (ds/column (rolled-head {:window-size 10
                                                  :relative-window-position :left})
                                    :b-mean))))
    (is (dfn/equals [2.0 2.5 3.0 3.5 4.0]
                    (vec (ds/column (rolled-head {:window-size 10
                                                  :relative-window-position :center})
                                    :b-mean))))
    (is (dfn/equals [9.5 10.5 11.5 12.5 13.5] (vec (big-win :b-mean))))))
;; A rolling reducer that takes two columns at once; the output column :c must
;; be :float64 and start with the expected window sums.
(deftest rolling-multi-column-reducer
  (let [src (ds/->dataset {:a (range 100)
                           :b (range 100)})
        sum-both (fn [a-window b-window]
                   (+ (dfn/sum a-window) (dfn/sum b-window)))
        rolled (ds-roll/rolling src 10 {:c {:column-name [:a :b]
                                            :reducer sum-both
                                            :datatype :float64}})]
    (is (= :float64 (dtype/elemwise-datatype (rolled :c))))
    (is (= [20.0 30.0 42.0 56.0 72.0]
           (vec (take 5 (rolled :c)))))))
;; Unrolling a column of vectors flattens it to one value per row.
(deftest unroll-single-column
  (let [nested-ds (ds/->dataset {:a [[0 1 2 3] [4 5] [6 7 8]]})]
    (is (= (vec (range 9))
           (vec (ds/column (ds/unroll-column nested-ds :a) :a))))))
;; Rows given as plain java.util.HashMap instances (not Clojure maps) must be
;; accepted by ->dataset.
(deftest construct-with-hashmap
  (let [row (java.util.HashMap.)
        _ (.put row :a 1)
        _ (.put row :b 2)
        hm-ds (ds/->dataset [row row row])]
    (is (= [1 1 1]
           (vec (hm-ds :a))))))
;; Filtering with `identity` on columns that contain NaN / nil / 0 values; the
;; surviving :a values are pinned for each filter column.
(deftest double-nan-missing
  (let [ds (ds/->dataset {:a [0.0 Double/NaN 2.0]
                          :b [0 nil 2]
                          :c [:a nil :b]})
        a-after-filter (fn [colname]
                         (vec (ds/column (ds/filter-column ds colname identity)
                                         :a)))]
    (is (= [2.0] (a-after-filter :a)))
    (is (= [2.0] (a-after-filter :b)))
    (is (= [0.0 2.0] (a-after-filter :c)))))
;; Concatenating two datasets that have had all rows dropped, and that differ
;; in column names, must still return a non-nil result.
(deftest issue-315
  (let [drop-only-row (fn [row]
                        (ds/drop-rows (ds/->dataset [row]) [0]))]
    (is (some? (ds/concat (drop-only-row {:a 1 :b 2})
                          (drop-only-row {:a 1 :c3 2}))))))
;; The :key-fn option must be applied to column names for every input shape
;; used here: sequence of maps, map of columns, and a CSV input stream.
(deftest issue-259
  (let [dashed-key (fn [k] (keyword (clojure.string/replace k " " "-")))
        lower-key (fn [k] (keyword (.toLowerCase k)))
        ;; Set of the :name metadata of every column in the dataset.
        column-name-set (fn [dataset]
                          (set (map #(:name (meta %)) (vals dataset))))]
    (is (= #{:b-o :a-o}
           (column-name-set (ds/->dataset [{"a o" 1 "b o" 2} {"a o" 5 "b o" 3}]
                                          {:key-fn dashed-key}))))
    (is (= #{:b-o :a-o}
           (column-name-set (ds/->dataset {"a o" [1 5] "b o" [2 3]}
                                          {:key-fn dashed-key}))))
    (is (= #{:foo :bar}
           (column-name-set (ds/->dataset [{"Foo" 1 , "Bar" 2}]
                                          {:key-fn lower-key}))))
    (is (= #{:foo :bar}
           (column-name-set (ds/->dataset (ByteArrayInputStream. (.getBytes "Foo,Bar\n1,2"))
                                          {:key-fn lower-key
                                           :file-type :csv}))))))
;; categorical->number with an explicit lookup table: a table containing the
;; non-integer value 2.2 must throw, an all-integer table must map the symbol
;; column onto exactly 1..5.
(deftest discrete-categorical-issue-322
  (let [stocks (ds/->dataset "test/data/stocks.csv")
        fractional-table {"AAPL" 1
                          "MSFT" 2.2
                          "AMZN" 3
                          "IBM" 4
                          "GOOG" 5}
        integer-table {"AAPL" 1
                       "MSFT" 2
                       "AMZN" 3
                       "IBM" 4
                       "GOOG" 5}]
    (is (thrown? Exception
                 (ds/categorical->number stocks ["symbol"] fractional-table)))
    (is (= (set (range 1 6))
           (set (map long
                     (ds/column (ds/categorical->number stocks ["symbol"] integer-table)
                                "symbol")))))))
;; Custom column metadata ({:k :v}) must survive column->data / data->column.
(deftest column-meta-roundtrip
  (let [tagged-col (ds-col/new-column :a [0] {:k :v})]
    (is (= :v
           (:k (meta (ds-base/data->column (ds-base/column->data tagged-col))))))))
;; print-all must set the same metadata as print-range with :all, and printing
;; the 1000-row dataset must produce more than 1000 characters.
(deftest print-all-test
  (let [thousand-rows (ds/->dataset (map (fn [i] {:a i}) (range 1000)))]
    (is (= (meta (ds/print-all thousand-rows))
           (meta (ds-print/print-range thousand-rows :all))))
    (is (> (count (with-out-str (println (ds/print-all thousand-rows))))
           1000))))
;; Checks how a short-valued column with alternating missing entries converts
;; to arrays and feeds numeric functions. The stray `[]` that used to follow
;; the test name was removed: `deftest` takes no argument vector, so it was
;; just a no-op expression in the body.
(deftest column-copy-test
  (let [short-col (:a (ds/->dataset (interleave
                                     (repeat 10 {:a (short 25)})
                                     (repeat 10 {:a nil}))))]
    ;; In the raw array the missing slots read back as -32768.
    (is (= (vec (apply concat (repeat 10 [25 -32768])))
           (vec (dtype/->array short-col))))
    ;; Converted to :float64, the missing slots are not finite.
    (is (= (vec (apply concat (repeat 10 [true false])))
           (vec (dfn/finite? (dtype/->array :float64 short-col)))))
    (is (= (vec (apply concat (repeat 10 [true false])))
           (vec (dfn/finite? short-col))))
    ;; The mean is 25, so missing entries do not contribute to it.
    (is (= 25
           (Math/round (dfn/mean short-col))))))
;; select-columns must accept column-filter functions as well as explicit
;; column-name vectors and give the same result for both.
(deftest select-columns-test
  (let [abc-ds (ds/->dataset {:A [1 2 3]
                              :B [4 5 6]
                              :C ["A" "B" "C"]})
        pick (fn [selector] (ds/select-columns abc-ds selector))]
    (is (= (pick [:C])
           (pick cf/categorical)))
    (is (= (pick cf/numeric)
           (pick [:A :B])))))
;; drop-columns and remove-columns must agree with each other and with the
;; complementary select-columns when given a column-filter function.
(deftest drop-columns-test
  (let [abc-ds (ds/->dataset {:A [1 2 3]
                              :B [4 5 6]
                              :C ["A" "B" "C"]})
        keep-only (fn [colnames] (ds/select-columns abc-ds colnames))]
    (is (= (ds/drop-columns abc-ds cf/categorical)
           (ds/remove-columns abc-ds cf/categorical)
           (keep-only [:A :B])))
    (is (= (ds/drop-columns abc-ds cf/numeric)
           (ds/remove-columns abc-ds cf/numeric)
           (keep-only [:C])))))
;; Column selection by explicit indexes and by the result of an elementwise
;; comparison must both return the first three values.
(deftest column-select-test
  (let [six-col (ds-col/new-column :test [0 1 2 3 4 5])
        below-three (dfn/< six-col 3)]
    (is (= [0 1 2]
           (ds-col/select six-col [0 1 2])))
    (is (= [0 1 2]
           (ds-col/select six-col below-three)))))
;; ds/select with :all columns and a row selector given either as indexes or
;; as the result of an elementwise comparison.
(deftest dataset-column-select-test
  (let [src (ds/->dataset {:A [1 2 3 4 5]
                           :B [2 3 4 5 6]})
        first-and-last (ds/->dataset {:A [1 5]
                                      :B [2 6]})
        first-two (ds/->dataset {:A [1 2]
                                 :B [2 3]})]
    (is (= first-and-last
           (ds/select src :all [0 4])))
    (is (= first-two
           (ds/select src :all (dfn/< (:A src) 3))))))
;; The descriptive-stats result for the stocks data must be printable.
(deftest basic-desc-stats
  (let [stats (-> (ds/->dataset "test/data/stocks.csv")
                  (ds/descriptive-stats))]
    (is (some? (.toString ^Object stats)))))
;; Prepending or appending five empty slots to a one-element date column must
;; produce nils on the corresponding side of the original date.
(deftest extend-prepend-packed-column
  (let [day (LocalDate/of 2022 12 28)
        date-col ((ds/->dataset {:a [day]}) :a)
        prepended (col-impl/prepend-column-with-empty date-col 5)
        extended (col-impl/extend-column-with-empty date-col 5)]
    (is (= [nil nil nil nil nil (LocalDate/of 2022 12 28)]
           (vec prepended)))
    (is (= [(LocalDate/of 2022 12 28) nil nil nil nil nil]
           (vec extended)))))
;; Regression for issue 342: filter-column on a column that is itself named
;; :datatype must complete without throwing (presumably the name once clashed
;; with an options key -- TODO confirm against the issue).
;; The test previously had no assertion at all; it now also pins the result.
(deftest filter-regression-342
  (let [filtered (ds/filter-column (ds/->dataset (repeat 1000 {:datatype :float64 :b 2}))
                                   :datatype #(= % :object))]
    ;; Every row holds :float64, so the :object predicate can never match.
    (is (= 0 (ds/row-count filtered)))))
;; Regression for issue 343. The original body built the dataset and then
;; asserted nothing (empty `let` body), so head/tail were never exercised.
;; The assertions below check the default head/tail of a 1000-row dataset.
(deftest head-tail-regression-343
  (let [ds (ds/->dataset {:a (repeat 1000 :a)
                          :b (range 1000)})
        head-ds (ds/head ds)
        tail-ds (ds/tail ds)]
    ;; Both default to five rows.
    (is (= 5 (ds/row-count head-ds)))
    (is (= 5 (ds/row-count tail-ds)))
    (is (= [0 1 2 3 4] (vec (head-ds :b))))
    (is (= [995 996 997 998 999] (vec (tail-ds :b))))))
;; A column mixing a number with booleans must end up with datatype :object.
(deftest mixed-boolean-values
  (let [mixed-col ((ds/->dataset {:a [1 true false]}) :a)]
    (is (= :object (:datatype (meta mixed-col))))))
;; Feeds rows into two dataset parsers (one with three columns, one with a
;; single column), runs warm-up loops, prints timings for dataset creation and
;; row access, and finally checks the last row of the three-column parser.
(deftest fast-parser-ds-creation
  (let [test-ds (ds/->dataset {:a (range 2000) :b (range 2000) :c (range 2000)})
        one-col-parser (ds-api/dataset-parser {:dataset-name "just/a/column"})
        three-col-parser (ds-api/dataset-parser {:dataset-name "all/three/columns"})]
    (ds-proto/add-rows three-col-parser (ds/rows test-ds))
    (ds-proto/add-rows one-col-parser (ds/rows (ds/select-columns test-ds [:a])))
    ;; Warm-up passes before the timed sections.
    (dotimes [_ 4000] @three-col-parser)
    (dotimes [_ 4000] @one-col-parser)
    (dotimes [_ 4000] (nth three-col-parser -1))
    (dotimes [_ 10] (vec three-col-parser))
    (println "3 column creation")
    (time (dotimes [_ 1000] @three-col-parser))
    (println "1 column creation")
    (time (dotimes [_ 1000] @one-col-parser))
    (println "row-at time")
    (println (nth three-col-parser -1))
    (time
     (dotimes [_ 1000] (nth three-col-parser -1)))
    (time (vec three-col-parser))
    (is (= {:a 1999 :b 1999 :c 1999} (nth three-col-parser -1)))))
;; Selecting the same column twice must not duplicate it in the result.
(deftest select-columns-repeat-columns
  (let [selected (ds/select-columns (ds/->dataset {:a [1 2] :b [3 4]})
                                    [:a :b :a])]
    (is (= [:a :b] (vec (ds/column-names selected))))))
;; ds-col/column-map applied to three columns at once with a nil options
;; argument; the combined column must be accepted by add-or-update-column.
(deftest vararg-column-map
  (let [ds (ds/->dataset {:foo (range 0 5)
                          :bar (repeatedly (fn [] (rand-int 100)))
                          :baz (repeatedly (fn [] (rand-int 100)))})
        mod-7-label (fn [x y z]
                      (if (zero? (mod (+ x y z) 7)) "mod 7" "not mod 7"))]
    ;; This threw before.
    (is (some? (ds/add-or-update-column
                ds :quz
                (apply ds-col/column-map mod-7-label nil (ds/columns ds)))))))
;; Selecting row 0 from a dataset without rows must raise
;; IndexOutOfBoundsException, for both an empty column and an empty dataset.
(deftest ioobe-issue-360
  (let [select-first-row (fn [src]
                           (ds/select-rows (ds/->dataset src) [0]))]
    (is (thrown? IndexOutOfBoundsException (select-first-row {:a []})))
    (is (thrown? IndexOutOfBoundsException (select-first-row [])))))
;; Row count must be three for each column-source combination used here: a
;; pmap result, a list or vector next to an infinite cycle, and a list next to
;; a scalar.
(deftest failed-pmap-column-issue-367
  (let [rows-of (fn [col-map] (ds/row-count (ds/->dataset col-map)))]
    (is (== (rows-of {:a (tech.v3.parallel.for/pmap identity [1 2 3])}) 3))
    (is (== (rows-of {:a (list 1 2 3)
                      :b (cycle [1 2 3 4])})
            3))
    (is (== (rows-of {:a [1 2 3]
                      :b (cycle [1 2 3 4])})
            3))
    (is (== (rows-of {:a (list 1 2 3)
                      :b 2})
            3))))
;; Grouping an int-array backed column: the key (int 0) must map to the three
;; row indexes at which 0 occurs.
(deftest group-by-column->index-issue-372
  (let [thrice-0-to-9 (int-array (concat (range 10) (range 10) (range 10)))
        index-groups (ds/group-by-column->indexes (ds/->dataset {:a thrice-0-to-9})
                                                  :a)]
    (is (= 3 (count (get index-groups (int 0)))))))
;; Row selection by a boolean mask, given either as a :boolean list or as a
;; plain vector of booleans, must keep the rows where the mask is true.
(deftest select-bool-issue-387
  (let [ds (ds/->dataset {:a (range 10)})
        mask [true false true false true false true false true false]
        kept [0 2 4 6 8]]
    (is (= kept
           (:a (ds/select ds :all (dtype/make-list :boolean mask)))))
    (is (= kept
           (:a (ds/select ds :all mask))))))
;; With :disable-na-as-missing? true the string "NA" must be kept as data,
;; for both column-map and row-sequence inputs.
(deftest disable-na-as-missing
  (let [values ["foo" "NA"]
        opts {:disable-na-as-missing? true}
        from-columns (ds/->dataset {:a values} opts)
        from-rows (ds/->dataset (map (fn [v] {:a v}) values) opts)]
    (is (= values (:a from-columns)))
    (is (= values (:a from-rows)))))
;; With a fixed :string parser, "NA" / "na" are kept when
;; :disable-na-as-missing? is true and become missing when it is false.
(deftest fixed-type-disable-na-as-missing
  (let [rows [{:a "no"} {:a "NA"} {:a "na"}]
        parse-with (fn [disable?]
                     (ds/->dataset rows {:parser-fn :string
                                         :disable-na-as-missing? disable?}))
        na-kept (parse-with true)
        na-missing (parse-with false)]
    (is (= ["no" "NA" "na"] (:a na-kept)))
    (is (= ["no" nil nil] (:a na-missing)))))
;; After nulling rows 10..19 via a parallel row-map, the column's missing set
;; is 10..19 and a sub-buffer starting at offset 10 reports its missing
;; indexes relative to itself (0..4).
(deftest sub-buffer-col-incorrect-missing
  (let [null-from-ten (fn [row]
                        {:a (if (>= (:a row) 10)
                              nil (:a row))})
        nulled-ds (ds/row-map (ds/->dataset {:a (range 20)})
                              null-from-ten
                              {:parallelism 2
                               :min-n 1})
        full-col (nulled-ds :a)
        tail-slice (dtype/sub-buffer full-col 10 5)]
    (is (= (range 10 20)
           (bitmap/->random-access (ds/missing full-col))))
    (is (= (range 5)
           (bitmap/->random-access (ds/missing tail-slice))))))
;; A transducer-based reduction (`into` with `take`) over an Instant column
;; must terminate early and yield three elements.
(deftest issue-413-reduction-on-instant-column
  (let [instants-ds (ds/->dataset {:x (range 5)
                                   :y (repeatedly 5 (fn [] (java.time.Instant/now)))})
        first-three (into [] (take 3) (:y instants-ds))]
    (is (= 3 (count first-three)))))
;; The :print-index-range metadata set by print-all must be preserved by
;; sort-by-column and filter-column.
(deftest issue-432-issue-371
  (let [print-all-ds (ds/print-all (ds/->dataset {:x (repeatedly 50 rand)}))
        print-range (fn [dataset] (:print-index-range (meta dataset)))]
    (is (= :all (print-range (ds/sort-by-column print-all-ds :x))))
    (is (= :all (print-range (ds/filter-column print-all-ds :x pos?))))))
;; Passing a keyword as the third filter-column argument keeps only the rows
;; whose value equals that keyword.
(deftest issue-447-filter-column-by-keyword
  (let [alternating (ds/->dataset {:a [:a :b :a :c :a :d :a :e :a :f]})]
    (is (= [:a :a :a :a :a]
           (vec (ds/column (ds/filter-column alternating :a :a) :a))))))
;; After categorical->number with a :float64 result type, the two-category
;; column must still contain exactly two distinct values.
(deftest issue-450-incorrect-distinct
  (let [two-categories (ds/->dataset {:y [:a :b :b :a :a :a :b :b]})
        numeric-ds (ds/categorical->number two-categories [:y] [] :float64)]
    (is (= 2
           (count (distinct (:y numeric-ds)))))))
;; Cloning a row-filtered dataset must keep the :date column unchanged.
(deftest clone-causes-filter-fail
  (let [recent-amzn? (fn [row]
                       (and
                        (.isAfter ^LocalDate (get row :date) (LocalDate/parse "2009-06-01"))
                        (= (get row :symbol) "AMZN")))
        filtered (ds/filter (ds/->dataset "test/data/stocks.csv" {:key-fn keyword})
                            recent-amzn?)]
    (is (= (vec (:date filtered))
           (vec (:date (dtype/clone filtered)))))))
;; replace-missing with a fixed value on a column whose every entry is nil.
(deftest replace-missing-empty-column-issue-458
  (let [people (ds/->dataset [{:name "fred"
                               :age nil}
                              {:name "ethel"
                               :age nil}
                              {:name "sally"
                               :age nil}])]
    (is (= [100 100 100]
           (vec (ds/column (ds/replace-missing people [:age] :value 100)
                           :age))))))
;; Assertion body for the `rolling-off-edge` test: a fixed window of 10 over a
;; 20-ones / 20-zeros column must give pairwise different results for the
;; :left, :center and :right relative window positions.
(defn rolling-off-edge-fn
  []
  (let [step-ds (fn []
                  (ds/->dataset {:x (concat (repeat 20 1)
                                            (repeat 20 0))}))
        rolled-at (fn [relative-window-position]
                    (ds/print-all
                     (ds-roll/rolling (step-ds)
                                      10
                                      {:mean-x (ds-roll/mean :x)}
                                      {:relative-window-position relative-window-position})))]
    (is (not= (rolled-at :left)
              (rolled-at :center)))
    (is (not= (rolled-at :center)
              (rolled-at :right)))
    (is (not= (rolled-at :left)
              (rolled-at :right)))))
;; Test entry point; the assertions themselves are made inside
;; `rolling-off-edge-fn`, which can also be called directly (e.g. from a REPL).
(deftest rolling-off-edge
(rolling-off-edge-fn))
;; Assertion body for the `stacked-rolling` test: two constant-valued datasets
;; are each given a left-positioned rolling mean of :y, then concatenated;
;; every resulting :mean-y must be exactly 1.0 or 2.0.
(defn stacked-rolling-fn
  []
  (let [rolled-constant (fn [n-rows y-val tag]
                          (-> (ds/->dataset {:y (repeat n-rows y-val)
                                             :x (range)
                                             :t tag})
                              (ds-roll/rolling 10
                                               {:mean-y (ds-roll/mean :y)}
                                               {:relative-window-position :left})))
        twos (rolled-constant 20 2 0)
        ones (rolled-constant 40 1 1)
        stacked (ds/print-all (ds/concat twos ones))]
    ;; HH: 2025-09-08 - My condolences if this fails on your architecture
    (is (every? #{1.0 2.0} (:mean-y stacked)))))
;; Test entry point; the assertions themselves are made inside
;; `stacked-rolling-fn`, which can also be called directly (e.g. from a REPL).
(deftest stacked-rolling
(stacked-rolling-fn))
;; REPL scratchpad, never evaluated when the namespace loads. It uses criterium
;; `quick-bench` to compare plain-Clojure operations on a vector of maps with
;; the equivalent dataset operations on the same data.
(comment
(require '[criterium.core :as crit])
;; 100000 random rows, as a vector of maps and as a dataset.
(def data (vec (repeatedly 100000 (fn [] {:a (rand-int 20) :b (rand) :c (rand)}))))
(def ds (ds/->dataset data))
;; Grouping by :a: clojure.core/group-by vs ds/group-by-column.
(crit/quick-bench (group-by :a data))
(crit/quick-bench (ds/group-by-column ds :a {:map-fn hamf/mut-long-hashtable-map}))
;; Filter on :a > 10, then sum of :b * :c, as a transducer over the maps.
(crit/quick-bench (transduce (comp (filter #(> (:a %) 10))
(map #(* (:b %) (:c %))))
+ 0.0 data))
(require '[tech.v3.datatype.functional :as dfn])
;; Dataset pipeline: filter on :a > 10, then sum of :b + :c.
;; NOTE(review): this adds :b and :c while the transducer versions multiply
;; them, so the benchmarks do not compute the same quantity -- confirm intent.
(crit/quick-bench (as-> ds ds
(ds/filter-column ds :a #(> % 10))
(dfn/+ (ds :b) (ds :c))
(dfn/sum-fast ds)))
(require '[ham-fisted.api :as hamf])
;; Same dataset pipeline, with the filter written as a hamf/long-predicate.
(crit/quick-bench (as-> ds ds
(ds/filter-column ds :a (hamf/long-predicate a (> a 10)))
(dfn/+ (ds :b) (ds :c))
(dfn/sum-fast ds)))
;; Transducer version again, with explicit long/double coercions.
(crit/quick-bench (transduce (comp (filter #(> (long (:a %)) 10))
(map #(* (double (:b %)) (double (:c %)))))
+ 0.0 data))
)