Files
2026-02-08 11:20:43 -10:00

38 lines
1.2 KiB
Clojure
Vendored

(ns tech.v3.dataset.github-test
(:require [tech.v3.dataset :as ds]
[tech.v3.datatype :as dtype]
[clojure.test :refer [deftest is]]))
(comment
;;This sometimes returns a 500 error.
(deftest load-github-events
  ;; Loads the GitHub events URL as JSON, converting every key with `keyword`,
  ;; then checks the resulting column names and the dataset shape.
  (let [parse-opts {:file-type :json
                    :key-fn keyword}
        events (ds/->dataset "https://api.github.com/events" parse-opts)]
    ;; Every column name must have gone through the :key-fn.
    (is (every? keyword? (ds/column-names events)))
    ;; NOTE(review): shape is presumably [column-count row-count] -- confirm
    ;; against the dataset docs; the expected values depend on the live API.
    (is (= [8 30] (dtype/shape events)))))
(do
  ;; REPL setup: bring the datatype helper namespaces into scope under the
  ;; aliases used by the timing expressions that follow.
  (require '[tech.v3.datatype.functional :as dfn]
           '[tech.v3.datatype.argops :as argops]
           '[tech.v3.datatype.unary-pred :as un-pred])
  ;; defonce keeps the dataset from being fetched again when this form is
  ;; re-evaluated in the same session.
  (defonce flights
    (ds/->dataset "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv")))
;; Times the index-based count: add the two delay columns, compare the sum
;; against 0, turn the boolean result into indexes, and count those indexes.
(time
  (dtype/ecount
    (un-pred/bool-reader->indexes
      (dfn/< (dfn/+ (flights "arr_delay")
                    (flights "dep_delay"))
             0))))
;;Another way to get the same result is to use summation.  Booleans are
;;interpreted numerically below: false is 0 and true is 1.
;;Double summation is very fast.
;; Times the summation-based variant: add the two delay columns, compare the
;; sum against 0, and sum the boolean result directly.
(time
  (dfn/sum
    (dfn/< (dfn/+ (flights "arr_delay")
                  (flights "dep_delay"))
           0)))
)