init research
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
(ns tech.v3.dataset.github-test
|
||||
(:require [tech.v3.dataset :as ds]
|
||||
[tech.v3.datatype :as dtype]
|
||||
[clojure.test :refer [deftest is]]))
|
||||
|
||||
|
||||
|
||||
(comment
|
||||
;;This sometimes returns a 500 error.
|
||||
;; Fetches the public GitHub events feed over HTTP, parses it as JSON with
;; keys converted through `keyword`, and checks two things about the result:
;; every column name is a keyword, and `dtype/shape` reports [8 30].
;; NOTE(review): the expected shape is tied to whatever the live endpoint
;; returns at call time -- confirm it is still accurate before re-enabling.
(deftest load-github-events
  (let [parse-options {:file-type :json
                       :key-fn keyword}
        events (ds/->dataset "https://api.github.com/events" parse-options)]
    ;; :key-fn keyword should leave no string column names behind.
    (is (every? keyword? (ds/column-names events)))
    (is (= [8 30] (dtype/shape events)))))
|
||||
;; REPL setup: pulls in the extra dtype namespaces used by the timing
;; snippets in this comment form and loads the flights14 CSV once.
;; `defonce` keeps re-evaluation of this form from downloading the file again.
(do
  (require '[tech.v3.datatype.functional :as dfn]
           '[tech.v3.datatype.argops :as argops]
           '[tech.v3.datatype.unary-pred :as un-pred])
  (defonce flights
    (ds/->dataset
     "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv")))
|
||||
|
||||
;; Times counting the rows whose combined "arr_delay" + "dep_delay" is below
;; zero: the boolean comparison result is converted to indexes and the number
;; of indexes is counted.
(time
 (let [total-delay (dfn/+ (flights "arr_delay")
                          (flights "dep_delay"))
       below-zero  (dfn/< total-delay 0)
       hit-indexes (un-pred/bool-reader->indexes below-zero)]
   (dtype/ecount hit-indexes)))
|
||||
|
||||
;;Another way to get the same result is to use summation. Booleans are
|
||||
;;interpreted very specifically below where false is 0 and true is 1.
|
||||
;;Double summation is very fast.
|
||||
;; Times the summation variant: the boolean result of the `< 0` comparison on
;; the combined "arr_delay" + "dep_delay" values is passed straight to dfn/sum.
(time
 (let [total-delay (dfn/+ (flights "arr_delay")
                          (flights "dep_delay"))
       below-zero  (dfn/< total-delay 0)]
   (dfn/sum below-zero)))
|
||||
|
||||
|
||||
|
||||
)
|
||||
Reference in New Issue
Block a user