init research
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
(ns tech.v3.libs.fastexcel-test
|
||||
(:require [tech.v3.libs.fastexcel :as xlsx-parse]
|
||||
[tech.v3.dataset :as ds]
|
||||
[tech.v3.datatype :as dtype]
|
||||
[clojure.test :refer [deftest is testing]]))
|
||||
|
||||
(def xls-file "test/data/file_example_XLS_1000.xls")
|
||||
(def xlsx-file "test/data/file_example_XLSX_1000.xlsx")
|
||||
(def sparse-file "test/data/sparsefile.xlsx")
|
||||
(def stocks-file "test/data/stocks.xlsx")
|
||||
(def stocks-bad-date-file "test/data/stocks-bad-date.xlsx")
|
||||
(def duplicate-headers-file "test/data/duplicate-headers.xlsx")
|
||||
|
||||
|
||||
|
||||
(deftest happy-path-parse-test
|
||||
(let [ds (first (xlsx-parse/workbook->datasets xlsx-file))]
|
||||
(is (= #{"column-0" "Age" "Country" "First Name" "Gender" "Date" "Last Name" "Id"}
|
||||
(set (ds/column-names ds))))
|
||||
(is (= #{:float64 :string}
|
||||
(set (map dtype/get-datatype (ds/columns ds)))))
|
||||
(is (= 1000 (ds/row-count ds)))
|
||||
(is (= 8 (ds/column-count ds)))))
|
||||
|
||||
|
||||
|
||||
(deftest sparse-file-parse-test
|
||||
(let [ds (first (xlsx-parse/workbook->datasets sparse-file))]
|
||||
(is (= 8 (ds/row-count ds)))
|
||||
(is (= 8 (ds/column-count ds)))
|
||||
(is (every? #(= (set (range 8)) %)
|
||||
(map (comp set ds/missing ds) ["column-0" "a" "column-6"])))
|
||||
(is (= [1.0 1.0 1.0 "a" 2.0 23.0]
|
||||
(->> (ds/columns ds)
|
||||
(mapcat (comp dtype/->reader ds/drop-missing))
|
||||
vec)))))
|
||||
|
||||
(deftest datetime-test
|
||||
(let [ds (first (xlsx-parse/workbook->datasets
|
||||
stocks-file
|
||||
{:parser-fn {"date" :packed-local-date}}))]
|
||||
(is (= :packed-local-date (dtype/get-datatype (ds "date"))))))
|
||||
|
||||
|
||||
(deftest bad-datetime-test
|
||||
(let [ds (first (xlsx-parse/workbook->datasets stocks-bad-date-file))]
|
||||
(is (= :string (dtype/get-datatype (ds "date"))))
|
||||
(is (= {java.lang.String 29}
|
||||
(->> (ds "date")
|
||||
(map type)
|
||||
frequencies)))))
|
||||
|
||||
|
||||
(deftest skip-rows-test
|
||||
(let [ds (ds/->dataset "test/data/holdings-daily-us-en-mdy.xlsx"
|
||||
{:n-initial-skip-rows 4
|
||||
:parser-fn {"Identifier" :string
|
||||
"Weight" :float64}})]
|
||||
;;column-8 had no data
|
||||
(is (= #{:float64 :string :boolean}
|
||||
(set (map dtype/get-datatype (vals ds)))))
|
||||
(is (= ["Name"
|
||||
"Ticker"
|
||||
"Identifier"
|
||||
"SEDOL"
|
||||
"Weight"
|
||||
"Sector"
|
||||
"Shares Held"
|
||||
"Local Currency"
|
||||
"column-8"]
|
||||
(vec (ds/column-names ds))))))
|
||||
|
||||
(deftest ensure-unique-headers-test
|
||||
(testing "that all headers are are forced to be unique"
|
||||
(let [ds (ds/->dataset duplicate-headers-file
|
||||
{:ensure-unique-column-names? true})]
|
||||
(is (ds/column-count ds) 7)
|
||||
(is (count (set (ds/column-names ds))) 7))
|
||||
(let [ds (first (xlsx-parse/workbook->datasets duplicate-headers-file
|
||||
{:ensure-unique-column-names? true}))]
|
||||
(is (ds/column-count ds) 7)
|
||||
(is (count (set (ds/column-names ds))) 7)))
|
||||
|
||||
(testing "that exception is thrown on duplicate headers"
|
||||
(is (thrown? RuntimeException (ds/->dataset duplicate-headers-file)))
|
||||
(is (thrown? RuntimeException (xlsx-parse/workbook->datasets duplicate-headers-file))))
|
||||
|
||||
(testing "that custom postfix-fn works correctly"
|
||||
(let [ds (ds/->dataset duplicate-headers-file
|
||||
{:ensure-unique-column-names? true
|
||||
:unique-column-name-fn (fn [col-idx colname] (str colname "::" col-idx))})]
|
||||
(is (some? (ds/column ds "column::2")))
|
||||
(is (some? (ds/column ds "column::4")))
|
||||
(is (some? (ds/column ds "column-1::6"))))))
|
||||
|
||||
|
||||
(deftest number-colname
|
||||
(let [ds (ds/->dataset "test/data/number_column.xlsx")]
|
||||
(is (= (first (ds/column-names ds)) 0.0))))
|
||||
Reference in New Issue
Block a user