Files
2026-02-08 11:20:43 -10:00

116 lines
4.5 KiB
Clojure
Vendored

(ns tech.v3.libs.poi-test
(:require [tech.v3.libs.poi :as xlsx-parse]
[tech.v3.dataset :as ds]
[tech.v3.dataset.column :as ds-col]
[tech.v3.datatype.functional :as dfn]
[tech.v3.datatype :as dtype]
[clojure.test :refer [deftest is testing]]))
(def xls-file "test/data/file_example_XLS_1000.xls")
(def xlsx-file "test/data/file_example_XLSX_1000.xlsx")
(def sparse-file "test/data/sparsefile.xlsx")
(def stocks-file "test/data/stocks.xlsx")
(def duplicate-headers-file "test/data/duplicate-headers.xls")
(deftest happy-path-parse-test
(let [ds (first (xlsx-parse/workbook->datasets xlsx-file))
ds2 (first (xlsx-parse/workbook->datasets xlsx-file))]
(is (= #{"column-0" "Age" "Country" "First Name" "Gender" "Date" "Last Name" "Id"}
(set (ds/column-names ds))))
(is (= #{"column-0" "Age" "Country" "First Name" "Gender" "Date" "Last Name" "Id"}
(set (ds/column-names ds2))))
(is (= #{:float64 :string}
(set (map dtype/get-datatype (ds/columns ds)))))
(is (= 1000 (ds/row-count ds)))
(is (= 1000 (ds/row-count ds2)))
(is (= 8 (ds/column-count ds)))
(is (= 8 (ds/column-count ds2)))
(is (dfn/equals (ds "Age") (ds2 "Age")))
(is (dfn/equals (ds "Id") (ds2 "Id")))))
(deftest sparse-file-parse-test
(let [ds (first (xlsx-parse/workbook->datasets sparse-file))]
(is (= 8 (ds/row-count ds)))
(is (= 8 (ds/column-count ds)))
(is (every? #(= (set (range 8)) %)
(map (comp set ds-col/missing ds) ["column-0" "a" "column-6"])))
(is (= [1.0 1.0 1.0 "a" 2.0 23.0]
(->> (ds/columns ds)
(mapcat (comp dtype/->reader ds/drop-missing))
vec)))))
(deftest datetime-test
(let [ds (first (xlsx-parse/workbook->datasets
stocks-file
{:parser-fn {"date" :packed-local-date}}))]
(is (= :packed-local-date (dtype/get-datatype (ds "date"))))))
(deftest custom-parser-test
(let [ds (first (xlsx-parse/workbook->datasets
xls-file
{:parser-fn {"Date" [:local-date
"dd/MM/yyyy"]}}))]
(is (= :local-date (dtype/get-datatype (ds "Date"))))))
(deftest integer-field-test
(let [ds (first (xlsx-parse/workbook->datasets
xls-file
{:parser-fn {"Id" :int64}}))]
(is (= :int64 (dtype/get-datatype (ds "Id"))))))
(deftest xls-keyword-colnames
(let [ds (first (xlsx-parse/workbook->datasets
xls-file
{:key-fn keyword}))]
;;The first column is an integer so keyword returns nil for that.
;;This is also a good example in that the system produces keywords with spaces
;;in them...that definitely isn't ideal.
(is (every? keyword? (rest (ds/column-names ds))))))
(deftest key-fn-number-columns
(let [ds (first (xlsx-parse/workbook->datasets xlsx-file {:key-fn keyword}))]
(is (= 0 (count (filter nil? (ds/column-names ds)))))
(is (= #{:column-0 :Age :Country (keyword "First Name") :Gender :Date
(keyword "Last Name") (keyword "Id")}
(set (ds/column-names ds))))))
(deftest auto-infer-dates
(let [ds (first (xlsx-parse/workbook->datasets "test/data/stocks-with-dates.xlsx"))]
(is (= #{:string :packed-local-date :float64}
(->> (vals ds)
(map (comp :datatype meta))
set)))))
(deftest ensure-unique-headers-test
(testing "that all headers are are forced to be unique"
(let [ds (ds/->dataset duplicate-headers-file
{:ensure-unique-column-names? true})]
(is (ds/column-count ds) 7)
(is (count (set (ds/column-names ds))) 7))
(let [ds (first (xlsx-parse/workbook->datasets duplicate-headers-file
{:ensure-unique-column-names? true}))]
(is (ds/column-count ds) 7)
(is (count (set (ds/column-names ds))) 7)))
(testing "that exception is thrown on duplicate headers"
(is (thrown? RuntimeException (ds/->dataset duplicate-headers-file)))
(is (thrown? RuntimeException (xlsx-parse/workbook->datasets duplicate-headers-file))))
(testing "that custom postfix-fn works correctly"
(let [ds (ds/->dataset duplicate-headers-file
{:ensure-unique-column-names? true
:unique-column-name-fn (fn [col-idx colname] (str colname "::" col-idx))})]
(is (some? (ds/column ds "column::2")))
(is (some? (ds/column ds "column::4")))
(is (some? (ds/column ds "column-1::6"))))))