128 lines
4.6 KiB
Clojure
128 lines
4.6 KiB
Clojure
(ns df-bridge.core
|
|
"Bridge between Kotlin DataFrame and Clojure data ecosystem.
|
|
Converts via Map<String, List> -- the natural columnar interchange type."
|
|
(:require [tech.v3.dataset :as ds]
|
|
[tablecloth.api :as tc])
|
|
(:import [org.jetbrains.kotlinx.dataframe.api ToDataFrameKt TypeConversionsKt]
|
|
[org.jetbrains.kotlinx.dataframe DataFrame DataColumn DataRow]))
|
|
|
|
;; The DataColumn companion object, read once from the static `Companion`
;; member and shared by the functions in this namespace that build columns.
(def ^:private companion DataColumn/Companion)
|
|
|
|
;;; --- Helpers ---
|
|
|
|
(defn- datarow->map
  "Turn a Kotlin DataRow into a Clojure map with keyword keys.

  The row is flattened with TypeConversionsKt/toMap. Any value that is itself
  a DataRow (a ColumnGroup cell) is converted the same way, recursively; every
  other value is passed through untouched."
  [^DataRow row]
  (let [convert (fn [cell]
                  (if (instance? DataRow cell)
                    (datarow->map cell)
                    cell))]
    (into {}
          (for [[col-name cell] (TypeConversionsKt/toMap row)]
            [(keyword col-name) (convert cell)]))))
|
|
|
|
(defn- deep-convert-col-values
  "Return a vector of `values` in which every DataRow has been replaced by its
  Clojure-map form (via datarow->map). All other values are kept as they are."
  [values]
  (into []
        (map (fn [cell]
               (cond-> cell
                 (instance? DataRow cell) datarow->map)))
        values))
|
|
|
|
;;; --- Kotlin DataFrame -> Clojure ---
|
|
|
|
(defn kt->col-map
  "Convert a Kotlin DataFrame to a column-oriented Clojure map.

  Returns {\"col1\" [v1 v2 ...] \"col2\" [v1 v2 ...]}: the keys are the column
  names exactly as TypeConversionsKt/toMap reports them, and each value is a
  vector. DataRow cells (ColumnGroup columns) are turned into nested
  keyword maps."
  [kt-df]
  (->> (TypeConversionsKt/toMap kt-df)
       (map (fn [entry]
              [(key entry) (deep-convert-col-values (val entry))]))
       (into {})))
|
|
|
|
(defn kt->dataset
  "Build a tech.ml.dataset from a Kotlin DataFrame.

  The frame is first turned into a column map by kt->col-map and then handed
  to ds/->dataset. TMD has no nested columns, so a ColumnGroup arrives as an
  ordinary column whose cells are maps."
  [kt-df]
  (-> kt-df
      kt->col-map
      ds/->dataset))
|
|
|
|
(defn kt->tc
  "Build a tablecloth dataset from a Kotlin DataFrame.

  The frame is converted to a column map with kt->col-map, which is then
  passed to tc/dataset."
  [kt-df]
  (-> kt-df
      kt->col-map
      tc/dataset))
|
|
|
|
(defn kt->rows
  "Convert a Kotlin DataFrame to row-oriented Clojure data.

  Returns a vector holding one keyword-keyed map per row, in the order the
  frame's iterator yields them. ColumnGroup cells become nested keyword maps."
  [kt-df]
  (->> (.iterator kt-df)
       iterator-seq
       (mapv datarow->map)))
|
|
|
|
;;; --- Clojure -> Kotlin DataFrame ---
|
|
|
|
(defn col-map->kt
  "Convert a column-oriented map to a Kotlin DataFrame.

  Input: {\"col1\" [v1 v2 ...] \"col2\" [v1 v2 ...]}.
  Keyword column names are accepted too and are converted with `name`; any
  other key goes through `str` (a no-op for strings).

  The entries are copied into a java.util.LinkedHashMap so that they are
  handed to ToDataFrameKt/toDataFrame in the iteration order of `col-map`.
  A plain HashMap would reorder the columns by hash."
  [col-map]
  (let [jmap (java.util.LinkedHashMap.)]
    (doseq [[k v] col-map]
      ;; Kotlin expects String keys, so normalise them before the hand-off.
      (.put jmap (if (keyword? k) (name k) (str k)) v))
    (ToDataFrameKt/toDataFrame jmap)))
|
|
|
|
(defn dataset->kt
  "Convert a tech.ml.dataset / tablecloth dataset to a Kotlin DataFrame.

  Every column named by ds/column-names is copied into a vector and stored
  under a String key: keywords are converted with `name`, everything else
  with `str`.

  The columns are collected in a java.util.LinkedHashMap so that they reach
  ToDataFrameKt/toDataFrame in the order ds/column-names returned them.
  A plain HashMap would reorder them by hash."
  [ds]
  ;; The parameter `ds` is only a local; `ds/column-names` and `ds/column`
  ;; are namespace-qualified symbols and still resolve through the
  ;; tech.v3.dataset alias.
  (let [col-map (java.util.LinkedHashMap.)]
    (doseq [col-name (ds/column-names ds)]
      (.put col-map
            (if (keyword? col-name) (name col-name) (str col-name))
            (vec (ds/column ds col-name))))
    (ToDataFrameKt/toDataFrame col-map)))
|
|
|
|
(defn rows->kt
  "Convert a seq of row maps to a Kotlin DataFrame.

  Each row's keys are converted to Strings with `name`, so keywords, strings
  and symbols are accepted. Rows are copied into java.util.LinkedHashMap
  instances so that the key order of every row map is kept when it is handed
  over; a plain HashMap would reorder the keys by hash.

  Uses the @JvmName variant for Iterable<Map<String,Any?>>."
  [rows]
  (let [->jrow (fn [m]
                 (let [jrow (java.util.LinkedHashMap.)]
                   (doseq [[k v] m]
                     (.put jrow (name k) v))
                   jrow))
        ;; Hint the argument so the ArrayList(Collection) constructor is
        ;; selected at compile time instead of via reflection.
        jrows  (java.util.ArrayList. ^java.util.Collection (mapv ->jrow rows))]
    (ToDataFrameKt/toDataFrameMapStringAnyNullable jrows)))
|
|
|
|
;;; --- ColumnGroup support ---
|
|
|
|
(defn make-column-group
  "Build a Kotlin DataFrame ColumnGroup out of Clojure data.

  group-name: name of the group (converted with `str`)
  col-map:    {\"col1\" [v1 v2 ...] ...} -- one entry per nested column

  Every entry becomes a column created with createWithTypeInference from an
  ArrayList copy of its values. The columns are assembled into an inner
  DataFrame, which createColumnGroup then wraps under `group-name`."
  [group-name col-map]
  (let [nested-cols (into []
                          (map (fn [[col-name values]]
                                 ;; NOTE(review): the trailing `false` is presumably the
                                 ;; nullable flag -- confirm behaviour for nil values.
                                 (.createWithTypeInference companion
                                                           (str col-name)
                                                           (java.util.ArrayList. values)
                                                           false)))
                          col-map)
        nested-df   (ToDataFrameKt/toDataFrameAnyColumn nested-cols)]
    (.createColumnGroup companion (str group-name) nested-df)))
|
|
|
|
(defn make-kt-with-groups
  "Build a Kotlin DataFrame, optionally containing ColumnGroups, from a spec.

  spec is a vector of [name data] pairs. For each pair:
  - if data is a map of {col-name values}, a ColumnGroup is created with
    make-column-group;
  - otherwise data is treated as a collection of values and a ValueColumn is
    created from an ArrayList copy of it with createWithTypeInference.

  The resulting columns are combined, in spec order, by
  ToDataFrameKt/toDataFrameAnyColumn."
  [spec]
  (letfn [(->column [[col-name data]]
            (if-not (map? data)
              (.createWithTypeInference companion
                                        (str col-name)
                                        (java.util.ArrayList. data)
                                        false)
              (make-column-group col-name data)))]
    (ToDataFrameKt/toDataFrameAnyColumn (mapv ->column spec))))
|
|
|
|
;;; --- Roundtrip test ---
|
|
|
|
(defn roundtrip-test
  "Quick sanity check: Clojure map -> Kotlin DataFrame -> Clojure map.

  Builds a three-column input (name, age, score) backed by ArrayLists, sends
  it through col-map->kt and back through kt->col-map, and returns a map
  describing the intermediate frame and the result. :roundtrip-ok? compares
  only the \"name\" column of the output with that of the input."
  []
  (let [->jlist   (fn [xs] (java.util.ArrayList. ^java.util.Collection xs))
        input     {"name"  (->jlist ["Alice" "Bob" "Charlie"])
                   "age"   (->jlist [30 25 35])
                   "score" (->jlist [95.5 87.3 92.1])}
        kt-df     (col-map->kt input)
        output    (kt->col-map kt-df)
        names-in  (get input "name")
        names-out (get output "name")]
    {:input           input
     :kt-df-class     (class kt-df)
     :kt-df-rows      (.rowsCount kt-df)
     :kt-df-cols      (.columnsCount kt-df)
     :kt-df-col-names (vec (.columnNames kt-df))
     :output          output
     :roundtrip-ok?   (= names-out names-in)}))
|