Files
df-research/bridge/src/df_bridge/core.clj
2026-02-08 11:20:43 -10:00

128 lines
4.6 KiB
Clojure

(ns df-bridge.core
"Bridge between Kotlin DataFrame and Clojure data ecosystem.
Converts via Map<String, List> -- the natural columnar interchange type."
(:require [tech.v3.dataset :as ds]
[tablecloth.api :as tc])
(:import [org.jetbrains.kotlinx.dataframe.api ToDataFrameKt TypeConversionsKt]
[org.jetbrains.kotlinx.dataframe DataFrame DataColumn DataRow]))
;; The `Companion` static member of the Kotlin `DataColumn` interface, read once
;; when this namespace is loaded. The column-building functions below call its
;; `createWithTypeInference` and `createColumnGroup` methods.
(def ^:private companion (DataColumn/Companion))
;;; --- Helpers ---
(defn- datarow->map
  "Turn a Kotlin DataRow into a Clojure map keyed by keywords.
  A value that is itself a DataRow (i.e. a nested ColumnGroup cell) is
  converted recursively; any other value is passed through untouched."
  [^DataRow row]
  (letfn [(convert-entry [[col-name value]]
            [(keyword col-name)
             (if (instance? DataRow value)
               (datarow->map value)
               value)])]
    (into {}
          (for [entry (TypeConversionsKt/toMap row)]
            (convert-entry entry)))))
(defn- deep-convert-col-values
  "Return a vector of the column's values in which every DataRow has been
  replaced by its Clojure-map form (via `datarow->map`). Non-DataRow values
  are kept as they are."
  [values]
  (letfn [(unwrap [value]
            ;; Only DataRow cells need conversion; everything else is already
            ;; a plain value.
            (cond-> value
              (instance? DataRow value) datarow->map))]
    (into [] (map unwrap) values)))
;;; --- Kotlin DataFrame -> Clojure ---
(defn kt->col-map
  "Produce a column-oriented Clojure map from a Kotlin DataFrame.
  The result has the shape {\"col1\" [v1 v2 ...] \"col2\" [v1 v2 ...]}:
  keys are the column names as returned by `TypeConversionsKt/toMap`, values
  are vectors. Cells that are DataRows (ColumnGroup columns) come back as
  nested keyword maps."
  [kt-df]
  (reduce (fn [acc [col-name values]]
            (assoc acc col-name (deep-convert-col-values values)))
          {}
          (TypeConversionsKt/toMap kt-df)))
(defn kt->dataset
  "Build a tech.ml.dataset from a Kotlin DataFrame.
  The frame is first turned into a column map with `kt->col-map`, so
  ColumnGroups end up as columns whose cells are maps (TMD has no nested
  columns)."
  [kt-df]
  (-> kt-df
      kt->col-map
      ds/->dataset))
(defn kt->tc
  "Build a tablecloth dataset from a Kotlin DataFrame, going through the
  column map produced by `kt->col-map`."
  [kt-df]
  (let [columns (kt->col-map kt-df)]
    (tc/dataset columns)))
(defn kt->rows
  "Convert a Kotlin DataFrame to row-oriented Clojure data.
  Returns a vector with one keyword-keyed map per row, in the order yielded
  by the frame's iterator. ColumnGroups become nested keyword maps."
  [kt-df]
  (let [row-seq (iterator-seq (.iterator kt-df))]
    (into [] (map datarow->map) row-seq)))
;;; --- Clojure -> Kotlin DataFrame ---
(defn col-map->kt
  "Convert a column-oriented map to a Kotlin DataFrame.
  Input: {\"col1\" [v1 v2 ...] \"col2\" [v1 v2 ...]} -- string column names
  mapped to collections of values.
  The map is copied into a java.util.LinkedHashMap before being handed to
  `ToDataFrameKt/toDataFrame`, so the columns are presented to Kotlin in the
  same order in which `col-map` iterates."
  [col-map]
  ;; LinkedHashMap (rather than HashMap) keeps the iteration order of the
  ;; source map; a plain HashMap would reorder columns by key hash.
  (let [jmap (java.util.LinkedHashMap. ^java.util.Map col-map)]
    (ToDataFrameKt/toDataFrame jmap)))
(defn dataset->kt
  "Convert a tech.ml.dataset / tablecloth dataset to a Kotlin DataFrame.
  Each column is copied into a Clojure vector and stored under its name as a
  string: keyword names via `name`, anything else via `str`.
  Columns are inserted in the order returned by `ds/column-names` and that
  order is retained in the map passed to `ToDataFrameKt/toDataFrame`."
  [ds]
  ;; LinkedHashMap preserves insertion order; a plain HashMap would hand the
  ;; columns to Kotlin in hash order instead of dataset order.
  (let [col-map (java.util.LinkedHashMap.)]
    (doseq [col-name (ds/column-names ds)]
      (.put col-map
            (if (keyword? col-name) (name col-name) (str col-name))
            (vec (ds/column ds col-name))))
    (ToDataFrameKt/toDataFrame col-map)))
(defn rows->kt
  "Convert a seq of row maps to a Kotlin DataFrame.
  Uses the @JvmName variant for Iterable<Map<String,Any?>>.
  Row keys are turned into strings: keywords, symbols and strings via `name`
  (as before); any other key type (e.g. a number) via `str` instead of
  throwing. Each row is copied into a java.util.LinkedHashMap so the key
  order of the source map is kept."
  [rows]
  (let [->col-name (fn [k]
                     ;; `name` only accepts Named things and strings; fall
                     ;; back to `str` for everything else.
                     (if (or (string? k) (instance? clojure.lang.Named k))
                       (name k)
                       (str k)))
        ->jrow (fn [row]
                 (let [jrow (java.util.LinkedHashMap.)]
                   (doseq [[k v] row]
                     (.put jrow (->col-name k) v))
                   jrow))
        jrows (java.util.ArrayList. ^java.util.Collection (mapv ->jrow rows))]
    (ToDataFrameKt/toDataFrameMapStringAnyNullable jrows)))
;;; --- ColumnGroup support ---
(defn make-column-group
  "Build a Kotlin DataFrame ColumnGroup out of Clojure data.
  group-name: name of the group (stringified with `str`)
  col-map: {\"col1\" [v1 v2 ...] ...} -- the nested columns and their values
  Every entry of col-map becomes a column created with type inference; the
  columns are assembled into an inner DataFrame which is then wrapped as a
  column group."
  [group-name col-map]
  (letfn [(->column [[col-name values]]
            ;; NOTE(review): the trailing `false` is the third argument of
            ;; createWithTypeInference (presumably the nullable flag) --
            ;; confirm the behavior when `values` contains nil.
            (.createWithTypeInference companion
                                      (str col-name)
                                      (java.util.ArrayList. values)
                                      false))]
    (let [nested-columns (mapv ->column col-map)
          nested-df (ToDataFrameKt/toDataFrameAnyColumn nested-columns)]
      (.createColumnGroup companion (str group-name) nested-df))))
(defn make-kt-with-groups
  "Assemble a Kotlin DataFrame, possibly containing ColumnGroups, from a spec.
  spec is a vector of [name data] pairs. For each pair:
  - when data is a map of {col-name values}, a ColumnGroup is built with
    `make-column-group`
  - otherwise data is treated as a collection of values and a column is
    created from it with type inference (a ValueColumn)"
  [spec]
  (letfn [(value-column [col-name values]
            (.createWithTypeInference companion
                                      (str col-name)
                                      (java.util.ArrayList. values)
                                      false))
          (build-column [[col-name data]]
            (if-not (map? data)
              (value-column col-name data)
              (make-column-group col-name data)))]
    (ToDataFrameKt/toDataFrameAnyColumn (mapv build-column spec))))
;;; --- Roundtrip test ---
(defn roundtrip-test
  "Quick sanity test: Clojure map -> KT DataFrame -> Clojure map.
  Builds a fixed three-column input, converts it with `col-map->kt` and back
  with `kt->col-map`, and returns a map describing the result:
  :input, :kt-df-class, :kt-df-rows, :kt-df-cols, :kt-df-col-names, :output
  and :roundtrip-ok?.
  :roundtrip-ok? is true only when the output has exactly the input's column
  names and every column's values equal the input values."
  []
  (let [input {"name" (java.util.ArrayList. ["Alice" "Bob" "Charlie"])
               "age" (java.util.ArrayList. [30 25 35])
               "score" (java.util.ArrayList. [95.5 87.3 92.1])}
        kt-df (col-map->kt input)
        output (kt->col-map kt-df)
        ;; Compare every column, not just \"name\", so a problem in the
        ;; numeric columns is reported too.
        same-columns? (= (set (keys output)) (set (keys input)))
        same-values? (every? (fn [[col-name values]]
                               (= (get output col-name) values))
                             input)]
    {:input input
     :kt-df-class (class kt-df)
     :kt-df-rows (.rowsCount kt-df)
     :kt-df-cols (.columnsCount kt-df)
     :kt-df-col-names (vec (.columnNames kt-df))
     :output output
     :roundtrip-ok? (and same-columns? same-values?)}))