(ns exploration
  "Side-by-side exploration: Kotlin DataFrame bridge + Clojure data stack.

  Render with Clay:
    (require '[scicloj.clay.v2.api :as clay])
    (clay/make! {:source-path \"notebooks/exploration.clj\"})"
  (:require [tablecloth.api :as tc]
            [tech.v3.dataset :as ds]
            [tech.v3.datatype.functional :as dfn]
            [scicloj.tableplot.v1.plotly :as plotly]
            [scicloj.kindly.v4.kind :as kind]
            [df-bridge.core :as bridge]
            [malli.provider :as mp])
  (:import [org.jetbrains.kotlinx.dataframe.api ToDataFrameKt TypeConversionsKt]))

;; # Kotlin DataFrame <-> Clojure Bridge Exploration

;; ## 1. Create data in Kotlin DataFrame, bring it to Clojure

;; Build a dataset on the Kotlin side (simulating data coming from a Kotlin service):

(def kt-data
  ;; Column-name -> java.util.List map handed to Kotlin DataFrame.
  ;; A LinkedHashMap filled with explicit puts is used (rather than a plain
  ;; HashMap) so the columns are iterated in the order they are declared here
  ;; instead of in hash order.
  ;; The single seeded Random is consumed column by column (price, then
  ;; quantity, then rating); mapv is eager, so the draws happen in that order.
  (let [n 500
        rng (java.util.Random. 42)
        categories (cycle ["electronics" "clothing" "food" "books" "sports"])
        regions (cycle ["north" "south" "east" "west"])]
    (doto (java.util.LinkedHashMap.)
      (.put "product_id" (java.util.ArrayList. (mapv str (range n))))
      (.put "category" (java.util.ArrayList. (vec (take n categories))))
      (.put "region" (java.util.ArrayList. (vec (take n regions))))
      (.put "price" (java.util.ArrayList.
                     (mapv (fn [_] (+ 5.0 (* 195.0 (.nextDouble rng))))
                           (range n))))
      (.put "quantity" (java.util.ArrayList.
                        (mapv (fn [_] (+ 1 (.nextInt rng 100)))
                              (range n))))
      (.put "rating" (java.util.ArrayList.
                      (mapv (fn [_] (+ 1.0 (* 4.0 (.nextDouble rng))))
                            (range n)))))))

(def kt-df (ToDataFrameKt/toDataFrame kt-data))

;; Kotlin DataFrame info:

(kind/md
 (format "**Kotlin DataFrame**: %d rows x %d columns — columns: %s"
         (.rowsCount kt-df)
         (.columnsCount kt-df)
         (vec (.columnNames kt-df))))

;; ## 2. Bridge to tablecloth

(def sales (bridge/kt->tc kt-df))

sales

;; ## 3. Basic tablecloth operations

;; ### Summary by category

(def by-category
  ;; Each aggregator receives the sub-dataset of one group. The parameter is
  ;; named `group` so it does not shadow the `ds` namespace alias.
  (-> sales
      (tc/group-by "category")
      (tc/aggregate {"avg-price" (fn [group] (dfn/mean (ds/column group "price")))
                     "avg-rating" (fn [group] (dfn/mean (ds/column group "rating")))
                     "total-qty" (fn [group] (dfn/sum (ds/column group "quantity")))})))

by-category

;; ### Filter: high-value items (price > 100, rating > 3.5)

(def premium
  (-> sales
      (tc/select-rows (fn [row]
                        (and (> (get row "price") 100.0)
                             (> (get row "rating") 3.5))))))

(kind/md
 (format "**Premium items**: %d out of %d"
         (tc/row-count premium)
         (tc/row-count sales)))

premium

;; ## 4. Visualization with tableplot

;; ### Price distribution by category

(-> sales
    (plotly/base {:=x "price"})
    (plotly/layer-histogram {:=histogram-nbins 30
                             :=color "category"}))

;; ### Price vs Rating scatter

(-> sales
    (plotly/base {:=x "price"
                  :=y "rating"})
    (plotly/layer-point {:=color "category"
                         :=mark-size 6}))

;; ### Total quantity by region (bar chart)

(def qty-by-region
  (-> sales
      (tc/group-by "region")
      (tc/aggregate {"total-qty" (fn [group] (dfn/sum (ds/column group "quantity")))})))

(-> qty-by-region
    (plotly/base {:=x :$group-name
                  :=y "total-qty"})
    (plotly/layer-bar {}))

;; ### Average price by category (bar chart)

(-> by-category
    (plotly/base {:=x :$group-name
                  :=y "avg-price"})
    (plotly/layer-bar {}))

;; ## 5. Roundtrip: modify in Clojure, send back to Kotlin

(def enriched
  ;; Adds "revenue" = price * quantity per row, then fixes the column order
  ;; explicitly before handing the dataset back to Kotlin.
  (-> sales
      (tc/map-columns "revenue" ["price" "quantity"] *)
      (tc/select-columns ["product_id" "category" "region"
                          "price" "quantity" "revenue" "rating"])))

(def kt-enriched (bridge/dataset->kt enriched))

(kind/md
 (format "**Roundtrip**: enriched tablecloth dataset -> KT DataFrame: %d rows x %d cols, columns: %s"
         (.rowsCount kt-enriched)
         (.columnsCount kt-enriched)
         (vec (.columnNames kt-enriched))))

;; Revenue distribution:

(-> enriched
    (plotly/base {:=x "revenue"})
    (plotly/layer-histogram {:=histogram-nbins 40
                             :=color "category"}))

;; ## 6. Schema inference with malli

;; Only the first 10 rows are fed to the schema provider.
(def row-sample (take 10 (bridge/kt->rows kt-df)))

(def inferred-schema (mp/provide row-sample))

(kind/md
 (str "**Malli inferred schema from KT DataFrame rows:**\n```clojure\n"
      (pr-str inferred-schema)
      "\n```"))