init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
+293
View File
@@ -0,0 +1,293 @@
{:paths ["src" "resources" "target/classes"]
:deps {;;org.clojure/clojure {:mvn/version "1.11.1"}
cnuernber/dtype-next {:mvn/version "11.005"}
techascent/tech.io {:mvn/version "4.33"
:exclusions [org.apache.commons/commons-compress]}
org.apache.datasketches/datasketches-java {:mvn/version "4.2.0"}
com.cnuernber/charred {:mvn/version "1.038"}}
:aliases
{:codox
{:extra-deps {codox-theme-rdash/codox-theme-rdash {:mvn/version "0.1.2"}
nrepl/nrepl {:mvn/version "0.8.3"}
cider/cider-nrepl {:mvn/version "0.25.5"}
com.cnuernber/codox {:mvn/version "1.001"}}
:exec-fn codox.main/-main
:exec-args {:group-id "techascent"
:artifact-id "tech.ml.dataset"
:version "8.003"
:name "TMD"
:description "A Clojure high performance data processing system"
:metadata {:doc/format :markdown}
:html {:transforms [[:head] [:append [:script {:async true
:src "https://www.googletagmanager.com/gtag/js?id=G-RGTB4J7LGP"}]]
[:head] [:append [:script "window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-95TVFC1FEB');"]]]}
:themes [:rdash]
:source-paths ["src"]
:output-path "docs"
:doc-paths ["topics"]
:source-uri "https://github.com/techascent/tech.ml.dataset/blob/master/{filepath}#L{line}"
:namespaces [tech.v3.dataset
tech.v3.dataset.math
tech.v3.dataset.tensor
tech.v3.dataset.join
tech.v3.dataset.modelling
tech.v3.dataset.column
tech.v3.dataset.clipboard
tech.v3.dataset.neanderthal
tech.v3.dataset.metamorph
tech.v3.dataset.categorical
tech.v3.dataset.rolling
tech.v3.dataset.reductions
tech.v3.dataset.reductions.apache-data-sketch
tech.v3.dataset.column-filters
tech.v3.dataset.io.datetime
tech.v3.dataset.io.univocity
tech.v3.dataset.io.string-row-parser
tech.v3.dataset.io.csv
tech.v3.dataset.set
tech.v3.dataset.print
tech.v3.dataset.zip
tech.v3.libs.poi
tech.v3.libs.parquet
tech.v3.libs.fastexcel
tech.v3.libs.arrow
tech.v3.libs.guava.cache
tech.v3.libs.tribuo
tech.v3.libs.clj-transit]}}
;; Run with clj -T:build function-in-build
:dev
{:extra-deps
{criterium/criterium {:mvn/version "0.4.5"}
com.cognitect/transit-clj {:mvn/version "1.0.333"}
net.java.dev.jna/jna {:mvn/version "5.12.1"}
uncomplicate/neanderthal {:mvn/version "0.49.1"}
org.bytedeco/mkl {:mvn/version "2024.0-1.5.10"}
ch.qos.logback/logback-classic {:mvn/version "1.3.5"}
com.clojure-goes-fast/clj-memory-meter {:mvn/version "0.1.0"}
com.cnuernber/benchmark {:mvn/version "1.000-beta-2"}
org.apache.poi/poi-ooxml {:mvn/version "5.2.3"
:exclusions [commons-codec/commons-codec]}
com.univocity/univocity-parsers {:mvn/version "2.9.0"}
org.dhatim/fastexcel-reader {:mvn/version "0.16.4"
:exclusions [org.apache.poi/poi-ooxml]}
org.apache.arrow/arrow-vector {:mvn/version "18.2.0"
:exclusions [commons-codec/commons-codec
com.fasterxml.jackson.core/jackson-core
com.fasterxml.jackson.core/jackson-annotations
com.fasterxml.jackson.core/jackson-databind
org.slf4j/slf4j-api]}
com.github.luben/zstd-jni {:mvn/version "1.5.4-1"}
org.lz4/lz4-java {:mvn/version "1.8.0"}
org.apache.parquet/parquet-hadoop {:mvn/version "1.12.0"
:exclusions [org.slf4j/slf4j-log4j12]}
org.apache.hadoop/hadoop-common {:mvn/version "3.3.0"
:exclusions [com.sun.jersey/jersey-core
com.sun.jersey/jersey-json
com.sun.jersey/jersey-server
com.sun.jersey/jersey-servlet
dnsjava/dnsjava
org.eclipse.jetty/jetty-server
org.eclipse.jetty/jetty-servlet
org.eclipse.jetty/jetty-util
org.eclipse.jetty/jetty-webapp
javax.activation/javax.activation-api
javax.servlet.jsp/jsp-api
javax.servlet/javax.servlet-api
io.netty/netty-codec
io.netty/netty-handler
io.netty/netty-transport
io.netty/netty-transport-native-epoll
org.codehaus.jettison/jettison
org.apache.zookeeper/zookeeper
org.apache.curator/curator-recipes
org.apache.curator/curator-client
org.apache.htrace/htrace-core4
org.apache.hadoop.thirdparty/hadoop-shaded-protobuf_3_7
org.apache.hadoop/hadoop-auth
org.apache.kerby/kerb-core
commons-cli/commons-cli
commons-net/commons-net
org.apache.commons/commons-lang3
org.apache.commons/commons-text
org.apache.commons/commons-configuration2
com.google.re2j/re2j
com.google.code.findbugs/jsr305
com.jcraft/jsch
log4j/log4j
org.slf4j/slf4j-log4j12]
}
;; We literally need this for 1 POJO formatting object.
org.apache.hadoop/hadoop-mapreduce-client-core {:mvn/version "3.3.0"
:exclusions [org.slf4j/slf4j-log4j12
org.apache.avro/avro
org.apache.hadoop/hadoop-yarn-client
org.apache.hadoop/hadoop-yarn-common
org.apache.hadoop/hadoop-annotations
org.apache.hadoop/hadoop-hdfs-client
io.netty/netty
com.google.inject.extensions/guice-servlet]}
com.cnuernber/jarrow {:mvn/version "1.000"}
org.tribuo/tribuo-all {:mvn/version "4.3.1" :extension "pom"}
}
:extra-paths ["neanderthal" "test"]}
:dev-mac-m1
{:extra-deps
{criterium/criterium {:mvn/version "0.4.5"}
com.cognitect/transit-clj {:mvn/version "1.0.333"}
net.java.dev.jna/jna {:mvn/version "5.12.1"}
;uncomplicate/neanderthal {:mvn/version "0.45.0"}
ch.qos.logback/logback-classic {:mvn/version "1.3.5"}
com.clojure-goes-fast/clj-memory-meter {:mvn/version "0.1.0"}
com.clojure-goes-fast/clj-async-profiler {:mvn/version "1.6.2"}
com.cnuernber/benchmark {:mvn/version "1.000-beta-2"}
org.apache.poi/poi-ooxml {:mvn/version "5.2.3"
:exclusions [commons-codec/commons-codec]}
;; dev.hardwoodhq/hardwood-core {:mvn/version "1.0.0-SNAPSHOT"}
com.univocity/univocity-parsers {:mvn/version "2.9.0"}
org.dhatim/fastexcel-reader {:mvn/version "0.16.4"
:exclusions [org.apache.poi/poi-ooxml]}
org.apache.arrow/arrow-vector {:mvn/version "18.2.0"
:exclusions [commons-codec/commons-codec
com.fasterxml.jackson.core/jackson-core
com.fasterxml.jackson.core/jackson-annotations
com.fasterxml.jackson.core/jackson-databind
org.slf4j/slf4j-api]}
org.lz4/lz4-java {:mvn/version "1.8.0"}
com.github.luben/zstd-jni {:mvn/version "1.5.4-1"}
com.cnuernber/jarrow {:mvn/version "1.000"}
org.apache.parquet/parquet-hadoop {:mvn/version "1.12.0"
:exclusions [org.slf4j/slf4j-log4j12]}
org.apache.hadoop/hadoop-common {:mvn/version "3.3.0"
:exclusions [com.sun.jersey/jersey-core
com.sun.jersey/jersey-json
com.sun.jersey/jersey-server
com.sun.jersey/jersey-servlet
dnsjava/dnsjava
org.eclipse.jetty/jetty-server
org.eclipse.jetty/jetty-servlet
org.eclipse.jetty/jetty-util
org.eclipse.jetty/jetty-webapp
javax.activation/javax.activation-api
javax.servlet.jsp/jsp-api
javax.servlet/javax.servlet-api
io.netty/netty-codec
io.netty/netty-handler
io.netty/netty-transport
io.netty/netty-transport-native-epoll
org.codehaus.jettison/jettison
org.apache.zookeeper/zookeeper
org.apache.curator/curator-recipes
org.apache.curator/curator-client
org.apache.htrace/htrace-core4
org.apache.hadoop.thirdparty/hadoop-shaded-protobuf_3_7
org.apache.hadoop/hadoop-auth
org.apache.kerby/kerb-core
commons-cli/commons-cli
commons-net/commons-net
org.apache.commons/commons-lang3
org.apache.commons/commons-text
org.apache.commons/commons-configuration2
com.google.re2j/re2j
com.google.code.findbugs/jsr305
com.jcraft/jsch
log4j/log4j
org.slf4j/slf4j-log4j12]
}
;; We literally need this for 1 POJO formatting object.
org.apache.hadoop/hadoop-mapreduce-client-core {:mvn/version "3.3.0"
:exclusions [org.slf4j/slf4j-log4j12
org.apache.avro/avro
org.apache.hadoop/hadoop-yarn-client
org.apache.hadoop/hadoop-yarn-common
org.apache.hadoop/hadoop-annotations
org.apache.hadoop/hadoop-hdfs-client
io.netty/netty
com.google.inject.extensions/guice-servlet]}
org.xerial.snappy/snappy-java {:mvn/version "1.1.8.4"}
;org.tribuo/tribuo-all {:mvn/version "4.2.0" :extension "pom"}
}
:extra-paths ["test"]
:jvm-opts ["-Djdk.attach.allowAttachSelf=true" "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED"
"--illegal-access=permit" "--add-opens=java.base/java.lang=ALL-UNNAMED"
"--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED"
"--add-opens=java.base/java.util.concurrent=ALL-UNNAMED"
"--add-opens=java.base/java.lang=ALL-UNNAMED"
"--add-opens=java.base/java.math=ALL-UNNAMED"
"--add-opens=java.base/jdk.internal.foreign=ALL-UNNAMED"
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"
"--add-opens=java.base/java.io=ALL-UNNAMED"
"--add-opens=java.base/java.util=ALL-UNNAMED"
"--add-opens=java.base/java.lang.ref=ALL-UNNAMED"
]
}
:jdk-8 {}
:jdk-11
{:jvm-opts ["--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED"]}
:jdk-17
{:jvm-opts ["--add-modules" "jdk.incubator.foreign,jdk.incubator.vector"
"--enable-native-access=ALL-UNNAMED"
"--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED"
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"]}
:jdk-21
{:jvm-opts ["--add-modules" "jdk.incubator.foreign,jdk.incubator.vector"
"--enable-native-access=ALL-UNNAMED"
"--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED"
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED"]}
:codegen
{:extra-paths ["src" "dev"]
:exec-fn tech.v3.dataset.codegen/-main}
:build
{:deps {io.github.clojure/tools.build {:git/tag "v0.9.6" :git/sha "8e78bcc"}}
:ns-default build}
:test
{:extra-deps {com.cognitect/test-runner
{:git/url "https://github.com/cognitect-labs/test-runner"
:sha "209b64504cb3bd3b99ecfec7937b358a879f55c1"}}
:extra-paths ["neanderthal" "test"]
:main-opts ["-m" "cognitect.test-runner"]}
:deploy
{:replace-deps {slipset/deps-deploy {:mvn/version "0.1.5"}}
:exec-fn deps-deploy.deps-deploy/deploy
:exec-args {:installer :remote
:sign-releases? true
:artifact "target/tech.ml.dataset.jar"}}
:install
{:replace-deps {slipset/deps-deploy {:mvn/version "0.1.5"}}
:exec-fn deps-deploy.deps-deploy/deploy
:exec-args {:installer :local
:artifact "target/tech.ml.dataset.jar"}}}}