init research
This commit is contained in:
+43
@@ -0,0 +1,43 @@
|
||||
import org.jetbrains.kotlin.gradle.dsl.JvmTarget
|
||||
|
||||
plugins {
    application
    kotlin("jvm")

    // for now the 'old' DataFrame Gradle plugin is applied rather than the compiler plugin
    id("org.jetbrains.kotlinx.dataframe")

    // applying KSP explicitly is only required when `kotlin.dataframe.add.ksp=false` is set in gradle.properties
    id("com.google.devtools.ksp")
}

repositories {
    mavenLocal() // used when developing dataframe locally
    mavenCentral()
}

dependencies {
    // the root project is used here; a published artifact would be declared like this:
    // implementation("org.jetbrains.kotlinx:dataframe:X.Y.Z")
    implementation(project(":"))

    // SQLite driver and the Exposed modules for database support
    implementation(libs.sqlite)
    implementation(libs.exposed.core)
    implementation(libs.exposed.kotlin.datetime)
    implementation(libs.exposed.jdbc)
    implementation(libs.exposed.json)
    implementation(libs.exposed.money)
}

// Kotlin sources are compiled for JVM 1.8.
kotlin {
    compilerOptions {
        jvmTarget.set(JvmTarget.JVM_1_8)
        freeCompilerArgs.add("-Xjdk-release=8")
    }
}

// Java sources are compiled for the same Java 8 level.
tasks.withType<JavaCompile> {
    val java8 = JavaVersion.VERSION_1_8.toString()
    sourceCompatibility = java8
    targetCompatibility = java8
    options.release.set(8)
}
|
||||
+107
@@ -0,0 +1,107 @@
|
||||
package org.jetbrains.kotlinx.dataframe.examples.exposed
|
||||
|
||||
import org.jetbrains.exposed.v1.core.BiCompositeColumn
|
||||
import org.jetbrains.exposed.v1.core.Column
|
||||
import org.jetbrains.exposed.v1.core.Expression
|
||||
import org.jetbrains.exposed.v1.core.ExpressionAlias
|
||||
import org.jetbrains.exposed.v1.core.ResultRow
|
||||
import org.jetbrains.exposed.v1.core.Table
|
||||
import org.jetbrains.exposed.v1.jdbc.Query
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.convertTo
|
||||
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
|
||||
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import kotlin.reflect.KProperty1
|
||||
import kotlin.reflect.full.isSubtypeOf
|
||||
import kotlin.reflect.full.memberProperties
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * Typed variant of [convertToDataFrame]: reads every column of an
 * [Iterable][Iterable]`<`[ResultRow][ResultRow]`>` (for instance a [Query][Query]) row by row,
 * builds an untyped [DataFrame] from it and then converts that frame to [T] using [convertTo].
 *
 * In notebooks the untyped overload is usually sufficient, because types are inferred at runtime.
 *
 * @param T the [@DataSchema][DataSchema] type the resulting [DataFrame] is converted to.
 * @return a [DataFrame] of [T] holding all rows of this iterable.
 */
inline fun <reified T : Any> Iterable<ResultRow>.convertToDataFrame(): DataFrame<T> {
    // first collect the rows without any type information...
    val untyped = convertToDataFrame()
    // ...and only then convert to the requested schema
    return untyped.convertTo<T>()
}
|
||||
|
||||
/**
 * Reads every column of an [Iterable][Iterable]`<`[ResultRow][ResultRow]`>` from Exposed
 * (for instance a [Query][Query]) row by row.
 *
 * The values are collected into one list per column, keyed by the column's [readableName],
 * and the resulting [Map] of lists is turned into a [DataFrame] with [Map.toDataFrame].
 *
 * @return an untyped [AnyFrame] with one column per distinct [readableName].
 */
@JvmName("convertToAnyFrame")
fun Iterable<ResultRow>.convertToDataFrame(): AnyFrame {
    // column name -> all values seen for that column, in row order
    val valuesByColumn = mutableMapOf<String, MutableList<Any?>>()
    forEach { row ->
        row.fieldIndex.keys.forEach { expression ->
            val values = valuesByColumn.getOrPut(expression.readableName) { mutableListOf() }
            values.add(row[expression])
        }
    }
    return valuesByColumn.toDataFrame()
}
|
||||
|
||||
/**
 * A simple, human-readable column name for [this] [Expression].
 *
 * - [Column]: its `name`
 * - [ExpressionAlias]: its `alias`
 * - [BiCompositeColumn]: the readable names of its real columns, joined by `_`
 * - anything else: the result of `toString()`
 *
 * More [Expression] types may need dedicated handling in the future.
 */
val Expression<*>.readableName: String
    get() {
        return when (this) {
            is Column<*> -> name

            is ExpressionAlias<*> -> alias

            is BiCompositeColumn<*, *, *> ->
                getRealColumns().joinToString(separator = "_") { realColumn -> realColumn.readableName }

            else -> toString()
        }
    }
|
||||
|
||||
/**
 * Builds a [DataFrameSchema] from the `Column<*>` properties declared on this [Table] instance.
 *
 * Conversion to a DataFrame does not need this, but the schema is handy for generating
 * a DataFrame [@DataSchema][DataSchema] declaration.
 *
 * @param columnNameToAccessor Optional [MutableMap] that is filled with one entry per column,
 *   mapping the SQL column name to the name of the property declaring it on the [Table].
 *   It can be used to define a [NameNormalizer] afterwards.
 * @return a schema with one [ColumnSchema.Value] per column, keyed by SQL column name.
 * @see toDataFrameSchemaWithNameNormalizer
 */
@Suppress("UNCHECKED_CAST")
fun Table.toDataFrameSchema(columnNameToAccessor: MutableMap<String, String> = mutableMapOf()): DataFrameSchema {
    val columnSupertype = typeOf<Column<*>>()
    val schemaColumns = mutableMapOf<String, ColumnSchema>()

    // reflection is used to visit every `Column<*>` property of the Table object
    for (property in this::class.memberProperties) {
        if (!property.returnType.isSubtypeOf(columnSupertype)) continue
        val accessor = property as KProperty1<Table, Column<*>>

        // the SQL column name comes from the Column instance itself
        val sqlName = accessor.get(this).name
        // remember which accessor declares this SQL column
        columnNameToAccessor[sqlName] = accessor.name

        // the value type is the type argument of `val a: Column<Type>`
        val valueType = accessor.returnType.arguments.first().type!!

        // register the column under its SQL name with its schema type
        schemaColumns[sqlName] = ColumnSchema.Value(valueType)
    }
    return DataFrameSchemaImpl(schemaColumns)
}
|
||||
|
||||
/**
 * Creates a [DataFrameSchema] from the declared [Table] instance together with a [NameNormalizer]
 * that converts the SQL column names to the corresponding Kotlin property names of the [Table].
 *
 * This is not needed for conversion, but it can be useful to create a DataFrame [@DataSchema][DataSchema] instance.
 *
 * @return a [Pair] of the schema and a [NameNormalizer]; names that are not SQL column names
 *   of this [Table] are returned by the normalizer unchanged.
 * @see toDataFrameSchema
 */
fun Table.toDataFrameSchemaWithNameNormalizer(): Pair<DataFrameSchema, NameNormalizer> {
    val columnNameToAccessor = mutableMapOf<String, String>()
    // the map has to be passed along so that toDataFrameSchema fills it;
    // otherwise it stays empty and the normalizer below never renames anything
    val schema = toDataFrameSchema(columnNameToAccessor)
    return Pair(schema, NameNormalizer { columnNameToAccessor[it] ?: it })
}
|
||||
+96
@@ -0,0 +1,96 @@
|
||||
package org.jetbrains.kotlinx.dataframe.examples.exposed
|
||||
|
||||
import org.jetbrains.exposed.v1.core.Column
|
||||
import org.jetbrains.exposed.v1.core.SortOrder
|
||||
import org.jetbrains.exposed.v1.core.count
|
||||
import org.jetbrains.exposed.v1.jdbc.Database
|
||||
import org.jetbrains.exposed.v1.jdbc.SchemaUtils
|
||||
import org.jetbrains.exposed.v1.jdbc.batchInsert
|
||||
import org.jetbrains.exposed.v1.jdbc.deleteAll
|
||||
import org.jetbrains.exposed.v1.jdbc.select
|
||||
import org.jetbrains.exposed.v1.jdbc.selectAll
|
||||
import org.jetbrains.exposed.v1.jdbc.transactions.transaction
|
||||
import org.jetbrains.kotlinx.dataframe.api.asSequence
|
||||
import org.jetbrains.kotlinx.dataframe.api.count
|
||||
import org.jetbrains.kotlinx.dataframe.api.describe
|
||||
import org.jetbrains.kotlinx.dataframe.api.groupBy
|
||||
import org.jetbrains.kotlinx.dataframe.api.print
|
||||
import org.jetbrains.kotlinx.dataframe.api.sortByDesc
|
||||
import org.jetbrains.kotlinx.dataframe.size
|
||||
import java.io.File
|
||||
|
||||
/**
 * Describes a simple bridge between [Exposed](https://www.jetbrains.com/exposed/) and DataFrame!
 *
 * The example connects to the `chinook.db` SQLite resource, runs a query with Exposed,
 * reads the `Customers` table into a typed DataFrame, performs some DataFrame operations on it
 * and finally writes the DataFrame back into the database.
 *
 * @throws IllegalArgumentException if the `chinook.db` resource cannot be found by the class loader.
 */
fun main() {
    // defining where to find our SQLite database for Exposed
    val resourceDb = "chinook.db"
    // fail with a descriptive message instead of a bare NullPointerException when the resource is missing
    val resourceUrl = requireNotNull(object {}.javaClass.classLoader.getResource(resourceDb)) {
        "Could not find resource '$resourceDb' on the classpath"
    }
    val dbPath = File(resourceUrl.toURI()).absolutePath
    val db = Database.connect(url = "jdbc:sqlite:$dbPath", driver = "org.sqlite.JDBC")

    // let's read the database!
    val df = transaction(db) {
        // addLogger(StdOutSqlLogger) // enable if you want to see verbose logs

        // tables in Exposed need to be defined, see tables.kt
        SchemaUtils.create(Customers, Artists, Albums)

        println()

        // In Exposed, we can write queries like this.
        // Here, we count per country how many customers there are and print the results:
        Customers
            .select(Customers.country, Customers.customerId.count())
            .groupBy(Customers.country)
            .orderBy(Customers.customerId.count() to SortOrder.DESC)
            .forEach {
                println("${it[Customers.country]}: ${it[Customers.customerId.count()]} customers")
            }

        println()

        // Perform the specific query you want to read into the DataFrame.
        // Note: DataFrames are in-memory structures, so don't make it too large if you don't have the RAM ;)
        val query = Customers.selectAll() // .where { Customers.company.isNotNull() }

        println()

        // read and convert the query to a typed DataFrame
        // see compatibilityLayer.kt for how we created convertToDataFrame<>()
        // and see tables.kt for how we created DfCustomers!
        query.convertToDataFrame<DfCustomers>()
    }

    println(df.size())

    // now we have a DataFrame, we can perform DataFrame operations,
    // like doing the same operation as we did in Exposed above
    df.groupBy { country }.count()
        .sortByDesc { "count"<Int>() }
        .print(columnTypes = true, borders = true)

    // or just general statistics
    df.describe()
        .print(columnTypes = true, borders = true)

    // or make plots using Kandy! It's all up to you

    // writing a DataFrame back into an SQL database with Exposed can also be done easily!
    transaction(db) {
        // addLogger(StdOutSqlLogger) // enable if you want to see verbose logs

        // first delete the original contents
        Customers.deleteAll()

        println()

        // batch-insert our dataframe back into the SQL database as a sequence of rows
        Customers.batchInsert(df.asSequence()) { dfRow ->
            // we simply go over each value in the row and put it in the right place in the Exposed statement
            for (column in Customers.columns) {
                @Suppress("UNCHECKED_CAST")
                this[column as Column<Any?>] = dfRow[column.name]
            }
        }
    }
}
|
||||
+97
@@ -0,0 +1,97 @@
|
||||
package org.jetbrains.kotlinx.dataframe.examples.exposed
|
||||
|
||||
import org.jetbrains.exposed.v1.core.Column
|
||||
import org.jetbrains.exposed.v1.core.Table
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.generateDataClasses
|
||||
import org.jetbrains.kotlinx.dataframe.api.print
|
||||
|
||||
/**
 * Exposed [Table] definition with the columns `AlbumId`, `Title` and `ArtistId`.
 *
 * [albumId] is auto-incremented and acts as the primary key.
 */
object Albums : Table() {
    val albumId: Column<Int> = integer("AlbumId").autoIncrement()
    val title: Column<String> = varchar("Title", 160)
    val artistId: Column<Int> = integer("ArtistId")

    override val primaryKey: PrimaryKey = PrimaryKey(albumId)
}
|
||||
|
||||
/**
 * Exposed [Table] definition with the columns `ArtistId` and `Name`.
 *
 * [artistId] is auto-incremented and acts as the primary key.
 */
object Artists : Table() {
    val artistId: Column<Int> = integer("ArtistId").autoIncrement()
    val name: Column<String> = varchar("Name", 120)

    override val primaryKey: PrimaryKey = PrimaryKey(artistId)
}
|
||||
|
||||
/**
 * Exposed [Table] definition of the customers that are read into a DataFrame in this example.
 *
 * [customerId] is auto-incremented and acts as the primary key.
 * Only [customerId], [firstName], [lastName] and [email] are non-nullable; all other columns may hold `null`.
 */
object Customers : Table() {
    // mandatory identification columns
    val customerId: Column<Int> = integer("CustomerId").autoIncrement()
    val firstName: Column<String> = varchar("FirstName", 40)
    val lastName: Column<String> = varchar("LastName", 20)

    // optional company and address details
    val company: Column<String?> = varchar("Company", 80).nullable()
    val address: Column<String?> = varchar("Address", 70).nullable()
    val city: Column<String?> = varchar("City", 40).nullable()
    val state: Column<String?> = varchar("State", 40).nullable()
    val country: Column<String?> = varchar("Country", 40).nullable()
    val postalCode: Column<String?> = varchar("PostalCode", 10).nullable()

    // contact details, of which only the email address is mandatory
    val phone: Column<String?> = varchar("Phone", 24).nullable()
    val fax: Column<String?> = varchar("Fax", 24).nullable()
    val email: Column<String> = varchar("Email", 60)

    val supportRepId: Column<Int?> = integer("SupportRepId").nullable()

    override val primaryKey: PrimaryKey = PrimaryKey(customerId)
}
|
||||
|
||||
/**
 * Exposed needs [Table] instances to offer type-safe access to columns and data.
 *
 * DataFrame can infer types at runtime, which is sufficient in Kotlin Notebook,
 * but type-safe access at compile time requires a [@DataSchema][DataSchema] declaration.
 *
 * This entry point prints such a declaration for [Customers], which is what the
 * [toDataFrameSchema] function was created for!
 */
fun main() {
    val schemaWithNormalizer = Customers.toDataFrameSchemaWithNameNormalizer()
    val schema = schemaWithNormalizer.first
    val nameNormalizer = schemaWithNormalizer.second

    // uncomment to check whether the schema was converted correctly:
    // schema.print()

    // generate a @DataSchema data class that can be copy-pasted into the code;
    // the NameNormalizer makes DataFrame generate the same accessors as in the Table
    // while the column names themselves stay correct
    val generatedCode = schema.generateDataClasses(
        markerName = "DfCustomers",
        nameNormalizer = nameNormalizer,
    )
    generatedCode.print()
}
|
||||
|
||||
/**
 * DataFrame [@DataSchema][DataSchema] counterpart of the [Customers] table.
 *
 * It was created from the schema of [Customers] (see `main` in this file);
 * the same can be done for the other tables.
 * Every property carries the SQL column name via [@ColumnName][ColumnName].
 */
@DataSchema
data class DfCustomers(
    @ColumnName("Address") val address: String?,
    @ColumnName("City") val city: String?,
    @ColumnName("Company") val company: String?,
    @ColumnName("Country") val country: String?,
    @ColumnName("CustomerId") val customerId: Int,
    @ColumnName("Email") val email: String,
    @ColumnName("Fax") val fax: String?,
    @ColumnName("FirstName") val firstName: String,
    @ColumnName("LastName") val lastName: String,
    @ColumnName("Phone") val phone: String?,
    @ColumnName("PostalCode") val postalCode: String?,
    @ColumnName("State") val state: String?,
    @ColumnName("SupportRepId") val supportRepId: Int?,
)
|
||||
Vendored
BIN
Binary file not shown.
Reference in New Issue
Block a user