init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,14 @@
# Kotlin DataFrame Compiler Gradle Plugin Example
An IntelliJ IDEA Gradle Kotlin project demonstrating the use of the
[Kotlin DataFrame Compiler Plugin](https://kotlin.github.io/dataframe/compiler-plugin.html).
We recommend using an up-to-date IntelliJ IDEA for the best experience,
as well as the latest Kotlin plugin version.
> [!WARNING]
> Proper functionality in IntelliJ IDEA requires version 2025.2 or newer.
[Download Kotlin DataFrame Compiler Plugin Gradle Example](https://github.com/Kotlin/dataframe/raw/example-projects-archives/kotlin-dataframe-plugin-gradle-example.zip)
See also [Kotlin DataFrame Compiler Maven Plugin Example](../kotlin-dataframe-plugin-maven-example)
@@ -0,0 +1,39 @@
// Gradle (Kotlin DSL) build script for the Kotlin DataFrame Compiler Plugin example project.
import org.jlleitschuh.gradle.ktlint.KtlintExtension
plugins {
// ktlint Gradle plugin; the ktlint tool version itself is set in `configure<KtlintExtension>` below.
id("org.jlleitschuh.gradle.ktlint") version "12.3.0"
// One version value shared by both Kotlin plugins declared below, so they cannot drift apart.
val kotlinVersion = "2.3.0-RC3"
kotlin("jvm") version kotlinVersion
// Add the Kotlin DataFrame Compiler plugin of the same version as the Kotlin plugin.
kotlin("plugin.dataframe") version kotlinVersion
// NOTE(review): `application` is applied, but this script does not set `application { mainClass }` —
// confirm whether the `run` task is expected to work as-is.
application
}
// Project coordinates.
group = "org.example"
version = "1.0-SNAPSHOT"
// Project dependencies are resolved from Maven Central only.
repositories {
mavenCentral()
}
dependencies {
// Add general `dataframe` dependency
implementation("org.jetbrains.kotlinx:dataframe:1.0.0-Beta4")
// Add `kandy` dependency
implementation("org.jetbrains.kotlinx:kandy-lets-plot:0.8.3")
// kotlin.test for the test source set.
testImplementation(kotlin("test"))
}
// Run tests on the JUnit Platform.
tasks.test {
useJUnitPlatform()
}
// Build with a Java 11 toolchain.
kotlin {
jvmToolchain(11)
}
configure<KtlintExtension> {
// Version of ktlint used by the plugin (distinct from the Gradle plugin version "12.3.0" above).
version = "1.6.0"
// rules are set up through .editorconfig
}
@@ -0,0 +1,4 @@
kotlin.code.style=official
# Disabling incremental compilation will no longer be necessary
# when https://youtrack.jetbrains.com/issue/KT-66735 is resolved.
kotlin.incremental=false
@@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-9.1.0-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
@@ -0,0 +1,11 @@
// Gradle settings for the example: plugin repositories, toolchain resolver plugin and root project name.
pluginManagement {
// Repositories searched, in this order, when resolving Gradle plugins.
repositories {
// JetBrains Kotlin "dev" Maven repository.
// NOTE(review): presumably needed for pre-release Kotlin plugin builds — confirm it is still required.
maven("https://packages.jetbrains.team/maven/p/kt/dev/")
mavenCentral()
gradlePluginPortal()
}
}
plugins {
// Toolchain resolver convention plugin.
// NOTE(review): presumably lets Gradle download the JDK requested via `jvmToolchain` in the build
// script when it is not installed locally — verify against the plugin documentation.
id("org.gradle.toolchains.foojay-resolver-convention") version "1.0.0"
}
rootProject.name = "kotlin-dataframe-plugin-gradle-example"
@@ -0,0 +1,110 @@
package org.jetbrains.kotlinx.dataframe.examples.plugin
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.aggregate
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.convertTo
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.groupBy
import org.jetbrains.kotlinx.dataframe.api.into
import org.jetbrains.kotlinx.dataframe.api.max
import org.jetbrains.kotlinx.dataframe.api.rename
import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.io.readCsv
import org.jetbrains.kotlinx.dataframe.io.writeCsv
import org.jetbrains.kotlinx.kandy.dsl.plot
import org.jetbrains.kotlinx.kandy.letsplot.export.save
import org.jetbrains.kotlinx.kandy.letsplot.feature.layout
import org.jetbrains.kotlinx.kandy.letsplot.layers.bars
import java.net.URL
/**
 * Data schema for the DataFrame read from jetbrains_repositories.csv.
 *
 * `main` converts the freshly read DataFrame to this schema with `convertTo<Repositories>()`
 * and then accesses columns through these names (e.g. `full_name`) until
 * `renameToCamelCase()` is applied.
 */
@DataSchema
data class Repositories(
// Repository name; `main` takes the part after the last "/" as the short name.
// NOTE(review): snake_case names presumably mirror the CSV column headers — confirm against the file.
val full_name: String,
val html_url: URL,
// Renamed to "stars" in `main` (after conversion to camelCase).
val stargazers_count: Int,
// Raw string here; `main` strips a surrounding "[" / "]" and splits it on commas into a list.
val topics: String,
val watchers: Int,
)
/** Kinds of repositories, as assigned by [getKind]. */
enum class RepoKind {
// The repository name or its topics contain "kotlin".
Kotlin,
// Not Kotlin, but the repository name or its topics contain "idea" or "intellij".
IntelliJ,
// Everything else.
Other,
}
/**
 * Classifies a repository by looking for keywords in its name and topics.
 *
 * A keyword matches when it is an exact element of [topics] or a substring of the
 * lowercased [fullName]. "kotlin" is checked first, then "idea" / "intellij".
 *
 * @param fullName repository name; compared case-insensitively (it is lowercased before matching).
 * @param topics repository topics; compared as-is by exact element equality.
 * @return [RepoKind.Kotlin], [RepoKind.IntelliJ], or [RepoKind.Other] when no keyword matches.
 */
fun getKind(fullName: String, topics: List<String>): RepoKind {
    val lowerCasedName = fullName.lowercase()
    val matches: (String) -> Boolean = { keyword -> keyword in topics || keyword in lowerCasedName }

    if (matches("kotlin")) return RepoKind.Kotlin
    if (matches("idea") || matches("intellij")) return RepoKind.IntelliJ
    return RepoKind.Other
}
/**
 * Entry point of the example.
 *
 * Reads the JetBrains repositories CSV from a remote URL, converts it to the [Repositories]
 * schema, demonstrates how column accessors follow each DataFrame operation, writes the
 * transformed DataFrame to "jetbrains_repositories_new.csv", and saves a bar plot of the
 * maximum star count per repository kind to "kindToStars.svg".
 */
fun main() {
val repos = DataFrame
// Read DataFrame from the CSV file (given here as an HTTPS URL).
.readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
// And convert it to match the `Repositories` schema.
.convertTo<Repositories>()
// With Compiler Plugin, the DataFrame schema changes immediately after each operation:
// For example, if a new column is added or the old one is renamed (or its type is changed)
// during the operation, you can use the new name immediately in the following operations:
// NOTE: the result of this chain is not assigned or used; it appears to serve only as a demonstration.
repos
// Add a new "name" column...
.add("name") { full_name.substringAfterLast("/") }
// ... and now we can use "name" extension in DataFrame operations, such as `filter`.
.filter { name.lowercase().contains("kotlin") }
// Let's update the DataFrame with some operations using these features.
val reposUpdated = repos
// Rename columns to CamelCase.
// Note that after that, in the following operations, extension properties will have
// new names corresponding to the column names.
.renameToCamelCase()
// Rename "stargazersCount" column to "stars".
.rename { stargazersCount }.into("stars")
// And we can immediately use the updated name in the filtering.
.filter { stars > 50 }
// Convert values in the "topics" column (which were `String` initially)
// to the list of topics.
.convert { topics }.with {
// Drop the surrounding "[" and "]" (only removed when both are present).
val inner = it.removeSurrounding("[", "]")
// An empty string maps to an empty list; splitting it would otherwise yield a single blank topic.
if (inner.isEmpty()) emptyList() else inner.split(',').map(String::trim)
}
// Now "topics" is a `List<String>` column.
// Add a new column with the number of topics.
.add("topicCount") { topics.size }
// Add a new column with the kind of repository.
.add("kind") { getKind(fullName, topics) }
// Write the updated DataFrame to a CSV file.
reposUpdated.writeCsv("jetbrains_repositories_new.csv")
reposUpdated
// Group repositories by kind
.groupBy { kind }
// And then compute the maximum stars in each group.
.aggregate {
max { stars } into "maxStars"
}
// Build a bar plot showing the maximum number of stars per repository kind.
.plot {
bars {
x(kind)
y(maxStars)
}
layout.title = "Max stars per repo kind"
}
// Save the plot to an SVG file.
.save("kindToStars.svg")
}