init research

2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,14 @@
+# Kotlin DataFrame Compiler Gradle Plugin Example
+
+An IntelliJ IDEA Gradle Kotlin project demonstrating the use of the  
+[Kotlin DataFrame Compiler Plugin](https://kotlin.github.io/dataframe/compiler-plugin.html).
+
+We recommend using an up-to-date IntelliJ IDEA for the best experience,
+as well as the latest Kotlin plugin version.
+
+> [!WARNING]
+> Proper functionality in IntelliJ IDEA requires version 2025.2 or newer.
+
+[Download Kotlin DataFrame Compiler Plugin Gradle Example](https://github.com/Kotlin/dataframe/raw/example-projects-archives/kotlin-dataframe-plugin-gradle-example.zip)
+
+See also [Kotlin DataFrame Compiler Maven Plugin Example](../kotlin-dataframe-plugin-maven-example)
@@ -0,0 +1,39 @@
+import org.jlleitschuh.gradle.ktlint.KtlintExtension
+
+plugins {
+    // ktlint Gradle plugin; its extension is configured at the bottom of this script.
+    id("org.jlleitschuh.gradle.ktlint") version "12.3.0"
+
+    // Declared once so that both Kotlin plugins below are requested with the same version.
+    val kotlinVersion = "2.3.0-RC3"
+    kotlin("jvm") version kotlinVersion
+    // Add the Kotlin DataFrame Compiler plugin of the same version as the Kotlin plugin.
+    kotlin("plugin.dataframe") version kotlinVersion
+
+    // NOTE(review): no `application { mainClass = ... }` block is visible in this script;
+    // confirm the entry point is configured, otherwise the `run` task has nothing to launch.
+    application
+}
+
+// Maven coordinates of this example project.
+group = "org.example"
+version = "1.0-SNAPSHOT"
+
+// Library dependencies are resolved from Maven Central only.
+repositories {
+    mavenCentral()
+}
+
+dependencies {
+    // Add general `dataframe` dependency
+    implementation("org.jetbrains.kotlinx:dataframe:1.0.0-Beta4")
+    // Add `kandy` dependency
+    implementation("org.jetbrains.kotlinx:kandy-lets-plot:0.8.3")
+    // `kotlin("test")` is only on the test classpath.
+    testImplementation(kotlin("test"))
+}
+
+// Run tests on the JUnit Platform.
+tasks.test {
+    useJUnitPlatform()
+}
+// Request a JDK 11 toolchain for the Kotlin compilation.
+kotlin {
+    jvmToolchain(11)
+}
+
+// Sets the ktlint version through the plugin's extension.
+configure<KtlintExtension> {
+    version = "1.6.0"
+    // rules are set up through .editorconfig
+}
@@ -0,0 +1,4 @@
+kotlin.code.style=official
+# Disabling incremental compilation will no longer be necessary
+# when https://youtrack.jetbrains.com/issue/KT-66735 is resolved.
+kotlin.incremental=false
@@ -0,0 +1,7 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-9.1.0-bin.zip
+networkTimeout=10000
+validateDistributionUrl=true
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
@@ -0,0 +1,11 @@
+// Repositories consulted when resolving Gradle plugins, in the order listed.
+pluginManagement {
+    repositories {
+        // NOTE(review): presumably needed for pre-release Kotlin plugin builds — confirm
+        // whether it is still required for the Kotlin version used by the build script.
+        maven("https://packages.jetbrains.team/maven/p/kt/dev/")
+        mavenCentral()
+        gradlePluginPortal()
+    }
+}
+plugins {
+    // Toolchain resolver convention plugin; presumably what lets Gradle obtain the JDK
+    // requested via `jvmToolchain(...)` when none is installed locally — TODO confirm.
+    id("org.gradle.toolchains.foojay-resolver-convention") version "1.0.0"
+}
+rootProject.name = "kotlin-dataframe-plugin-gradle-example"
@@ -0,0 +1,110 @@
+package org.jetbrains.kotlinx.dataframe.examples.plugin
+
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.add
+import org.jetbrains.kotlinx.dataframe.api.aggregate
+import org.jetbrains.kotlinx.dataframe.api.convert
+import org.jetbrains.kotlinx.dataframe.api.convertTo
+import org.jetbrains.kotlinx.dataframe.api.filter
+import org.jetbrains.kotlinx.dataframe.api.groupBy
+import org.jetbrains.kotlinx.dataframe.api.into
+import org.jetbrains.kotlinx.dataframe.api.max
+import org.jetbrains.kotlinx.dataframe.api.rename
+import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase
+import org.jetbrains.kotlinx.dataframe.api.with
+import org.jetbrains.kotlinx.dataframe.io.readCsv
+import org.jetbrains.kotlinx.dataframe.io.writeCsv
+import org.jetbrains.kotlinx.kandy.dsl.plot
+import org.jetbrains.kotlinx.kandy.letsplot.export.save
+import org.jetbrains.kotlinx.kandy.letsplot.feature.layout
+import org.jetbrains.kotlinx.kandy.letsplot.layers.bars
+import java.net.URL
+
+// Declare data schema for the DataFrame from jetbrains_repositories.csv.
+// `main` converts the freshly read CSV to this schema via `convertTo<Repositories>()`
+// and then accesses columns through these property names (e.g. `full_name`), which is
+// why they are snake_case here; they become camelCase only after `renameToCamelCase()`.
+@DataSchema
+data class Repositories(
+    // Full repository name; `main` takes the part after the last "/" as the short name.
+    val full_name: String,
+    val html_url: URL,
+    // Renamed to "stars" in `main`.
+    val stargazers_count: Int,
+    // Raw text at this point; `main` strips a surrounding "[" / "]" and splits on ','
+    // to turn it into a list of topics.
+    val topics: String,
+    val watchers: Int,
+)
+
+// Define kinds of repositories.
+// Values are assigned by `getKind` from keywords found in the repository name or topics.
+enum class RepoKind {
+    // Name or topics mention "kotlin".
+    Kotlin,
+    // Name or topics mention "idea" or "intellij" (and not "kotlin").
+    IntelliJ,
+    // None of the keywords above matched.
+    Other,
+}
+
+// Classifies a repository by keyword: a keyword matches when it is one of the topics
+// or occurs anywhere in the lowercased full name. "kotlin" is checked first, then
+// "idea" / "intellij"; a repository matching none of them is `Other`.
+fun getKind(fullName: String, topics: List<String>): RepoKind {
+    val loweredName = fullName.lowercase()
+
+    fun matches(keyword: String): Boolean =
+        topics.contains(keyword) || loweredName.contains(keyword)
+
+    if (matches("kotlin")) return RepoKind.Kotlin
+    if (matches("idea") || matches("intellij")) return RepoKind.IntelliJ
+    return RepoKind.Other
+}
+
+// Entry point of the example: reads the JetBrains repositories CSV from a URL, reshapes it
+// step by step using column accessors generated by the DataFrame compiler plugin, writes the
+// result to "jetbrains_repositories_new.csv" and saves a bar plot to "kindToStars.svg".
+fun main() {
+    val repos = DataFrame
+        // Read DataFrame from the CSV file.
+        .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
+        // And convert it to match the `Repositories` schema.
+        .convertTo<Repositories>()
+
+    // With Compiler Plugin, the DataFrame schema changes immediately after each operation:
+    // For example, if a new column is added or the old one is renamed (or its type is changed)
+    // during the operation, you can use the new name immediately in the following operations:
+    // (The result of this chain is not stored or used; it only demonstrates the feature.)
+    repos
+        // Add a new "name" column...
+        .add("name") { full_name.substringAfterLast("/") }
+        // ... and now we can use "name" extension in DataFrame operations, such as `filter`.
+        .filter { name.lowercase().contains("kotlin") }
+
+    // Let's update the DataFrame with some operations using these features.
+    val reposUpdated = repos
+        // Rename columns to CamelCase.
+        // Note that after that, in the following operations, extension properties will have
+        // new names corresponding to the column names.
+        .renameToCamelCase()
+        // Rename "stargazersCount" column to "stars".
+        .rename { stargazersCount }.into("stars")
+        // And we can immediately use the updated name in the filtering.
+        .filter { stars > 50 }
+        // Convert values in the "topics" column (which were `String` initially)
+        // to the list of topics.
+        .convert { topics }.with {
+            // Drop the surrounding "[" and "]" (if both are present), then split on commas.
+            val inner = it.removeSurrounding("[", "]")
+            // An empty string yields an empty list rather than a list with one empty topic.
+            if (inner.isEmpty()) emptyList() else inner.split(',').map(String::trim)
+        }
+        // Now "topics" is a `List<String>` column.
+        // Add a new column with the number of topics.
+        .add("topicCount") { topics.size }
+        // Add a new column with the kind of repository.
+        .add("kind") { getKind(fullName, topics) }
+
+    // Write the updated DataFrame to a CSV file.
+    reposUpdated.writeCsv("jetbrains_repositories_new.csv")
+
+    reposUpdated
+        // Group repositories by kind
+        .groupBy { kind }
+        // And then compute the maximum stars in each group.
+        .aggregate {
+            max { stars } into "maxStars"
+        }
+        // Build a bar plot showing the maximum number of stars per repository kind.
+        .plot {
+            bars {
+                x(kind)
+                y(maxStars)
+            }
+            layout.title = "Max stars per repo kind"
+        }
+        // Save the plot to an SVG file.
+        .save("kindToStars.svg")
+}