init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,39 @@
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
.kotlin
### IntelliJ IDEA ###
.idea/modules.xml
.idea/jarRepositories.xml
.idea/compiler.xml
.idea/libraries/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store
@@ -0,0 +1,14 @@
# Kotlin DataFrame Compiler Maven Plugin Example
An IntelliJ IDEA Maven Kotlin project demonstrating the use of the
[Kotlin DataFrame Compiler Plugin](https://kotlin.github.io/dataframe/compiler-plugin.html).
We recommend using an up-to-date IntelliJ IDEA for the best experience,
as well as the latest Kotlin plugin version.
> [!WARNING]
> For proper functionality in IntelliJ IDEA requires version 2025.3 or newer.
> [Download Kotlin DataFrame Compiler Plugin Maven Example](https://github.com/Kotlin/dataframe/raw/example-projects-archives/kotlin-dataframe-plugin-maven-example.zip)
See also [Kotlin DataFrame Compiler Gradle Plugin Example](../kotlin-dataframe-plugin-gradle-example)
@@ -0,0 +1,112 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>dataframe_maven</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<kotlin.code.style>official</kotlin.code.style>
<kotlin.compiler.jvmTarget>11</kotlin.compiler.jvmTarget>
</properties>
<repositories>
<repository>
<id>mavenCentral</id>
<url>https://repo1.maven.org/maven2/</url>
</repository>
</repositories>
<build>
<sourceDirectory>src/main/kotlin</sourceDirectory>
<testSourceDirectory>src/test/kotlin</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-maven-plugin</artifactId>
<version>2.3.0-RC3</version>
<configuration>
<compilerPlugins>
<plugin>kotlin-dataframe</plugin>
</compilerPlugins>
</configuration>
<dependencies>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-maven-dataframe</artifactId>
<version>2.3.0-RC3</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
<execution>
<id>test-compile</id>
<phase>test-compile</phase>
<goals>
<goal>test-compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.2</version>
</plugin>
<plugin>
<artifactId>maven-failsafe-plugin</artifactId>
<version>2.22.2</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.6.0</version>
<configuration>
<mainClass>MainKt</mainClass>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-test-junit5</artifactId>
<version>2.3.0-RC3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.10.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlin</groupId>
<artifactId>kotlin-stdlib</artifactId>
<version>2.3.0-RC3</version>
</dependency>
<!-- DataFrame and Kandy dependencies -->
<dependency>
<groupId>org.jetbrains.kotlinx</groupId>
<artifactId>dataframe</artifactId>
<version>1.0.0-Beta4</version>
</dependency>
<dependency>
<groupId>org.jetbrains.kotlinx</groupId>
<artifactId>kandy-lets-plot</artifactId>
<version>0.8.3</version>
</dependency>
</dependencies>
</project>
@@ -0,0 +1,110 @@
package org.jetbrains.kotlinx.dataframe.examples.plugin
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.aggregate
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.convertTo
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.groupBy
import org.jetbrains.kotlinx.dataframe.api.into
import org.jetbrains.kotlinx.dataframe.api.max
import org.jetbrains.kotlinx.dataframe.api.rename
import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.io.readCsv
import org.jetbrains.kotlinx.dataframe.io.writeCsv
import org.jetbrains.kotlinx.kandy.dsl.plot
import org.jetbrains.kotlinx.kandy.letsplot.export.save
import org.jetbrains.kotlinx.kandy.letsplot.feature.layout
import org.jetbrains.kotlinx.kandy.letsplot.layers.bars
import java.net.URL
// Declare data schema for the DataFrame from jetbrains_repositories.csv.
@DataSchema
data class Repositories(
val full_name: String,
val html_url: URL,
val stargazers_count: Int,
val topics: String,
val watchers: Int,
)
// Define kinds of repositories.
enum class RepoKind {
Kotlin,
IntelliJ,
Other,
}
// A rule for determining the kind of repository based on its name and topics.
fun getKind(fullName: String, topics: List<String>): RepoKind {
fun checkContains(name: String) = name in topics || fullName.lowercase().contains(name)
return when {
checkContains("kotlin") -> RepoKind.Kotlin
checkContains("idea") || checkContains("intellij") -> RepoKind.IntelliJ
else -> RepoKind.Other
}
}
fun main() {
val repos = DataFrame
// Read DataFrame from the CSV file.
.readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
// And convert it to match the `Repositories` schema.
.convertTo<Repositories>()
// With Compiler Plugin, the DataFrame schema changes immediately after each operation:
// For example, if a new column is added or the old one is renamed (or its type is changed)
// during the operation, you can use the new name immediately in the following operations:
repos
// Add a new "name" column...
.add("name") { full_name.substringAfterLast("/") }
// ... and now we can use "name" extension in DataFrame operations, such as `filter`.
.filter { name.lowercase().contains("kotlin") }
// Let's update the DataFrame with some operations using these features.
val reposUpdated = repos
// Rename columns to CamelCase.
// Note that after that, in the following operations, extension properties will have
// new names corresponding to the column names.
.renameToCamelCase()
// Rename "stargazersCount" column to "stars".
.rename { stargazersCount }.into("stars")
// And we can immediately use the updated name in the filtering.
.filter { stars > 50 }
// Convert values in the "topic" column (which were `String` initially)
// to the list of topics.
.convert { topics }.with {
val inner = it.removeSurrounding("[", "]")
if (inner.isEmpty()) emptyList() else inner.split(',').map(String::trim)
}
// Now "topics" is a `List<String>` column.
// Add a new column with the number of topics.
.add("topicCount") { topics.size }
// Add a new column with the kind of repository.
.add("kind") { getKind(fullName, topics) }
// Write the updated DataFrame to a CSV file.
reposUpdated.writeCsv("jetbrains_repositories_new.csv")
reposUpdated
// Group repositories by kind
.groupBy { kind }
// And then compute the maximum stars in each group.
.aggregate {
max { stars } into "maxStars"
}
// Build a bar plot showing the maximum number of stars per repository kind.
.plot {
bars {
x(kind)
y(maxStars)
}
layout.title = "Max stars per repo kind"
}
// Save the plot to an SVG file.
.save("kindToStars.svg")
}