init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,10 @@
package org.jetbrains.dataframe.ksp
/**
 * Fully-qualified and short names of DataFrame library types and annotations
 * that the symbol processor matches against during resolution.
 */
public object DataFrameNames {
    /** Root package of the Kotlin DataFrame library. */
    public const val DATAFRAME_PACKAGE: String = "org.jetbrains.kotlinx.dataframe"

    /** FQ name of the @DataSchema annotation that marks schema declarations. */
    public const val DATA_SCHEMA: String = "org.jetbrains.kotlinx.dataframe.annotations.DataSchema"

    /** Short name of @ColumnName, used to match references that may not be fully resolvable. */
    public const val SHORT_COLUMN_NAME: String = "ColumnName"

    /** FQ name of the @ColumnName annotation. */
    public const val COLUMN_NAME: String = "org.jetbrains.kotlinx.dataframe.annotations.$SHORT_COLUMN_NAME"

    /** FQ name of the DataFrame type. */
    public const val DATA_FRAME: String = "$DATAFRAME_PACKAGE.DataFrame"

    /** FQ name of the DataRow type. */
    public const val DATA_ROW: String = "$DATAFRAME_PACKAGE.DataRow"
}
@@ -0,0 +1,32 @@
package org.jetbrains.dataframe.ksp
import com.google.devtools.ksp.processing.KSPLogger
import com.google.devtools.ksp.processing.Resolver
import com.google.devtools.ksp.processing.SymbolProcessor
import com.google.devtools.ksp.symbol.KSAnnotated
/**
 * KSP processor that drives both code generators: extension properties for
 * @DataSchema declarations, and generated schemas for @file:ImportDataSchema statements.
 */
public class DataFrameSymbolProcessor(
    private val codeGenerator: com.google.devtools.ksp.processing.CodeGenerator,
    private val logger: KSPLogger,
    private val resolutionDir: String?,
) : SymbolProcessor {

    override fun process(resolver: Resolver): List<KSAnnotated> {
        val extensionsGenerator = ExtensionsGenerator(resolver, codeGenerator, logger)
        val (validDataSchemas, invalidDataSchemas) = extensionsGenerator.resolveDataSchemaDeclarations()
        for (dataSchema in validDataSchemas) {
            // Declarations without a containing file (e.g. from other modules) are skipped.
            val containingFile = dataSchema.origin.containingFile
            if (containingFile != null) {
                extensionsGenerator.generateExtensions(containingFile, dataSchema.origin, dataSchema.properties)
            }
        }

        val dataSchemaGenerator = DataSchemaGenerator(resolver, resolutionDir, logger, codeGenerator)
        for (importStatement in dataSchemaGenerator.resolveImportStatements()) {
            dataSchemaGenerator.generateDataSchema(importStatement)
        }

        // by returning invalidDataSchemas we defer the processing of incomplete DataSchema declarations
        // for example when DataSchema declaration references another one generated by @file:ImportDataSchema
        return invalidDataSchemas
    }
}
@@ -0,0 +1,12 @@
package org.jetbrains.dataframe.ksp
import com.google.devtools.ksp.processing.SymbolProcessor
import com.google.devtools.ksp.processing.SymbolProcessorEnvironment
import com.google.devtools.ksp.processing.SymbolProcessorProvider
/**
 * Service-provider entry point (registered in META-INF/services) that creates
 * the DataFrame symbol processor for a KSP environment.
 */
public class DataFrameSymbolProcessorProvider : SymbolProcessorProvider {

    override fun create(environment: SymbolProcessorEnvironment): SymbolProcessor =
        DataFrameSymbolProcessor(
            codeGenerator = environment.codeGenerator,
            logger = environment.logger,
            // Optional KSP option: base directory for resolving relative @ImportDataSchema paths.
            resolutionDir = environment.options["dataframe.resolutionDir"],
        )
}
@@ -0,0 +1,333 @@
package org.jetbrains.dataframe.ksp
import com.google.devtools.ksp.KspExperimental
import com.google.devtools.ksp.getAnnotationsByType
import com.google.devtools.ksp.processing.Dependencies
import com.google.devtools.ksp.processing.KSPLogger
import com.google.devtools.ksp.processing.Resolver
import com.google.devtools.ksp.symbol.KSFile
import org.jetbrains.kotlinx.dataframe.annotations.CsvOptions
import org.jetbrains.kotlinx.dataframe.annotations.DataSchemaVisibility
import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema
import org.jetbrains.kotlinx.dataframe.annotations.JdbcOptions
import org.jetbrains.kotlinx.dataframe.annotations.JsonOptions
import org.jetbrains.kotlinx.dataframe.api.JsonPath
import org.jetbrains.kotlinx.dataframe.codeGen.CodeGenerator
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.NameNormalizer
import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGenerationReadResult
import org.jetbrains.kotlinx.dataframe.impl.codeGen.DfReadResult
import org.jetbrains.kotlinx.dataframe.impl.codeGen.from
import org.jetbrains.kotlinx.dataframe.impl.codeGen.toStandaloneSnippet
import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlCodeGenReader
import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlDfReader
import org.jetbrains.kotlinx.dataframe.io.ArrowFeather
import org.jetbrains.kotlinx.dataframe.io.CsvDeephaven
import org.jetbrains.kotlinx.dataframe.io.Excel
import org.jetbrains.kotlinx.dataframe.io.JSON
import org.jetbrains.kotlinx.dataframe.io.OpenApi
import org.jetbrains.kotlinx.dataframe.io.TsvDeephaven
import org.jetbrains.kotlinx.dataframe.io.databaseCodeGenReader
import org.jetbrains.kotlinx.dataframe.io.db.driverClassNameFromUrl
import org.jetbrains.kotlinx.dataframe.io.isUrl
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import java.io.File
import java.net.MalformedURLException
import java.net.URI
import java.net.URL
import java.sql.Connection
import java.sql.DriverManager
/**
 * Generates `<Name>.Generated.kt` files with @DataSchema markers for every
 * `@file:ImportDataSchema` annotation found via [resolver].
 *
 * A schema's data may come from a URL, a file (resolved against [resolutionDir]
 * first, then as an absolute path), or a JDBC connection (paths starting with "jdbc").
 */
@OptIn(KspExperimental::class)
public class DataSchemaGenerator(
    private val resolver: Resolver,
    private val resolutionDir: String?,
    private val logger: KSPLogger,
    private val codeGenerator: com.google.devtools.ksp.processing.CodeGenerator,
) {
    /** Collects every `@file:ImportDataSchema` annotation as an [ImportDataSchemaStatement]. */
    public fun resolveImportStatements(): List<ImportDataSchemaStatement> = resolvePathImports(resolver).toList()

    /** One parsed `@file:ImportDataSchema` annotation together with the file it was declared in. */
    public class ImportDataSchemaStatement(
        public val origin: KSFile,
        public val name: String,
        public val dataSource: CodeGeneratorDataSource,
        public val visibility: MarkerVisibility,
        public val normalizationDelimiters: List<Char>,
        public val withDefaultPath: Boolean,
        public val csvOptions: CsvOptions,
        public val jsonOptions: JsonOptions,
        public val jdbcOptions: JdbcOptions,
        public val isJdbc: Boolean = false,
        public val enableExperimentalOpenApi: Boolean = false,
    )

    /** Data location: the path string as written by the user, plus the URL the data is read from. */
    public class CodeGeneratorDataSource(public val pathRepresentation: String, public val data: URL)

    // Finds files annotated with @ImportDataSchema and parses each annotation into a statement.
    private fun resolvePathImports(resolver: Resolver) =
        resolver.getSymbolsWithAnnotation(ImportDataSchema::class.qualifiedName!!)
            .filterIsInstance<KSFile>()
            .flatMap { file ->
                file.getAnnotationsByType(ImportDataSchema::class).mapNotNull { it.toStatement(file, logger) }
            }

    /**
     * Converts the annotation into an [ImportDataSchemaStatement], treating [ImportDataSchema.path]
     * as a URL, a JDBC connection string, or a file path.
     * Returns null (after logging an error) when the path cannot be resolved.
     */
    private fun ImportDataSchema.toStatement(file: KSFile, logger: KSPLogger): ImportDataSchemaStatement? {
        val url = if (isUrl(path)) {
            try {
                URI(this.path).toURL()
            } catch (exception: MalformedURLException) {
                logger.error("'${this.path}' is not valid URL: ${exception.message}", file)
                return null
            }
        } else {
            // revisit architecture for an addition of the new data source https://github.com/Kotlin/dataframe/issues/450
            if (path.startsWith("jdbc")) {
                return ImportDataSchemaStatement(
                    origin = file,
                    name = name,
                    // URL better to make nullable or make hierarchy here
                    // (the example.com URL is a placeholder — the JDBC path is what is actually used)
                    dataSource = CodeGeneratorDataSource(this.path, URI("http://example.com/pages/").toURL()),
                    visibility = visibility.toMarkerVisibility(),
                    normalizationDelimiters = normalizationDelimiters.toList(),
                    withDefaultPath = withDefaultPath,
                    csvOptions = csvOptions,
                    jsonOptions = jsonOptions,
                    jdbcOptions = jdbcOptions,
                    isJdbc = true,
                    enableExperimentalOpenApi = enableExperimentalOpenApi,
                )
            }
            // A relative path requires the "dataframe.resolutionDir" KSP option.
            val resolutionDir: String = resolutionDir ?: run {
                reportMissingKspArgument(file)
                return null
            }
            // Prefer the path resolved against resolutionDir; fall back to treating it as absolute.
            val relativeFile = File(resolutionDir, path)
            val absoluteFile = File(path)
            val data = if (relativeFile.exists()) relativeFile else absoluteFile
            try {
                // NOTE(review): toURL() never returns null, so the elvis below is dead code — verify and remove.
                data.toURI().toURL() ?: return null
            } catch (exception: MalformedURLException) {
                logger.error(
                    "Failed to convert resolved path '${relativeFile.absolutePath}' or '${absoluteFile.absolutePath}' to URL: ${exception.message}",
                    file,
                )
                return null
            }
        }
        return ImportDataSchemaStatement(
            origin = file,
            name = name,
            dataSource = CodeGeneratorDataSource(this.path, url),
            visibility = visibility.toMarkerVisibility(),
            normalizationDelimiters = normalizationDelimiters.toList(),
            withDefaultPath = withDefaultPath,
            csvOptions = csvOptions,
            jsonOptions = jsonOptions,
            jdbcOptions = jdbcOptions,
            enableExperimentalOpenApi = enableExperimentalOpenApi,
        )
    }

    // Maps the annotation's visibility enum onto the code generator's marker visibility.
    private fun DataSchemaVisibility.toMarkerVisibility(): MarkerVisibility =
        when (this) {
            DataSchemaVisibility.INTERNAL -> MarkerVisibility.INTERNAL
            DataSchemaVisibility.IMPLICIT_PUBLIC -> MarkerVisibility.IMPLICIT_PUBLIC
            DataSchemaVisibility.EXPLICIT_PUBLIC -> MarkerVisibility.EXPLICIT_PUBLIC
        }

    private fun reportMissingKspArgument(file: KSFile) {
        logger.error(
            """
            |KSP option with key "dataframe.resolutionDir" must be set in order to use relative path in @${ImportDataSchema::class.simpleName}
            |DataFrame Gradle plugin should set it by default to "project.projectDir".
            |If you do not use DataFrame Gradle plugin, configure option manually
            """.trimMargin(),
            symbol = file,
        )
    }

    /**
     * Writes a `<name>.Generated.kt` file for [importStatement].
     *
     * Resolution order:
     *  1. JDBC: connect and derive the schema from the table / query metadata.
     *  2. Types-only readers (works for OpenAPI): generate without materializing a dataframe.
     *  3. Fallback: read the full dataframe from the URL and derive the schema from it.
     */
    public fun generateDataSchema(importStatement: ImportDataSchemaStatement) {
        val packageName = importStatement.origin.packageName.asString()
        val name = importStatement.name
        val schemaFile =
            codeGenerator.createNewFile(Dependencies(true, importStatement.origin), packageName, "$name.Generated")
        // Supported input formats; OpenAPI only when explicitly enabled.
        val formats = listOfNotNull(
            CsvDeephaven(delimiter = importStatement.csvOptions.delimiter),
            JSON(
                typeClashTactic = JSON.TypeClashTactic.valueOf(importStatement.jsonOptions.typeClashTactic),
                keyValuePaths = importStatement.jsonOptions.keyValuePaths.map(::JsonPath),
            ),
            Excel(),
            TsvDeephaven(),
            ArrowFeather(),
            if (importStatement.enableExperimentalOpenApi) OpenApi() else null,
        )

        // revisit architecture for an addition of the new data source https://github.com/Kotlin/dataframe/issues/450
        if (importStatement.isJdbc) {
            val url = importStatement.dataSource.pathRepresentation

            // Force classloading
            // TODO: probably will not work for the H2
            Class.forName(driverClassNameFromUrl(url))

            var userName = importStatement.jdbcOptions.user
            var password = importStatement.jdbcOptions.password

            // treat the passed userName and password parameters as env variables
            if (importStatement.jdbcOptions.extractCredFromEnv) {
                userName = System.getenv(userName) ?: userName
                password = System.getenv(password) ?: password
            }

            val connection = DriverManager.getConnection(
                url,
                userName,
                password,
            )
            connection.use {
                val schema = generateSchemaForImport(importStatement, connection)

                val codeGenerator = CodeGenerator.create(useFqNames = false)

                val additionalImports: List<String> = listOf()

                // Fields only, no extension properties: those are produced separately by ExtensionsGenerator.
                val codeGenResult = codeGenerator.generate(
                    schema = schema,
                    name = name,
                    fields = true,
                    extensionProperties = false,
                    isOpen = true,
                    visibility = importStatement.visibility,
                    knownMarkers = emptyList(),
                    readDfMethod = null,
                    fieldNameNormalizer = NameNormalizer.from(importStatement.normalizationDelimiters.toSet()),
                )

                val code = codeGenResult.toStandaloneSnippet(packageName, additionalImports)
                schemaFile.bufferedWriter().use {
                    it.write(code)
                }
                return
            }
        }

        // revisit architecture for an addition of the new data source https://github.com/Kotlin/dataframe/issues/450
        // works for JDBC and OpenAPI only
        // first try without creating a dataframe
        when (
            val codeGenResult = if (importStatement.isJdbc) {
                CodeGenerator.databaseCodeGenReader(importStatement.dataSource.data, name)
            } else {
                CodeGenerator.urlCodeGenReader(importStatement.dataSource.data, name, formats, false)
            }
        ) {
            is CodeGenerationReadResult.Success -> {
                val readDfMethod = codeGenResult.getReadDfMethod(
                    pathRepresentation = importStatement
                        .dataSource
                        .pathRepresentation
                        .takeIf { importStatement.withDefaultPath },
                )
                val code = codeGenResult
                    .code
                    .toStandaloneSnippet(packageName, readDfMethod.additionalImports)

                schemaFile.bufferedWriter().use {
                    it.write(code)
                }
                return
            }

            is CodeGenerationReadResult.Error -> {
                // Intentionally silent: the dataframe-based fallback below is attempted next.
                // logger.warn("Error while reading types-only from data at ${importStatement.dataSource.pathRepresentation}: ${codeGenResult.reason}")
            }
        }

        // Usually works for others
        // on error, try with reading dataframe first
        val parsedDf = when (val readResult = CodeGenerator.urlDfReader(importStatement.dataSource.data, formats)) {
            is DfReadResult.Error -> {
                logger.error(
                    "Error while reading dataframe from data at ${importStatement.dataSource.pathRepresentation}: ${readResult.reason}",
                )
                return
            }

            is DfReadResult.Success -> readResult
        }

        val readDfMethod =
            parsedDf.getReadDfMethod(
                importStatement.dataSource.pathRepresentation.takeIf { importStatement.withDefaultPath },
            )
        val codeGenerator = CodeGenerator.create(useFqNames = false)
        val codeGenResult = codeGenerator.generate(
            schema = parsedDf.schema,
            name = name,
            fields = true,
            extensionProperties = false,
            isOpen = true,
            visibility = importStatement.visibility,
            knownMarkers = emptyList(),
            readDfMethod = readDfMethod,
            fieldNameNormalizer = NameNormalizer.from(importStatement.normalizationDelimiters.toSet()),
        )
        val code = codeGenResult.toStandaloneSnippet(packageName, readDfMethod.additionalImports)
        schemaFile.bufferedWriter().use {
            it.write(code)
        }
    }

    /**
     * Builds a [DataFrameSchema] from the JDBC [connection] using either
     * `jdbcOptions.tableName` or `jdbcOptions.sqlQuery` — exactly one must be non-blank.
     *
     * @throws RuntimeException when both, or neither, of tableName/sqlQuery are filled.
     */
    private fun generateSchemaForImport(
        importStatement: ImportDataSchemaStatement,
        connection: Connection,
    ): DataFrameSchema {
        logger.info("Table name: ${importStatement.jdbcOptions.tableName}")
        logger.info("SQL query: ${importStatement.jdbcOptions.sqlQuery}")

        val tableName = importStatement.jdbcOptions.tableName
        val sqlQuery = importStatement.jdbcOptions.sqlQuery

        return when {
            isTableNameNotBlankAndQueryBlank(tableName, sqlQuery) -> generateSchemaForTable(connection, tableName)
            isQueryNotBlankAndTableBlank(tableName, sqlQuery) -> generateSchemaForQuery(connection, sqlQuery)
            areBothNotBlank(tableName, sqlQuery) -> throwBothFieldsFilledException(tableName, sqlQuery)
            else -> throwBothFieldsEmptyException(tableName, sqlQuery)
        }
    }

    private fun isTableNameNotBlankAndQueryBlank(tableName: String, sqlQuery: String) =
        tableName.isNotBlank() && sqlQuery.isBlank()

    private fun isQueryNotBlankAndTableBlank(tableName: String, sqlQuery: String) =
        sqlQuery.isNotBlank() && tableName.isBlank()

    private fun areBothNotBlank(tableName: String, sqlQuery: String) = sqlQuery.isNotBlank() && tableName.isNotBlank()

    private fun generateSchemaForTable(connection: Connection, tableName: String) =
        DataFrameSchema.readSqlTable(connection, tableName)

    private fun generateSchemaForQuery(connection: Connection, sqlQuery: String) =
        DataFrameSchema.readSqlQuery(connection, sqlQuery)

    private fun throwBothFieldsFilledException(tableName: String, sqlQuery: String): Nothing =
        throw RuntimeException(
            "Table name '$tableName' and SQL query '$sqlQuery' both are filled! " +
                "Clear 'tableName' or 'sqlQuery' properties in jdbcOptions with value to generate schema for SQL table or result of SQL query!",
        )

    private fun throwBothFieldsEmptyException(tableName: String, sqlQuery: String): Nothing =
        throw RuntimeException(
            "Table name '$tableName' and SQL query '$sqlQuery' both are empty! " +
                "Populate 'tableName' or 'sqlQuery' properties in jdbcOptions with value to generate schema for SQL table or result of SQL query!",
        )
}
@@ -0,0 +1,210 @@
package org.jetbrains.dataframe.ksp
import com.google.devtools.ksp.getVisibility
import com.google.devtools.ksp.processing.CodeGenerator
import com.google.devtools.ksp.processing.Dependencies
import com.google.devtools.ksp.processing.KSPLogger
import com.google.devtools.ksp.processing.Resolver
import com.google.devtools.ksp.symbol.ClassKind
import com.google.devtools.ksp.symbol.KSAnnotated
import com.google.devtools.ksp.symbol.KSClassDeclaration
import com.google.devtools.ksp.symbol.KSClassifierReference
import com.google.devtools.ksp.symbol.KSDeclaration
import com.google.devtools.ksp.symbol.KSFile
import com.google.devtools.ksp.symbol.KSName
import com.google.devtools.ksp.symbol.KSTypeReference
import com.google.devtools.ksp.symbol.KSValueArgument
import com.google.devtools.ksp.symbol.Modifier
import com.google.devtools.ksp.symbol.Visibility
import com.google.devtools.ksp.validate
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import java.io.IOException
import java.io.OutputStreamWriter
/**
 * Generates extension-property accessor files (`<Name>$Extensions.kt`) for
 * classes and interfaces annotated with @DataSchema.
 */
public class ExtensionsGenerator(
    private val resolver: Resolver,
    private val codeGenerator: CodeGenerator,
    private val logger: KSPLogger,
) {
    private companion object {
        // Only public and internal declarations are eligible for generated extensions.
        val EXPECTED_VISIBILITIES = setOf(Visibility.PUBLIC, Visibility.INTERNAL)
    }

    /**
     * Finds all @DataSchema-annotated class declarations and splits them into
     * resolvable declarations and ones that failed KSP validation (e.g. they reference
     * symbols not generated yet), so the caller can defer the latter to a later round.
     */
    public fun resolveDataSchemaDeclarations(): Pair<Sequence<DataSchemaDeclaration>, List<KSClassDeclaration>> {
        val dataSchemaAnnotation = resolver.getKSNameFromString(DataFrameNames.DATA_SCHEMA)
        val symbols = resolver.getSymbolsWithAnnotation(dataSchemaAnnotation.asString())
        val (validDeclarations, invalidDeclarations) = symbols
            .filterIsInstance<KSClassDeclaration>()
            .partition { it.validate() }
        val preprocessedDeclarations = validDeclarations
            .asSequence()
            .mapNotNull { it.toDataSchemaDeclarationOrNull() }
        return Pair(preprocessedDeclarations, invalidDeclarations)
    }

    /** A validated @DataSchema declaration together with all of its properties. */
    public class DataSchemaDeclaration(
        public val origin: KSClassDeclaration,
        public val properties: List<KSAnnotatedWithType>,
    )

    /** A property declaration exposed with its simple name and declared type. */
    public class KSAnnotatedWithType(
        private val declaration: KSAnnotated,
        public val simpleName: KSName,
        public val type: KSTypeReference,
    ) : KSAnnotated by declaration

    // Null when the declaration is not a class/interface or is not effectively public/internal.
    private fun KSClassDeclaration.toDataSchemaDeclarationOrNull(): DataSchemaDeclaration? =
        when {
            isClassOrInterface() && effectivelyPublicOrInternal() -> {
                DataSchemaDeclaration(
                    origin = this,
                    properties = getAllProperties()
                        .map { KSAnnotatedWithType(it, it.simpleName, it.type) }
                        .toList(),
                )
            }

            else -> null
        }

    private fun KSClassDeclaration.isClassOrInterface() =
        classKind == ClassKind.INTERFACE || classKind == ClassKind.CLASS

    private fun KSClassDeclaration.effectivelyPublicOrInternal(): Boolean =
        effectivelyPublicOrInternalOrNull(dataSchema = this) != null

    /**
     * Walks up the chain of parent declarations: a declaration is effectively
     * public/internal only if every enclosing declaration is. Logs an error and
     * returns null on the first violation; otherwise returns the effective visibility.
     */
    private fun KSDeclaration.effectivelyPublicOrInternalOrNull(dataSchema: KSClassDeclaration): Visibility? {
        val visibility = getVisibility()
        if (visibility !in EXPECTED_VISIBILITIES) {
            val message = buildString {
                append(
                    "DataSchema declaration ${dataSchema.nameString} at ${dataSchema.location} should be $EXPECTED_VISIBILITIES",
                )
                if (this@effectivelyPublicOrInternalOrNull != dataSchema) {
                    append(", but it's parent $nameString is $visibility")
                } else {
                    append("but is $visibility")
                }
            }
            logger.error(message)
            return null
        }
        return when (val parentDeclaration = parentDeclaration) {
            null -> visibility
            else -> when (parentDeclaration.effectivelyPublicOrInternalOrNull(dataSchema)) {
                // An internal parent caps the effective visibility at internal.
                Visibility.PUBLIC -> visibility
                Visibility.INTERNAL -> Visibility.INTERNAL
                null -> null
                else -> null
            }
        }
    }

    private val KSDeclaration.nameString get() = (qualifiedName ?: simpleName).asString()

    /**
     * Writes the generated extension-property file for [dataSchema].
     *
     * @throws IOException (wrapped with the declaration's location) when writing fails.
     */
    public fun generateExtensions(file: KSFile, dataSchema: KSClassDeclaration, properties: List<KSAnnotatedWithType>) {
        val packageName = file.packageName.asString()
        val fileName = getFileName(dataSchema)
        val generatedFile = codeGenerator.createNewFile(Dependencies(false, file), packageName, fileName)
        try {
            generatedFile.writer().use {
                it.appendLine("""@file:Suppress("UNCHECKED_CAST", "USELESS_CAST")""")
                if (packageName.isNotEmpty()) {
                    it.appendLine("package $packageName")
                }
                it.writeImports()
                val extensions = renderExtensions(
                    declaration = dataSchema,
                    interfaceName = dataSchema.getQualifiedNameOrThrow(),
                    visibility = getMarkerVisibility(dataSchema),
                    properties = properties.map { property ->
                        Property(getColumnName(property), property.simpleName.asString(), property.type)
                    },
                )
                it.appendLine(extensions)
            }
        } catch (e: IOException) {
            throw IOException("Error writing $fileName generated from declaration at ${file.location}", e)
        }
    }

    private fun OutputStreamWriter.writeImports() {
        appendLine("import org.jetbrains.kotlinx.dataframe.annotations.*")
        appendLine("import org.jetbrains.kotlinx.dataframe.ColumnsScope")
        appendLine("import org.jetbrains.kotlinx.dataframe.DataColumn")
        appendLine("import org.jetbrains.kotlinx.dataframe.DataFrame")
        appendLine("import org.jetbrains.kotlinx.dataframe.DataRow")
        appendLine("import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup")
        appendLine()
    }

    // File name is "<SimpleName>$Extensions" for top-level declarations,
    // "<FqName>$Extensions" for nested ones (to keep names unique).
    private fun getFileName(dataSchema: KSClassDeclaration, suffix: String = "Extensions") =
        if (dataSchema.isTopLevel) {
            val simpleName = dataSchema.simpleName.asString()
            "$simpleName${'$'}$suffix"
        } else {
            val fqName = dataSchema.getQualifiedNameOrThrow()
            "${fqName}${'$'}$suffix"
        }

    private val KSDeclaration.isTopLevel get() = parentDeclaration == null

    // Distinguishes explicit `public` modifier from default (implicit) public visibility.
    private fun getMarkerVisibility(dataSchema: KSClassDeclaration) =
        when (val visibility = dataSchema.getVisibility()) {
            Visibility.PUBLIC ->
                if (dataSchema.modifiers.contains(Modifier.PUBLIC)) {
                    MarkerVisibility.EXPLICIT_PUBLIC
                } else {
                    MarkerVisibility.IMPLICIT_PUBLIC
                }

            Visibility.INTERNAL ->
                MarkerVisibility.INTERNAL

            Visibility.PRIVATE, Visibility.PROTECTED, Visibility.LOCAL, Visibility.JAVA_PACKAGE ->
                error("DataSchema declaration should have $EXPECTED_VISIBILITIES, but was $visibility")
        }

    /**
     * Column name for [property]: the value of its @ColumnName annotation if present,
     * otherwise the property's own name.
     */
    private fun getColumnName(property: KSAnnotatedWithType): String {
        val columnNameAnnotation = property.annotations.firstOrNull { annotation ->
            val annotationType = annotation.annotationType
            // Cheap syntactic check first (short name match or unresolvable reference),
            // then the authoritative resolved FQ-name comparison.
            val typeIsColumnNameOrNull = (annotationType.element as? KSClassifierReference)
                ?.referencedName()
                ?.let { it == DataFrameNames.SHORT_COLUMN_NAME } != false
            val declarationIsColumnName = annotationType
                .resolve()
                .declaration
                .qualifiedName
                ?.asString() == DataFrameNames.COLUMN_NAME
            typeIsColumnNameOrNull && declarationIsColumnName
        }
        return if (columnNameAnnotation != null) {
            when (val arg = columnNameAnnotation.arguments.singleOrNull()) {
                null -> argumentMismatchError(property, columnNameAnnotation.arguments)
                else -> (arg.value as? String) ?: typeMismatchError(property, arg)
            }
        } else {
            property.simpleName.asString()
        }
    }

    private fun typeMismatchError(property: KSAnnotatedWithType, arg: KSValueArgument): Nothing {
        error(
            "Expected one argument of type String in annotation ColumnName on property ${property.simpleName}, but got ${arg.value}",
        )
    }

    private fun argumentMismatchError(property: KSAnnotatedWithType, args: List<KSValueArgument>): Nothing {
        error(
            "Expected one argument of type String in annotation ColumnName on property ${property.simpleName}, but got $args",
        )
    }
}
@@ -0,0 +1,20 @@
package org.jetbrains.dataframe.ksp
import com.google.devtools.ksp.symbol.KSDeclaration
import org.jetbrains.kotlinx.dataframe.impl.codeGen.quoteIfNeeded
/**
 * Fully-qualified name of this declaration with every name segment quoted
 * (backticked) when necessary.
 *
 * @throws IllegalStateException when the declaration has no qualified name.
 */
public fun KSDeclaration.getQualifiedNameOrThrow(): String =
    qualifiedName
        ?.let { name ->
            val qualifier = name.getQualifier()
            val quotedQualifier = if (qualifier.isEmpty()) {
                ""
            } else {
                // Quote each package/outer-class segment separately; keep the trailing dot.
                qualifier.split('.').joinToString(separator = ".", postfix = ".") { it.quoteIfNeeded() }
            }
            quotedQualifier + name.getShortName().quoteIfNeeded()
        }
        ?: error("@DataSchema declaration ${simpleName.asString()} at $location must have qualified name")
@@ -0,0 +1,129 @@
package org.jetbrains.dataframe.ksp
import com.google.devtools.ksp.KspExperimental
import com.google.devtools.ksp.innerArguments
import com.google.devtools.ksp.isAnnotationPresent
import com.google.devtools.ksp.symbol.KSClassDeclaration
import com.google.devtools.ksp.symbol.KSType
import com.google.devtools.ksp.symbol.KSTypeArgument
import com.google.devtools.ksp.symbol.KSTypeParameter
import com.google.devtools.ksp.symbol.KSTypeReference
import com.google.devtools.ksp.symbol.Variance
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractMarker
import org.jetbrains.kotlinx.dataframe.codeGen.BaseField
import org.jetbrains.kotlinx.dataframe.codeGen.ExtensionsCodeGenerator
import org.jetbrains.kotlinx.dataframe.codeGen.FieldType
import org.jetbrains.kotlinx.dataframe.codeGen.MarkerVisibility
import org.jetbrains.kotlinx.dataframe.codeGen.ValidFieldName
/**
 * Renders the extension-property accessors for one @DataSchema declaration.
 *
 * @param declaration the annotated class/interface
 * @param interfaceName fully-qualified (quoted where needed) name of the declaration
 * @param visibility visibility applied to the generated accessors
 * @param properties the declaration's properties (column name, field name, declared type)
 * @return the generated Kotlin source
 */
@OptIn(KspExperimental::class)
internal fun renderExtensions(
    declaration: KSClassDeclaration,
    interfaceName: String,
    visibility: MarkerVisibility,
    properties: List<Property>,
): String {
    val generator = ExtensionsCodeGenerator.create()
    // Bare type-parameter names, e.g. ["T"] ...
    val typeArguments = declaration.typeParameters.map {
        it.name.asString()
    }
    // ... and their declarations including bounds, e.g. ["T : Any"].
    val typeParameters = declaration.typeParameters.map {
        buildString {
            append(it.name.asString())
            val bounds = it.bounds.toList()
            if (bounds.isNotEmpty()) {
                append(" : ")
                append(bounds.joinToString(",") { it.resolve().render() })
            }
        }
    }
    return generator.generate(object : AbstractMarker(typeParameters, typeArguments) {
        override val name: String = interfaceName
        override val fields: List<BaseField> = properties.map {
            val type = it.propertyType.resolve()
            val qualifiedTypeReference = getQualifiedTypeReference(type)
            // Classify each property's declared type:
            // List<@DataSchema T> or DataFrame<T> -> frame column,
            // @DataSchema-annotated type -> column group (rendered as object),
            // DataRow<T> -> column group (not rendered as object),
            // anything else -> plain value column.
            val fieldType = when {
                (qualifiedTypeReference == "kotlin.collections.List" && type.singleTypeArgumentIsDataSchema()) ||
                    qualifiedTypeReference == DataFrameNames.DATA_FRAME ->
                    FieldType.FrameFieldType(
                        markerName = type.renderTypeArguments(),
                        nullable = type.isMarkedNullable,
                        renderAsList = true,
                    )

                type.declaration.isAnnotationPresent(DataSchema::class) ->
                    FieldType.GroupFieldType(markerName = type.render(), renderAsObject = true)

                qualifiedTypeReference == DataFrameNames.DATA_ROW ->
                    FieldType.GroupFieldType(
                        markerName = type.renderTypeArguments(),
                        renderAsObject = false,
                    )

                else -> FieldType.ValueFieldType(type.render())
            }
            BaseFieldImpl(
                fieldName = ValidFieldName.of(it.fieldName),
                columnName = it.columnName,
                fieldType = fieldType,
            )
        }
        override val visibility: MarkerVisibility = visibility
    }).declarations
}
/** FQ name of the type's declaration; type parameters use just their short name. */
private fun getQualifiedTypeReference(type: KSType): String {
    val declaration = type.declaration
    return if (declaration is KSTypeParameter) {
        declaration.name.getShortName()
    } else {
        declaration.getQualifiedNameOrThrow()
    }
}
/** True when this type has exactly one type argument whose type is annotated with @DataSchema. */
@OptIn(KspExperimental::class)
private fun KSType.singleTypeArgumentIsDataSchema(): Boolean {
    val onlyArgument = innerArguments.singleOrNull() ?: return false
    return onlyArgument.type?.resolve()?.declaration?.isAnnotationPresent(DataSchema::class) ?: false
}
/** Renders this type as Kotlin source: qualified name, type arguments, and nullability marker. */
private fun KSType.render(): String {
    val renderedName = getQualifiedTypeReference(this)
    val renderedArguments = if (innerArguments.isEmpty()) "" else "<${renderTypeArguments()}>"
    val nullabilityMarker = if (isMarkedNullable) "?" else ""
    return renderedName + renderedArguments + nullabilityMarker
}
/** Renders this type's arguments as a comma-separated list. */
private fun KSType.renderTypeArguments(): String =
    innerArguments.joinToString(separator = ", ") { argument -> render(argument) }
/** Renders one type argument, including its variance label ("*", "out", "in", or none). */
private fun render(typeArgument: KSTypeArgument): String {
    val variance = typeArgument.variance
    // Star projections have no type to render.
    if (variance == Variance.STAR) return variance.label
    val renderedType = typeArgument.type?.resolve()?.render()
        ?: error("typeArgument.type should only be null for Variance.STAR")
    // Invariant arguments have an empty label and need no separating space.
    return if (variance.label.isEmpty()) renderedType else "${variance.label} $renderedType"
}
/** One schema property: its dataframe column name, Kotlin field name, and declared type. */
internal class Property(val columnName: String, val fieldName: String, val propertyType: KSTypeReference)
/** Plain [BaseField] carrier handed to the extensions code generator. */
internal class BaseFieldImpl(
    override val fieldName: ValidFieldName,
    override val columnName: String,
    override val fieldType: FieldType,
) : BaseField
@@ -0,0 +1 @@
org.jetbrains.dataframe.ksp.DataFrameSymbolProcessorProvider
@@ -0,0 +1,279 @@
package org.jetbrains.dataframe.ksp
import com.tschuchort.compiletesting.SourceFile
import io.kotest.assertions.asClue
import io.kotest.inspectors.forAtLeastOne
import io.kotest.matchers.shouldBe
import io.kotest.matchers.string.shouldContain
import org.jetbrains.dataframe.ksp.runner.KotlinCompileTestingCompilationResult
import org.jetbrains.dataframe.ksp.runner.KspCompilationTestRunner
import org.jetbrains.dataframe.ksp.runner.TestCompilationParameters
import org.junit.AfterClass
import org.junit.Before
import org.junit.BeforeClass
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import kotlin.test.Test
// In-memory H2 database (MySQL compatibility mode, case-sensitive identifiers),
// kept alive for the whole JVM via DB_CLOSE_DELAY=-1; shared by all tests in this file.
const val CONNECTION_URL = "jdbc:h2:mem:test;DB_CLOSE_DELAY=-1;MODE=MySQL;DATABASE_TO_UPPER=false"
@Suppress("unused")
class DataFrameJdbcSymbolProcessorTest {
companion object {
    // Shared connection to the in-memory test database; opened once per test class.
    private lateinit var connection: Connection

    // Common imports prepended to generated test sources.
    val imports =
        """
        import org.jetbrains.kotlinx.dataframe.annotations.*
        import org.jetbrains.kotlinx.dataframe.columns.*
        import org.jetbrains.kotlinx.dataframe.*
        """.trimIndent()

    // Expected name of the extensions file produced by the processor.
    const val GENERATED_FILE = $$"HelloJdbc$Extensions.kt"

    /** Opens the shared connection and populates the test database once before all tests. */
    @JvmStatic
    @BeforeClass
    fun setupDB() {
        connection = DriverManager.getConnection(CONNECTION_URL)
        createTestDatabase(connection)
    }

    /** Closes the shared connection after all tests; failures are only logged. */
    @JvmStatic
    @AfterClass
    fun close() {
        try {
            connection.close()
        } catch (e: SQLException) {
            e.printStackTrace()
        }
    }

    // Creates the Customer and Sale tables and seeds them with fixed rows.
    private fun createTestDatabase(connection: Connection) {
        // Create table Customer
        connection.createStatement().execute(
            """
            CREATE TABLE Customer (
                id INT PRIMARY KEY,
                name VARCHAR(50),
                age INT
            )
            """.trimIndent(),
        )

        // Create table Sale
        connection.createStatement().execute(
            """
            CREATE TABLE Sale (
                id INT PRIMARY KEY,
                customerId INT,
                amount DECIMAL(10, 2)
            )
            """.trimIndent(),
        )

        // add data to the Customer table
        connection.createStatement().execute("INSERT INTO Customer (id, name, age) VALUES (1, 'John', 40)")
        connection.createStatement().execute("INSERT INTO Customer (id, name, age) VALUES (2, 'Alice', 25)")
        connection.createStatement().execute("INSERT INTO Customer (id, name, age) VALUES (3, 'Bob', 47)")

        // add data to the Sale table
        connection.createStatement().execute("INSERT INTO Sale (id, customerId, amount) VALUES (1, 1, 100.50)")
        connection.createStatement().execute("INSERT INTO Sale (id, customerId, amount) VALUES (2, 2, 50.00)")
        connection.createStatement().execute("INSERT INTO Sale (id, customerId, amount) VALUES (3, 1, 75.25)")
        connection.createStatement().execute("INSERT INTO Sale (id, customerId, amount) VALUES (4, 3, 35.15)")
    }
}
/** Starts each test from a clean compilation output directory. */
@Before
fun setup() {
    KspCompilationTestRunner.compilationDir.deleteRecursively()
}
// A path that is neither a URL, a JDBC string, nor an existing file must fail compilation.
@Test
fun `failed compilation on wrong `() {
    val result = KspCompilationTestRunner.compile(
        TestCompilationParameters(
            sources = listOf(
                SourceFile.kotlin(
                    "MySources.kt",
                    """
                    @file:ImportDataSchema(name = "Customer", path = "123")

                    package test

                    import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema
                    import org.jetbrains.kotlinx.dataframe.annotations.JdbcOptions
                    import org.jetbrains.kotlinx.dataframe.api.filter
                    import org.jetbrains.kotlinx.dataframe.DataFrame
                    import org.jetbrains.kotlinx.dataframe.api.cast
                    import java.sql.Connection
                    import java.sql.DriverManager
                    import java.sql.SQLException
                    import org.jetbrains.kotlinx.dataframe.io.readSqlTable
                    import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
                    """.trimIndent(),
                ),
            ),
        ),
    )
    result.successfulCompilation shouldBe false
}
// Importing a schema from a JDBC table must produce Customer.Generated.kt containing
// a property for the Customer.name column.
@Test
fun `schema is imported`() {
    val result = KspCompilationTestRunner.compile(
        TestCompilationParameters(
            sources = listOf(
                SourceFile.kotlin(
                    "MySources.kt",
                    """
                    @file:ImportDataSchema(
                        "Customer",
                        "$CONNECTION_URL",
                        jdbcOptions = JdbcOptions("", "", tableName = "Customer")
                    )

                    package test

                    import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema
                    import org.jetbrains.kotlinx.dataframe.annotations.JdbcOptions
                    import org.jetbrains.kotlinx.dataframe.api.filter
                    import org.jetbrains.kotlinx.dataframe.DataFrame
                    import org.jetbrains.kotlinx.dataframe.api.cast
                    import java.sql.Connection
                    import java.sql.DriverManager
                    import java.sql.SQLException
                    import org.jetbrains.kotlinx.dataframe.io.readSqlTable
                    import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
                    """.trimIndent(),
                ),
            ),
        ),
    )
    println(result.kspGeneratedFiles)
    result.inspectLines("Customer.Generated.kt") {
        it.forAtLeastOne { it shouldContain "val name: String" }
    }
}
/**
 * Test code is copied from h2Test `read from table` test.
 *
 * Verifies that code using the generated Customer schema (readSqlTable + cast + filter,
 * via both a live Connection and a DbConnectionConfig) compiles successfully.
 */
@Test
fun `schema extracted via readFromDB method is resolved`() {
    val result = KspCompilationTestRunner.compile(
        TestCompilationParameters(
            sources = listOf(
                SourceFile.kotlin(
                    "MySources.kt",
                    """
                    @file:ImportDataSchema(
                        "Customer",
                        "$CONNECTION_URL",
                        jdbcOptions = JdbcOptions("", "", tableName = "Customer")
                    )

                    package test

                    import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema
                    import org.jetbrains.kotlinx.dataframe.annotations.JdbcOptions
                    import org.jetbrains.kotlinx.dataframe.api.filter
                    import org.jetbrains.kotlinx.dataframe.DataFrame
                    import org.jetbrains.kotlinx.dataframe.api.cast
                    import java.sql.Connection
                    import java.sql.DriverManager
                    import java.sql.SQLException
                    import org.jetbrains.kotlinx.dataframe.io.readSqlTable
                    import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig

                    fun main() {
                        val tableName = "Customer"
                        DriverManager.getConnection("$CONNECTION_URL").use { connection ->
                            val df = DataFrame.readSqlTable(connection, tableName).cast<Customer>()
                            df.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }

                            val df1 = DataFrame.readSqlTable(connection, tableName, 1).cast<Customer>()
                            df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }

                            val dbConfig = DbConnectionConfig(url = "$CONNECTION_URL")
                            val df2 = DataFrame.readSqlTable(dbConfig, tableName).cast<Customer>()
                            df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }

                            val df3 = DataFrame.readSqlTable(dbConfig, tableName, 1).cast<Customer>()
                            df3.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }
                        }
                    }
                    """.trimIndent(),
                ),
            ),
        ),
    )
    result.successfulCompilation shouldBe true
}
/**
 * Same scenario as the test above, but with `extractCredFromEnv = true` so the
 * database credentials are taken from environment variables during schema
 * extraction.
 */
@Test
fun `schema extracted via readFromDB method is resolved with db credentials from env variables`() {
    val parameters = TestCompilationParameters(
        sources = listOf(
            SourceFile.kotlin(
                "MySources.kt",
                """
                @file:ImportDataSchema(
                    "Customer",
                    "$CONNECTION_URL",
                    jdbcOptions = JdbcOptions("", "", extractCredFromEnv = true, tableName = "Customer")
                )
                package test
                import org.jetbrains.kotlinx.dataframe.annotations.ImportDataSchema
                import org.jetbrains.kotlinx.dataframe.annotations.JdbcOptions
                import org.jetbrains.kotlinx.dataframe.api.filter
                import org.jetbrains.kotlinx.dataframe.DataFrame
                import org.jetbrains.kotlinx.dataframe.api.cast
                import java.sql.Connection
                import java.sql.DriverManager
                import java.sql.SQLException
                import org.jetbrains.kotlinx.dataframe.io.readSqlTable
                import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
                fun main() {
                    val tableName = "Customer"
                    DriverManager.getConnection("$CONNECTION_URL").use { connection ->
                        val df = DataFrame.readSqlTable(connection, tableName).cast<Customer>()
                        df.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }
                        val df1 = DataFrame.readSqlTable(connection, tableName, 1).cast<Customer>()
                        df1.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }
                        val dbConfig = DbConnectionConfig(url = "$CONNECTION_URL")
                        val df2 = DataFrame.readSqlTable(dbConfig, tableName).cast<Customer>()
                        df2.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }
                        val df3 = DataFrame.readSqlTable(dbConfig, tableName, 1).cast<Customer>()
                        df3.filter { it[Customer::age] != null && it[Customer::age]!! > 30 }
                    }
                }
                """.trimIndent(),
            ),
        ),
    )
    KspCompilationTestRunner.compile(parameters).successfulCompilation shouldBe true
}
/** Inspects the lines of the default generated file ([GENERATED_FILE]). */
private fun KotlinCompileTestingCompilationResult.inspectLines(f: (List<String>) -> Unit) =
    inspectLines(GENERATED_FILE, f)
/**
 * Finds the single KSP-generated file named [filename] and passes its lines to
 * [f], attaching them as a Kotest clue so a failing assertion prints the file.
 */
private fun KotlinCompileTestingCompilationResult.inspectLines(filename: String, f: (List<String>) -> Unit) {
    val generated = kspGeneratedFiles.single { file -> file.name == filename }
    generated.readLines().asClue(f)
}
}
@@ -0,0 +1,26 @@
package org.jetbrains.dataframe.ksp
import io.ktor.server.application.call
import io.ktor.server.engine.embeddedServer
import io.ktor.server.netty.Netty
import io.ktor.server.response.respondFile
import io.ktor.server.routing.get
import io.ktor.server.routing.routing
import java.io.File
/**
 * Serves [file] over HTTP from a local embedded Netty server for the duration
 * of [f], then shuts the server down.
 *
 * @param file the file served at the server root ("/")
 * @param port local port to bind; defaults to 14771 to stay in sync with the
 *   Gradle-side copy of this helper (see comment below)
 * @param f callback invoked with the root URL of the running server
 */
// duplicated in gradle/EmbeddedServerRunners.kt
fun useHostedFile(file: File, port: Int = 14771, f: (url: String) -> Unit) {
    val server = embeddedServer(Netty, port = port) {
        routing {
            get("/") {
                call.respondFile(file)
            }
        }
    }.start()
    try {
        f("http://0.0.0.0:$port/")
    } finally {
        // grace period then hard timeout, both in milliseconds
        server.stop(500, 1000)
    }
}
@@ -0,0 +1,134 @@
@file:OptIn(ExperimentalCompilerApi::class)
package org.jetbrains.dataframe.ksp.runner
import com.tschuchort.compiletesting.KotlinCompilation
import com.tschuchort.compiletesting.SourceFile
import org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi
import org.jetbrains.kotlin.config.JvmTarget
import java.io.File
import java.io.OutputStream
import java.net.URLClassLoader
internal object KotlinCompilationUtil {

    /**
     * Creates a [KotlinCompilation] preconfigured for these tests: JVM 1.8
     * target, host classpath disabled in favor of the cached [Classpaths]
     * (plus any extra [classpaths]), and all compiler messages redirected to
     * [outputStream].
     *
     * Note: the original version also resolved an unused `ksp/srcInput/kotlin`
     * root; that dead local has been removed.
     */
    fun prepareCompilation(
        sources: List<SourceFile>,
        outputStream: OutputStream,
        tempDir: File,
        classpaths: List<File> = emptyList(),
    ): KotlinCompilation {
        val compilation = KotlinCompilation()
        compilation.workingDir = tempDir
        val srcRoot = compilation.workingDir.resolve("ksp/srcInput")
        val javaSrcRoot = srcRoot.resolve("java")
        compilation.sources = sources
        // workaround for https://github.com/tschuchortdev/kotlin-compile-testing/issues/105
        compilation.kotlincArguments += "-Xjava-source-roots=${javaSrcRoot.absolutePath}"
        compilation.jvmDefault = "all"
        compilation.jvmTarget = JvmTarget.JVM_1_8.description
        // Don't let KCT re-discover the host classpath; the cached copy below is used instead.
        compilation.inheritClassPath = false
        compilation.verbose = false
        compilation.classpaths = Classpaths.inheritedClasspath + classpaths
        compilation.messageOutputStream = outputStream
        compilation.kotlinStdLibJar = Classpaths.kotlinStdLibJar
        compilation.kotlinStdLibCommonJar = Classpaths.kotlinStdLibCommonJar
        compilation.kotlinStdLibJdkJar = Classpaths.kotlinStdLibJdkJar
        compilation.kotlinReflectJar = Classpaths.kotlinReflectJar
        compilation.kotlinScriptRuntimeJar = Classpaths.kotlinScriptRuntimeJar
        return compilation
    }

    /**
     * Helper object to persist common classpaths resolved by KCT to make sure it does not
     * re-resolve host classpath repeatedly and also runs compilation with a smaller classpath.
     * see: https://github.com/tschuchortdev/kotlin-compile-testing/issues/113
     */
    private object Classpaths {
        val inheritedClasspath: List<File>

        /**
         * These jars are files that Kotlin Compile Testing discovers from classpath. It uses a
         * rather expensive way of discovering these so we cache them here for now.
         *
         * We can remove this cache once we update to a version that includes the fix in KCT:
         * https://github.com/tschuchortdev/kotlin-compile-testing/pull/114
         */
        val kotlinStdLibJar: File?
        val kotlinStdLibCommonJar: File?
        val kotlinStdLibJdkJar: File?
        val kotlinReflectJar: File?
        val kotlinScriptRuntimeJar: File?

        init {
            // create a KotlinCompilation to resolve common jars
            val compilation = KotlinCompilation()
            kotlinStdLibJar = compilation.kotlinStdLibJar
            kotlinStdLibCommonJar = compilation.kotlinStdLibCommonJar
            kotlinStdLibJdkJar = compilation.kotlinStdLibJdkJar
            kotlinReflectJar = compilation.kotlinReflectJar
            kotlinScriptRuntimeJar = compilation.kotlinScriptRuntimeJar
            inheritedClasspath = getClasspathFromClassloader(
                KotlinCompilationUtil::class.java.classLoader,
            )
        }
    }

    // ported from https://github.com/google/compile-testing/blob/master/src/main/java/com
    // /google/testing/compile/Compiler.java#L231
    private fun getClasspathFromClassloader(referenceClassLoader: ClassLoader): List<File> {
        val platformClassLoader: ClassLoader = ClassLoader.getPlatformClassLoader()
        var currentClassloader = referenceClassLoader
        val systemClassLoader = ClassLoader.getSystemClassLoader()
        // Concatenate search paths from all classloaders in the hierarchy
        // 'till the system classloader.
        val classpaths: MutableSet<String> = LinkedHashSet()
        while (true) {
            if (currentClassloader === systemClassLoader) {
                classpaths.addAll(getSystemClasspaths())
                break
            }
            if (currentClassloader === platformClassLoader) {
                break
            }
            check(currentClassloader is URLClassLoader) {
                """
                Classpath for compilation could not be extracted
                since $currentClassloader is not an instance of URLClassloader
                """.trimIndent()
            }
            // We only know how to extract classpaths from URLClassloaders.
            currentClassloader.urLs.forEach { url ->
                check(url.protocol == "file") {
                    """
                    Given classloader consists of classpaths which are unsupported for
                    compilation.
                    """.trimIndent()
                }
                classpaths.add(url.path)
            }
            currentClassloader = currentClassloader.parent
        }
        // Drop entries that no longer exist on disk.
        return classpaths.map { File(it) }.filter { it.exists() }
    }
}
/**
 * Returns the set of [File]s on the system classpath.
 *
 * @see getSystemClasspaths
 */
fun getSystemClasspathFiles(): Set<File> {
    val paths = getSystemClasspaths()
    return paths.mapTo(LinkedHashSet()) { path -> File(path) }
}
/**
 * Returns the raw file paths from the JVM's system classpath
 * (the `java.class.path` property, split on the platform separator).
 *
 * @see getSystemClasspathFiles
 */
fun getSystemClasspaths(): Set<String> {
    // File.pathSeparator is the platform classpath separator (':' or ';'),
    // replacing a nullable System.getProperty("path.separator")!! lookup.
    val classpath = checkNotNull(System.getProperty("java.class.path")) {
        "java.class.path system property is not set"
    }
    return classpath.split(File.pathSeparator).toSet()
}
@@ -0,0 +1,117 @@
@file:OptIn(ExperimentalCompilerApi::class)
package org.jetbrains.dataframe.ksp.runner
import com.tschuchort.compiletesting.JvmCompilationResult
import com.tschuchort.compiletesting.KotlinCompilation
import com.tschuchort.compiletesting.SourceFile
import com.tschuchort.compiletesting.kspProcessorOptions
import com.tschuchort.compiletesting.kspSourcesDir
import com.tschuchort.compiletesting.symbolProcessorProviders
import com.tschuchort.compiletesting.useKsp2
import org.jetbrains.dataframe.ksp.DataFrameSymbolProcessorProvider
import org.jetbrains.kotlin.compiler.plugin.ExperimentalCompilerApi
import java.io.ByteArrayOutputStream
import java.io.File
import java.nio.file.Paths
/**
 * Outcome of a two-step (KSP then plain) test compilation run by
 * [KspCompilationTestRunner.compile].
 *
 * @property delegate raw result of the final (non-KSP) compilation step
 * @property successfulCompilation true only when the final step exited with OK;
 *   false when either step failed with a compilation error
 * @property kspGeneratedFiles every file KSP emitted (both java and kotlin roots)
 * @property outputSourceDirs the KSP java/kotlin generated-source roots
 * @property rawOutput combined compiler message output from both steps
 */
@Suppress("unused")
internal class KotlinCompileTestingCompilationResult(
    val delegate: JvmCompilationResult,
    val successfulCompilation: Boolean,
    val kspGeneratedFiles: List<File>,
    val outputSourceDirs: List<File>,
    private val rawOutput: String,
)
/**
 * Input for [KspCompilationTestRunner.compile].
 *
 * @property sources test source files to compile
 * @property classpath extra classpath entries appended to the inherited host classpath
 * @property options KSP processor options forwarded to the symbol processor
 */
internal data class TestCompilationParameters(
    val sources: List<SourceFile> = emptyList(),
    val classpath: List<File> = emptyList(),
    val options: Map<String, String> = emptyMap(),
)
internal object KspCompilationTestRunner {

    // Shared working directory for all test compilations, under the Gradle build dir.
    val compilationDir: File = Paths.get("build/test-compile").toAbsolutePath().toFile()

    /**
     * Compiles [params] in two steps: first with KSP (running
     * [DataFrameSymbolProcessorProvider]) to produce generated sources, then a
     * plain compilation of the original sources plus everything KSP generated.
     * Returns a result describing the second step and the generated files.
     */
    fun compile(params: TestCompilationParameters): KotlinCompileTestingCompilationResult {
        // looks like this requires a kotlin source file
        // see: https://github.com/tschuchortdev/kotlin-compile-testing/issues/57
        val sources = params.sources + SourceFile.kotlin("placeholder.kt", "")

        // Both compilation steps write their messages into the same stream.
        val combinedOutputStream = ByteArrayOutputStream()
        val kspCompilation = KotlinCompilationUtil.prepareCompilation(
            sources = sources,
            outputStream = combinedOutputStream,
            classpaths = params.classpath,
            tempDir = compilationDir,
        )
        kspCompilation.kspProcessorOptions.putAll(params.options)
        // We don't support KSP2, but because we target Kotlin 2.2 the tests only work if I set them up like this.
        kspCompilation.useKsp2()
        kspCompilation.kspProcessorOptions["ksp.useKSP2"] = "false"
        kspCompilation.kspProcessorOptions["useKSP2"] = "false"
        kspCompilation.symbolProcessorProviders = mutableListOf(DataFrameSymbolProcessorProvider())
        kspCompilation.compile().also {
            println(it.messages)
            if (it.exitCode == KotlinCompilation.ExitCode.COMPILATION_ERROR) {
                // The KSP step itself failed to compile: report failure directly,
                // with no generated files or output dirs.
                return KotlinCompileTestingCompilationResult(
                    delegate = it,
                    successfulCompilation = false,
                    kspGeneratedFiles = emptyList(),
                    outputSourceDirs = emptyList(),
                    rawOutput = combinedOutputStream.toString(Charsets.UTF_8),
                )
            }
        }
        // ignore KSP result for now because KSP stops compilation, which might create false
        // negatives when java code accesses kotlin code.
        // TODO: fix once https://github.com/tschuchortdev/kotlin-compile-testing/issues/72 is
        //  fixed

        // after ksp, compile without ksp with KSP's output as input
        val finalCompilation = KotlinCompilationUtil.prepareCompilation(
            sources = sources,
            outputStream = combinedOutputStream,
            classpaths = params.classpath,
            tempDir = compilationDir,
        )
        // build source files from generated code
        finalCompilation.sources += kspCompilation.kspJavaSourceDir.collectSourceFiles() +
            kspCompilation.kspKotlinSourceDir.collectSourceFiles()
        val result = finalCompilation.compile()
        println(result.messages)
        return KotlinCompileTestingCompilationResult(
            delegate = result,
            successfulCompilation = result.exitCode == KotlinCompilation.ExitCode.OK,
            outputSourceDirs = listOf(
                kspCompilation.kspJavaSourceDir,
                kspCompilation.kspKotlinSourceDir,
            ),
            kspGeneratedFiles = kspCompilation.kspJavaSourceDir.collectFiles() +
                kspCompilation.kspKotlinSourceDir.collectFiles(),
            rawOutput = combinedOutputStream.toString(Charsets.UTF_8),
        )
    }

    // TODO get rid of these once kotlin compile testing supports two step compilation for KSP.
    //  https://github.com/tschuchortdev/kotlin-compile-testing/issues/72

    // Root of KSP-generated Java sources.
    private val KotlinCompilation.kspJavaSourceDir: File
        get() = kspSourcesDir.resolve("java")

    // Root of KSP-generated Kotlin sources.
    private val KotlinCompilation.kspKotlinSourceDir: File
        get() = kspSourcesDir.resolve("kotlin")

    // Wraps each generated file as a SourceFile for the second compilation step.
    private fun File.collectSourceFiles(): List<SourceFile> =
        walkTopDown()
            .filter { it.isFile }
            .map { file -> SourceFile.fromPath(file) }
            .toList()

    // Lists all generated files (exposed to tests via kspGeneratedFiles).
    private fun File.collectFiles(): List<File> =
        walkTopDown()
            .filter { it.isFile }
            .toList()
}