init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
+16
View File
@@ -0,0 +1,16 @@
## :dataframe-jupyter
This module, published as `dataframe-jupyter`, contains all logic and tests for DataFrame to be able to work
in [Jupyter notebooks](https://kotlin.github.io/dataframe/gettingstartedjupyternotebook.html)
and the [Kotlin Notebook IntelliJ Plugin](https://kotlin.github.io/dataframe/usage-with-kotlin-notebook-plugin.html).
The main integration point is at [Integration.kt](src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/Integration.kt).
This is what will be called when people write `%use dataframe` thanks to our
[Kotlin Notebook library descriptor](https://github.com/Kotlin/kotlin-jupyter-libraries/blob/master/dataframe.json).
This module is a friend module of [`:core`](../core) to be able to access internal APIs.
See [Get started with Kotlin DataFrame on Jupyter Notebook](https://kotlin.github.io/dataframe/gettingstartedjupyternotebook.html),
and [Usage with Kotlin Notebook Plugin](https://kotlin.github.io/dataframe/usage-with-kotlin-notebook-plugin.html).
This module targets Java 11 because of a restriction imposed by `org.jetbrains.kotlin.jupyter`.
+5
View File
@@ -0,0 +1,5 @@
public final class org/jetbrains/kotlinx/dataframe/jupyter/IntegrationKt {
public static final fun useSchemas (Lorg/jetbrains/kotlinx/jupyter/api/KotlinKernelHost;Ljava/lang/Iterable;)V
public static final fun useSchemas (Lorg/jetbrains/kotlinx/jupyter/api/KotlinKernelHost;[Lkotlin/reflect/KClass;)V
}
+71
View File
@@ -0,0 +1,71 @@
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
// Build plugins for this module: the `kotlinJvm11` convention plugin plus the publisher,
// Jupyter API and binary-compatibility-validator plugins from the version catalog.
plugins {
with(convention.plugins) {
alias(kotlinJvm11)
}
with(libs.plugins) {
alias(publisher)
alias(jupyter.api)
alias(binary.compatibility.validator)
}
}
group = "org.jetbrains.kotlinx"
// Artifact repositories. The declaration order is significant (see the note below),
// so do not reorder these entries.
repositories {
// geo repository should come before Maven Central
maven(url = "https://repo.osgeo.org/repository/release")
maven(url = "https://packages.jetbrains.team/maven/p/kds/kotlin-ds-maven")
mavenCentral()
mavenLocal()
}
/** Excludes the `org.jetbrains.kotlinx:dataframe` module from the dependencies pulled in by this dependency. */
fun ExternalModuleDependency.excludeDataFrame() =
    exclude(group = "org.jetbrains.kotlinx", module = "dataframe")
// Module dependencies: the DataFrame project itself is exposed as API; everything under
// `testImplementation` is only available to the test source set.
dependencies {
api(projects.dataframe)
// logger, need it for apache poi
implementation(libs.log4j.core)
implementation(libs.log4j.api)
testImplementation(libs.junit)
testImplementation(libs.serialization.json)
// experimental
testImplementation(projects.dataframeOpenapiGenerator)
testImplementation(projects.dataframeOpenapi)
// NOTE(review): `dataframeJupyter` looks like this very module (see publicationName below) —
// confirm the self-dependency in the test configuration is intentional.
testImplementation(projects.dataframeJupyter)
testImplementation(projects.dataframeGeoJupyter)
// Kandy artifacts are added without their own `org.jetbrains.kotlinx:dataframe` dependency.
testImplementation(libs.kandy) { excludeDataFrame() }
testImplementation(libs.kandy.geo) { excludeDataFrame() }
testImplementation(libs.kandy.stats) { excludeDataFrame() }
testImplementation(libs.kotestAssertions) {
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
}
}
// Adds the `:core` project directory to the Kotlin compiler's friend paths so that this module
// can reference `:core`'s internal declarations.
tasks.withType<KotlinCompile> {
friendPaths.from(project(projects.core.path).projectDir)
}
// Registers the `Integration` class as the library producer for the Jupyter API resources.
tasks.processJupyterApiResources {
libraryProducers = listOf("org.jetbrains.kotlinx.dataframe.jupyter.Integration")
}
// Tests run with a 2 GiB maximum heap.
tasks.test {
maxHeapSize = "2048m"
}
// Publication settings: the artifact id (and package name) follow the Gradle project name.
kotlinPublications {
publication {
publicationName = "dataframeJupyter"
artifactId = project.name
description = "Kotlin DataFrame integration with Kotlin Jupyter"
packageName = artifactId
}
}
@@ -0,0 +1,391 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.kotlinx.dataframe.AnyCol
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.Convert
import org.jetbrains.kotlinx.dataframe.api.FormatClause
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
import org.jetbrains.kotlinx.dataframe.api.Gather
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.GroupClause
import org.jetbrains.kotlinx.dataframe.api.InsertClause
import org.jetbrains.kotlinx.dataframe.api.Merge
import org.jetbrains.kotlinx.dataframe.api.MoveClause
import org.jetbrains.kotlinx.dataframe.api.Pivot
import org.jetbrains.kotlinx.dataframe.api.PivotGroupBy
import org.jetbrains.kotlinx.dataframe.api.ReducedGroupBy
import org.jetbrains.kotlinx.dataframe.api.ReducedPivot
import org.jetbrains.kotlinx.dataframe.api.ReducedPivotGroupBy
import org.jetbrains.kotlinx.dataframe.api.RenameClause
import org.jetbrains.kotlinx.dataframe.api.ReplaceClause
import org.jetbrains.kotlinx.dataframe.api.Split
import org.jetbrains.kotlinx.dataframe.api.SplitWithTransform
import org.jetbrains.kotlinx.dataframe.api.Update
import org.jetbrains.kotlinx.dataframe.api.asDataFrame
import org.jetbrains.kotlinx.dataframe.api.columnsCount
import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.codeGen.CodeGenerator
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGenerationReadResult
import org.jetbrains.kotlinx.dataframe.impl.codeGen.urlCodeGenReader
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
import org.jetbrains.kotlinx.dataframe.io.SupportedCodeGenerationFormat
import org.jetbrains.kotlinx.jupyter.api.FieldHandler
import org.jetbrains.kotlinx.jupyter.api.HTML
import org.jetbrains.kotlinx.jupyter.api.JupyterClientType
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
import org.jetbrains.kotlinx.jupyter.api.Notebook
import org.jetbrains.kotlinx.jupyter.api.VariableName
import org.jetbrains.kotlinx.jupyter.api.declare
import org.jetbrains.kotlinx.jupyter.api.libraries.ColorScheme
import org.jetbrains.kotlinx.jupyter.api.libraries.FieldHandlerFactory
import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
import org.jetbrains.kotlinx.jupyter.api.libraries.resources
import kotlin.reflect.KClass
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
/** Users will get an error if their Kotlin Jupyter kernel is older than this version. */
private const val MIN_KERNEL_VERSION = "0.15.0.606"
/**
 * Schema classes queued through [useSchemas].
 * The integration's `beforeCellExecution` hook generates code for the queued classes and then clears this list.
 */
internal val newDataSchemas = mutableListOf<KClass<*>>()
internal class Integration(private val notebook: Notebook, private val options: MutableMap<String, String?>) :
JupyterIntegration() {
val version = options["v"]
// TODO temporary settings while these experimental modules are being developed
private val enableExperimentalCsv = options["enableExperimentalCsv"]
private val enableExperimentalOpenApi = options["enableExperimentalOpenApi"]
private val enableExperimentalGeo = options["enableExperimentalGeo"]
/**
 * Generates and executes data schema declarations for a variable holding an [ImportDataSchema].
 *
 * The schema is read from `importDataSchema.url` and generated under the name `<property name>DataSchema`.
 * On success the additional imports and the generated code are executed and a confirmation is displayed;
 * on failure only an error message is displayed.
 *
 * The displayed messages are embedded into generated `DISPLAY("...")` code. Because the URL, the failure
 * reason and the property name are not under our control, the message text is escaped first so that
 * quotes, backslashes, `$` and line breaks cannot break (or alter) the executed snippet.
 *
 * @param importDataSchema value of the variable, pointing to the schema source.
 * @param property the notebook property that holds [importDataSchema].
 * @return the name of the generated schema, or `null` if the schema could not be read.
 */
private fun KotlinKernelHost.updateImportDataSchemaVariable(
    importDataSchema: ImportDataSchema,
    property: KProperty<*>,
): VariableName? {
    // Builds `DISPLAY("<message>")` with the message safely encoded as a Kotlin string literal.
    fun displaySnippet(message: String): String {
        val escaped = buildString {
            for (ch in message) {
                when (ch) {
                    '\\' -> append("\\\\")
                    '"' -> append("\\\"")
                    '$' -> append("\\\$")
                    '\n' -> append("\\n")
                    '\r' -> append("\\r")
                    else -> append(ch)
                }
            }
        }
        return "DISPLAY(\"$escaped\")"
    }

    val formats = supportedFormats.filterIsInstance<SupportedCodeGenerationFormat>()
    val name = property.name + "DataSchema"
    return when (
        val codeGenResult = CodeGenerator.urlCodeGenReader(importDataSchema.url, name, formats, true)
    ) {
        is CodeGenerationReadResult.Success -> {
            val readDfMethod = codeGenResult.getReadDfMethod(importDataSchema.url.toExternalForm())
            val code = readDfMethod.additionalImports.joinToString("\n") +
                "\n" +
                codeGenResult.code
            execute(code)
            execute(displaySnippet("Data schema successfully imported as ${property.name}: $name"))
            name
        }

        is CodeGenerationReadResult.Error -> {
            execute(
                displaySnippet(
                    "Failed to read data schema from ${importDataSchema.url}: ${codeGenResult.reason}",
                ),
            )
            null
        }
    }
}
/**
 * Runs code generation for the data frame [df] stored in [property] and executes the result,
 * casting the property to a star-projected, non-nullable `DataFrame` type.
 *
 * @return whatever the `execute` helper returns: a variable name, or `null`.
 */
private fun KotlinKernelHost.updateAnyFrameVariable(
    df: AnyFrame,
    property: KProperty<*>,
    codeGen: ReplCodeGenerator,
): VariableName? {
    val generated = codeGen.process(df, property)
    val frameType = DataFrame::class.createStarProjectedType(false)
    return execute(codeWithTypeCastGenerator = generated, property = property, type = frameType)
}
/**
 * Runs code generation for the data row [row] stored in [property] and executes the result,
 * casting the property to a star-projected, non-nullable `DataRow` type.
 *
 * @return whatever the `execute` helper returns: a variable name, or `null`.
 */
private fun KotlinKernelHost.updateAnyRowVariable(
    row: AnyRow,
    property: KProperty<*>,
    codeGen: ReplCodeGenerator,
): VariableName? {
    val generated = codeGen.process(row, property)
    val rowType = DataRow::class.createStarProjectedType(false)
    return execute(codeWithTypeCastGenerator = generated, property = property, type = rowType)
}
/**
 * Runs code generation for the column group [col] (viewed as a data frame) stored in [property]
 * and executes the result, casting the property to a star-projected, non-nullable `ColumnGroup` type.
 *
 * @return whatever the `execute` helper returns: a variable name, or `null`.
 */
private fun KotlinKernelHost.updateColumnGroupVariable(
    col: ColumnGroup<*>,
    property: KProperty<*>,
    codeGen: ReplCodeGenerator,
): VariableName? {
    val generated = codeGen.process(col.asDataFrame(), property)
    val groupType = ColumnGroup::class.createStarProjectedType(false)
    return execute(codeWithTypeCastGenerator = generated, property = property, type = groupType)
}
/**
 * Handles a variable holding an arbitrary column.
 *
 * Only columns that are column groups are processed: code is generated for the group viewed as a
 * data frame, and the generated cast is wrapped so that the cast expression ends in `.asColumnGroup()`.
 * The property itself is cast to a star-projected, non-nullable `DataColumn` type.
 *
 * @return `null` for columns that are not column groups; otherwise the result of the `execute` helper.
 */
private fun KotlinKernelHost.updateAnyColVariable(
    col: AnyCol,
    property: KProperty<*>,
    codeGen: ReplCodeGenerator,
): VariableName? {
    if (!col.isColumnGroup()) return null

    val frameCode = codeGen.process(col.asDataFrame(), property)
    val columnGroupCode = frameCode.copy { expression ->
        frameCode.typeCastGenerator("$expression.asColumnGroup()")
    }
    val columnType = DataColumn::class.createStarProjectedType(false)
    return execute(codeWithTypeCastGenerator = columnGroupCode, property = property, type = columnType)
}
/**
 * Runs code generation for the [GroupBy] instance stored in [property] and executes the result,
 * casting the property to a star-projected, non-nullable `GroupBy` type.
 *
 * @return whatever the `execute` helper returns: a variable name, or `null`.
 */
private fun KotlinKernelHost.updateGroupByVariable(
    instance: GroupBy<*, *>,
    property: KProperty<*>,
    codeGen: ReplCodeGenerator,
): VariableName? {
    val generated = codeGen.process(instance)
    val groupByType = GroupBy::class.createStarProjectedType(false)
    return execute(codeWithTypeCastGenerator = generated, property = property, type = groupByType)
}
/**
 * Configures the integration on the given [Builder]:
 * handles the experimental-module options, enforces the minimal kernel version, declares the
 * `dataFrameConfig` variable, registers JS/CSS resources, renderers, default imports, the field
 * handler that updates variables after cell execution, `@DataSchema` processing, internal-variable
 * marking and the color scheme listener.
 */
override fun Builder.onLoaded() {
// The experimental-module options are only looked at when the "v" (version) option is present.
if (version != null) {
if (enableExperimentalCsv?.toBoolean() == true) {
println("CSV module is already enabled by default now.")
}
if (enableExperimentalGeo?.toBoolean() == true) {
println("dataframe-geo module was extracted into separate descriptor: %use dataframe-geo")
}
if (enableExperimentalOpenApi?.toBoolean() == true) {
println("Enabling experimental OpenAPI 3.0.0 module: dataframe-openapi")
dependencies(
"org.jetbrains.kotlinx:dataframe-openapi:$version",
"org.jetbrains.kotlinx:dataframe-openapi-generator:$version",
)
}
}
// Require a recent enough kernel; kernels that lack this API get a readable update message instead.
try {
setMinimalKernelVersion(MIN_KERNEL_VERSION)
} catch (_: NoSuchMethodError) {
// will be thrown when a version < 0.11.0.198
throw IllegalStateException(
getKernelUpdateMessage(notebook.kernelVersion, MIN_KERNEL_VERSION, notebook.jupyterClientType),
)
}
val codeGen = ReplCodeGenerator.create()
val config = JupyterConfiguration(enableExperimentalOpenApi = enableExperimentalOpenApi?.toBoolean() == true)
if (notebook.jupyterClientType == JupyterClientType.KOTLIN_NOTEBOOK) {
config.display.isolatedOutputs = true
}
// Expose the configuration object to notebook code as `dataFrameConfig`.
onLoaded {
declare("dataFrameConfig" to config)
}
// Shared JS/CSS resources are only registered when outputs are not isolated.
resources {
if (!config.display.isolatedOutputs) {
js("DataFrame") {
if (config.display.localTesting()) {
classPath("init.js")
} else {
// Update this commit when new version of init.js is pushed
val initJsSha = "3db46ccccaa1291c0627307d64133317f545e6ae"
url("https://cdn.jsdelivr.net/gh/Kotlin/dataframe@$initJsSha/core/src/main/resources/init.js")
}
}
css("DataFrameTable") { classPath("table.css") }
}
}
// Renderer registrations; each lambda produces the footer text shown with the rendered value.
with(JupyterHtmlRenderer(config.display, this)) {
// NOTE(review): the footer label says "DataRow: index" but the value printed is rowsCount() —
// this looks copied from the AnyRow renderer below; confirm the intended text.
render<DisableRowsLimitWrapper>(
{ "DataRow: index = ${it.value.rowsCount()}, columnsCount = ${it.value.columnsCount()}" },
applyRowsLimit = false,
)
render<GroupClause<*, *>>({ "Group" })
render<MoveClause<*, *>>({ "Move" })
render<RenameClause<*, *>>({ "Rename" })
render<ReplaceClause<*, *>>({ "Replace" })
render<InsertClause<*>>({ "Insert" })
render<FormatClause<*, *>>({ "Format" })
render<DataFrameHtmlData> {
// Our integration declares script and css definition. But in Kotlin Notebook outputs are isolated in IFrames
// That's why we include them directly in the output
if (notebook.jupyterClientType == JupyterClientType.KOTLIN_NOTEBOOK) {
it.withTableDefinitions().toJupyterHtmlData().toIFrame(notebook.currentColorScheme)
} else {
it.toJupyterHtmlData().toSimpleHtml(notebook.currentColorScheme)
}
}
render<AnyRow>(
{ "DataRow: index = ${it.index()}, columnsCount = ${it.columnsCount()}" },
)
render<ColumnGroup<*>>(
{
"""ColumnGroup: name = "${it.name()}", rowsCount = ${it.rowsCount()}, columnsCount = ${it.columnsCount()}"""
},
)
render<AnyCol>(
{ """DataColumn: name = "${it.name()}", type = ${renderType(it.type())}, size = ${it.size()}""" },
)
render<AnyFrame>(
{ "DataFrame: rowsCount = ${it.rowsCount()}, columnsCount = ${it.columnsCount()}" },
)
render<FormattedFrame<*>>(
{ "DataFrame: rowsCount = ${it.df().rowsCount()}, columnsCount = ${it.df().columnsCount()}" },
modifyConfig = { getDisplayConfiguration(it) },
)
render<GroupBy<*, *>>({ "GroupBy" })
render<ReducedGroupBy<*, *>>({ "ReducedGroupBy" })
render<Pivot<*>>({ "Pivot" })
render<ReducedPivot<*>>({ "ReducedPivot" })
render<PivotGroupBy<*>>({ "PivotGroupBy" })
render<ReducedPivotGroupBy<*>>({ "ReducedPivotGroupBy" })
render<SplitWithTransform<*, *, *>>({ "Split" })
render<Split<*, *>>({ "Split" })
render<Merge<*, *, *>>({ "Merge" })
render<Gather<*, *, *, *>>({ "Gather" })
render<IMG> { HTML(it.toString()) }
render<IFRAME> { HTML(it.toString()) }
render<Update<*, *>>({ "Update" })
render<Convert<*, *>>({ "Convert" })
}
// Imports made available in every notebook that uses the library.
import("org.jetbrains.kotlinx.dataframe.api.*")
import("org.jetbrains.kotlinx.dataframe.*")
import("org.jetbrains.kotlinx.dataframe.annotations.*")
import("org.jetbrains.kotlinx.dataframe.io.*")
import("org.jetbrains.kotlinx.dataframe.columns.*")
import("org.jetbrains.kotlinx.dataframe.jupyter.ImportDataSchema")
import("org.jetbrains.kotlinx.dataframe.jupyter.importDataSchema")
import("org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils")
import("java.net.URL")
import("java.net.URI")
import("java.io.File")
import("kotlinx.datetime.Instant")
import("kotlinx.datetime.LocalDateTime")
import("kotlinx.datetime.LocalDate")
import("org.jetbrains.kotlinx.dataframe.dataTypes.*")
import("org.jetbrains.kotlinx.dataframe.impl.codeGen.urlCodeGenReader")
// Field handler: dispatches on the runtime type of a variable's value to the matching update* function.
// `accepts` must list exactly the types handled in the `when` below.
addTypeConverter(object : FieldHandler {
override val execution = FieldHandlerFactory.createUpdateExecution<Any> { instance, property ->
// TODO check property type first, then instance, Issue #1245
when (instance) {
is AnyCol -> updateAnyColVariable(instance, property, codeGen)
is ColumnGroup<*> -> updateColumnGroupVariable(instance, property, codeGen)
is AnyRow -> updateAnyRowVariable(instance, property, codeGen)
is AnyFrame -> updateAnyFrameVariable(instance, property, codeGen)
is ImportDataSchema -> updateImportDataSchemaVariable(instance, property)
is GroupBy<*, *> -> updateGroupByVariable(instance, property, codeGen)
else -> error("${instance::class} should not be handled by Dataframe field handler")
}
}
override fun accepts(value: Any?, property: KProperty<*>): Boolean =
value is AnyCol ||
value is ColumnGroup<*> ||
value is AnyRow ||
value is AnyFrame ||
value is ImportDataSchema ||
value is GroupBy<*, *>
})
// Generates code for the given schema classes and executes it, unless nothing was generated.
fun KotlinKernelHost.addDataSchemas(classes: List<KClass<*>>) {
val code = classes
.joinToString("\n") { codeGen.process(it) }
.trim()
if (code.isNotEmpty()) {
execute(code)
}
}
onClassAnnotation<DataSchema> { addDataSchemas(it) }
// Process schema classes queued in `newDataSchemas` before the next cell runs, then empty the queue.
beforeCellExecution {
if (newDataSchemas.isNotEmpty()) {
addDataSchemas(newDataSchemas)
newDataSchemas.clear()
}
}
val internalTypes = listOf(
ColumnReference::class,
).map { it.createStarProjectedType(true) }
markVariableInternal { property ->
// TODO: add more conditions to include all generated properties and other internal stuff
// that should not be shown to user in Jupyter variables view
internalTypes.any { property.returnType.isSubtypeOf(it) }
}
// Keep the display configuration in sync with the notebook's color scheme.
onColorSchemeChange {
config.display.useDarkColorScheme = (it == ColorScheme.DARK)
}
}
}
/** Appends all [schemaClasses] to the `newDataSchemas` queue. */
public fun KotlinKernelHost.useSchemas(schemaClasses: Iterable<KClass<*>>) {
    newDataSchemas += schemaClasses
}

/** Vararg convenience overload: forwards [schemaClasses] to the [Iterable] overload. */
public fun KotlinKernelHost.useSchemas(vararg schemaClasses: KClass<*>) {
    useSchemas(schemaClasses.asIterable())
}

/** Queues the class of the reified type [T] via [useSchemas]. */
public inline fun <reified T> KotlinKernelHost.useSchema() {
    useSchemas(T::class)
}
// region friend module error suppression
/**
 * File-private alias interface for `org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGenerator`.
 * Declaring it here confines the `INVISIBLE_REFERENCE` suppression to this region, so the rest of
 * the file can refer to the generator by its short name.
 */
@Suppress("INVISIBLE_REFERENCE")
private interface ReplCodeGenerator : org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGenerator {
companion object {
/** Creates a generator that delegates every call to a new `ReplCodeGeneratorImpl`. */
fun create(): ReplCodeGenerator =
object :
ReplCodeGenerator,
org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGenerator by
org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGeneratorImpl() {}
}
}
/** Forwards to `org.jetbrains.kotlinx.dataframe.io.supportedFormats`. */
@Suppress("INVISIBLE_REFERENCE")
private val supportedFormats
get() = org.jetbrains.kotlinx.dataframe.io.supportedFormats
/** Forwards to `org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType`. */
@Suppress("INVISIBLE_REFERENCE")
private fun KClass<*>.createStarProjectedType(nullable: Boolean) =
org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType(this, nullable)
/** Forwards to `org.jetbrains.kotlinx.dataframe.impl.renderType`. */
@Suppress("INVISIBLE_REFERENCE")
private fun renderType(type: KType?) = org.jetbrains.kotlinx.dataframe.impl.renderType(type)
/** Function-style accessor for the `df` property of a [FormattedFrame]. */
@Suppress("INVISIBLE_REFERENCE")
private fun <T> FormattedFrame<T>.df() = df
/** Function-style accessor for the `localTesting` property of a [DisplayConfiguration]. */
@Suppress("INVISIBLE_REFERENCE")
private fun DisplayConfiguration.localTesting() = localTesting
// endregion
@@ -0,0 +1,39 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
import org.jetbrains.kotlinx.dataframe.io.RendererDecimalFormat
import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult
import org.jetbrains.kotlinx.jupyter.api.Notebook
import org.jetbrains.kotlinx.jupyter.api.Renderable
import org.jetbrains.kotlinx.jupyter.api.libraries.ExecutionHost
/**
 * Cell renderer that asks the notebook's renderers processor to render a cell value.
 *
 * Values for which [internallyRenderable] is `true` are not handled here (`null` is returned).
 * When the rendered value is a [MimeTypedResult] containing `text/html`, that HTML is used as
 * media content; anything else is rendered through `renderValueForHtml`.
 */
internal class JupyterCellRenderer(private val notebook: Notebook, private val host: ExecutionHost) :
    ChainedCellRenderer(DefaultCellRenderer) {

    override fun maybeContent(value: Any?, configuration: DisplayConfiguration): RenderedContent? {
        val processor = notebook.renderersProcessor
        if (internallyRenderable(value)) return null

        // Unwrap `Renderable` results so that the final, notebook-rendered value is inspected.
        val rendered = processor.renderValue(host, value).let { candidate ->
            if (candidate is Renderable) candidate.render(notebook) else candidate
        }

        return if (rendered is MimeTypedResult && "text/html" in rendered) {
            RenderedContent.media(rendered["text/html"] ?: "")
        } else {
            renderValueForHtml(rendered, configuration.cellContentLimit, configuration.decimalFormat)
        }
    }

    /** This renderer never provides a tooltip. */
    override fun maybeTooltip(value: Any?, configuration: DisplayConfiguration): String? = null
}
/**
 * Returns `true` for data frames, doubles, lists, `null` and the empty string;
 * `false` for every other value.
 */
internal fun internallyRenderable(value: Any?): Boolean =
    value is AnyFrame ||
        value is Double ||
        value is List<*> ||
        value == null ||
        value == ""
// region friend module error suppression
/** Forwards to `org.jetbrains.kotlinx.dataframe.io.renderValueForHtml`. */
@Suppress("INVISIBLE_REFERENCE")
private fun renderValueForHtml(value: Any?, truncate: Int, format: RendererDecimalFormat) =
org.jetbrains.kotlinx.dataframe.io.renderValueForHtml(value, truncate, format)
// endregion
@@ -0,0 +1,156 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.colsOf
import org.jetbrains.kotlinx.dataframe.api.getColumns
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
import org.jetbrains.kotlinx.dataframe.io.toHtml
import org.jetbrains.kotlinx.dataframe.io.toJsonWithMetadata
import org.jetbrains.kotlinx.dataframe.io.toStaticHtml
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils.convertToDataFrame
import org.jetbrains.kotlinx.jupyter.api.HtmlData
import org.jetbrains.kotlinx.jupyter.api.JupyterClientType
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion
import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult
import org.jetbrains.kotlinx.jupyter.api.Notebook
import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
import org.jetbrains.kotlinx.jupyter.api.mimeResult
import org.jetbrains.kotlinx.jupyter.api.outputs.isIsolatedHtml
import org.jetbrains.kotlinx.jupyter.api.renderHtmlAsIFrameIfNeeded
/** Starting from this version, dataframe integration will respond with additional data for rendering in Kotlin Notebooks plugin. */
private const val MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI = "0.11.0.311"
/** Lowest IDE build major version for which JSON with metadata (dynamic nested tables) is produced. */
private const val MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA = 241
/** Lowest IDE build major version for which the buffered-image encoder is added. */
private const val MIN_IDE_VERSION_SUPPORT_IMAGE_VIEWER = 242
/** Lowest IDE build major version for which the DataFrame-convertable encoder is added. */
private const val MIN_IDE_VERSION_SUPPORT_DATAFRAME_CONVERTABLE = 243
/**
 * Holds the [DisplayConfiguration] and the integration [JupyterIntegration.Builder]
 * used by the `render` extension to register renderers.
 */
internal class JupyterHtmlRenderer(
    val display: DisplayConfiguration,
    val builder: JupyterIntegration.Builder,
)
/**
 * Registers a renderer for values of type [T] on [JupyterHtmlRenderer.builder].
 *
 * The value is converted to a data frame and rendered as HTML; on sufficiently new kernels a JSON
 * representation is attached as well (or returned alone when the value is a
 * [DisableRowsLimitWrapper] whose `addHtml` is `false`).
 *
 * @param getFooter produces the footer text displayed with the table.
 * @param modifyConfig derives the effective display configuration from the renderer's
 *   [JupyterHtmlRenderer.display]; defaults to using it unchanged.
 * @param applyRowsLimit when `true`, the configured `rowsLimit` (if any) caps the number of rows;
 *   when `false`, all rows are used.
 */
internal inline fun <reified T : Any> JupyterHtmlRenderer.render(
noinline getFooter: (T) -> String,
crossinline modifyConfig: T.(DisplayConfiguration) -> DisplayConfiguration = { it },
applyRowsLimit: Boolean = true,
) = builder.renderWithHost<T> { host, value ->
// HTML is included unless a DisableRowsLimitWrapper explicitly opts out.
val addHtml = (value as? DisableRowsLimitWrapper)?.addHtml ?: true
val contextRenderer = JupyterCellRenderer(this.notebook, host)
val reifiedDisplayConfiguration = value.modifyConfig(display)
val footer = getFooter(value)
val df = convertToDataFrame(value)
val isFormatted = reifiedDisplayConfiguration.cellFormatter != null ||
df.hasFormattedColumns()
// Without a configured rowsLimit (or when limits are disabled) every row is included.
val limit = if (applyRowsLimit) {
reifiedDisplayConfiguration.rowsLimit ?: df.rowsCount()
} else {
df.rowsCount()
}
// Both HTML variants are lazy: they are only built on the branches below that use them.
val html by lazy {
DataFrameHtmlData
.tableDefinitions(
includeJs = reifiedDisplayConfiguration.isolatedOutputs,
includeCss = true,
).plus(
df.toHtml(
// is added later to make sure it's put outside of potential iFrames
configuration = reifiedDisplayConfiguration.copy(enableFallbackStaticTables = false),
cellRenderer = contextRenderer,
) { footer },
).toJupyterHtmlData()
}
// Generates a static version of the table which can be displayed in GitHub previews etc.
val staticHtml by lazy { df.toStaticHtml(reifiedDisplayConfiguration, DefaultCellRenderer).toJupyterHtmlData() }
if (notebook.kernelVersion >= KotlinKernelVersion.from(MIN_KERNEL_VERSION_FOR_NEW_TABLES_UI)!!) {
val ideBuildNumber = KotlinNotebookPluginUtils.getKotlinNotebookIDEBuildNumber()
// TODO Do we need to handle the improved meta data here as well?
val jsonEncodedDf = when {
!ideBuildNumber.supportsDynamicNestedTables() ->
encodeFrameNoDynamicNestedTables(df = df, limit = limit, isFormatted = isFormatted).toString()
else -> {
// Custom encoders are enabled depending on the IDE build's major version.
val encoders = buildList {
if (ideBuildNumber.supportsDataFrameConvertableValues()) {
// `this` is the list being built, so the encoder receives the encoder list itself.
add(DataframeConvertableEncoder(this))
}
if (ideBuildNumber.supportsImageViewer()) {
add(BufferedImageEncoder(Base64ImageEncodingOptions()))
}
}
df.toJsonWithMetadata(
rowLimit = limit,
nestedRowLimit = reifiedDisplayConfiguration.rowsLimit,
customEncoders = encoders,
isFormatted = isFormatted,
)
}
}
if (!addHtml) {
mimeResult(
"application/kotlindataframe+json" to jsonEncodedDf,
)
} else {
notebook.renderAsIFrameAsNeeded(data = html, staticData = staticHtml, jsonEncodedDf = jsonEncodedDf)
}
} else {
// Older kernels only get the HTML output.
notebook.renderHtmlAsIFrameIfNeeded(data = html)
}
}
/**
 * Returns `true` when this frame contains, at any depth, at least one column of
 * (nullable) [FormattedFrame] values that is not entirely `null`.
 */
internal fun AnyFrame.hasFormattedColumns(): Boolean {
    val formattedColumns = getColumns {
        colsAtAnyDepth().colsOf<FormattedFrame<*>?> { column -> !column.allNulls() }
    }
    return formattedColumns.isNotEmpty()
}
/** `false` for an unknown build; otherwise whether the major version reaches the JSON-with-metadata threshold. */
private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDynamicNestedTables(): Boolean =
    if (this == null) false else majorVersion >= MIN_IDE_VERSION_SUPPORT_JSON_WITH_METADATA

/** `false` for an unknown build; otherwise whether the major version reaches the image-viewer threshold. */
private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsImageViewer(): Boolean =
    if (this == null) false else majorVersion >= MIN_IDE_VERSION_SUPPORT_IMAGE_VIEWER

/** `false` for an unknown build; otherwise whether the major version reaches the DataFrame-convertable threshold. */
private fun KotlinNotebookPluginUtils.IdeBuildNumber?.supportsDataFrameConvertableValues(): Boolean =
    if (this == null) false else majorVersion >= MIN_IDE_VERSION_SUPPORT_DATAFRAME_CONVERTABLE
/**
 * Builds a MIME result carrying both the HTML and the JSON representation of a data frame.
 *
 * For the Kotlin Notebook client, [data] is turned into iframe text and the static HTML is appended
 * after it; for every other client, [data] and [staticData] are combined and rendered together.
 * The result is explicitly marked as not isolated HTML.
 *
 * @param data the interactive HTML.
 * @param staticData the static HTML.
 * @param jsonEncodedDf JSON emitted under `application/kotlindataframe+json`.
 */
internal fun Notebook.renderAsIFrameAsNeeded(
    data: HtmlData,
    staticData: HtmlData,
    jsonEncodedDf: String,
): MimeTypedResult {
    val textHtml = when (jupyterClientType) {
        JupyterClientType.KOTLIN_NOTEBOOK -> {
            val iframePart = data.generateIframePlaneText(currentColorScheme)
            val staticPart = staticData.toString(currentColorScheme)
            iframePart + staticPart
        }

        else -> (data + staticData).toString(currentColorScheme)
    }

    val result = mimeResult(
        "text/html" to textHtml,
        "application/kotlindataframe+json" to jsonEncodedDf,
    )
    result.isIsolatedHtml = false
    return result
}
/** Copies this object's `style`, `body` and `script` into a Jupyter API [HtmlData]. */
internal fun DataFrameHtmlData.toJupyterHtmlData(): HtmlData {
    return HtmlData(style, body, script)
}
// region friend module error suppression
/** Forwards to `org.jetbrains.kotlinx.dataframe.impl.io.encodeFrameNoDynamicNestedTables`. */
@Suppress("INVISIBLE_REFERENCE")
private fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int, isFormatted: Boolean) =
org.jetbrains.kotlinx.dataframe.impl.io.encodeFrameNoDynamicNestedTables(df, limit, isFormatted)
/** Factory function forwarding to `org.jetbrains.kotlinx.dataframe.impl.io.DataframeConvertableEncoder`. */
@Suppress("INVISIBLE_REFERENCE", "ktlint:standard:function-naming")
private fun DataframeConvertableEncoder(encoders: List<CustomEncoder>, rowLimit: Int? = null) =
org.jetbrains.kotlinx.dataframe.impl.io.DataframeConvertableEncoder(encoders, rowLimit)
/** Factory function forwarding to `org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder`. */
@Suppress("INVISIBLE_REFERENCE", "ktlint:standard:function-naming")
private fun BufferedImageEncoder(options: Base64ImageEncodingOptions) =
org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder(options)
// endregion
@@ -0,0 +1,33 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.kotlinx.dataframe.codeGen.CodeWithTypeCastGenerator
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
import org.jetbrains.kotlinx.jupyter.api.VariableName
import kotlin.reflect.KProperty
import kotlin.reflect.KType
/**
 * Executes the declarations produced by [codeWithTypeCastGenerator] for the given cast [expression].
 *
 * @return the name of the execution result when code was executed and the generator has a caster;
 *   `null` when the generated code is blank or there is no caster.
 */
internal fun KotlinKernelHost.execute(
    codeWithTypeCastGenerator: CodeWithTypeCastGenerator,
    expression: String,
): VariableName? {
    val generatedCode = codeWithTypeCastGenerator.declarationsWithCastExpression(expression)
    // Nothing to run: skip execution entirely.
    if (generatedCode.isBlank()) return null

    val executionResult = execute(generatedCode)
    return if (codeWithTypeCastGenerator.hasCaster) executionResult.name else null
}
/**
 * Builds the cast expression `(<property name>[!!] as <type>)` for [property] and delegates to the
 * expression-based `execute` overload. `!!` is inserted only when the property's return type is
 * marked nullable.
 */
internal fun KotlinKernelHost.execute(
    codeWithTypeCastGenerator: CodeWithTypeCastGenerator,
    property: KProperty<*>,
    type: KType,
): VariableName? {
    val propertyName = property.name
    val nonNullAssertion = if (property.returnType.isMarkedNullable) "!!" else ""
    val castExpression = "(" + propertyName + nonNullAssertion + " as " + type + ")"
    return execute(codeWithTypeCastGenerator, castExpression)
}
@@ -0,0 +1,27 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.kotlinx.jupyter.api.JupyterClientType
import org.jetbrains.kotlinx.jupyter.api.JupyterClientType.DATALORE
import org.jetbrains.kotlinx.jupyter.api.JupyterClientType.KOTLIN_NOTEBOOK
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelVersion
/** Update instructions shown for the Datalore client. */
private const val UPDATING_DATALORE_URL = "https://github.com/Kotlin/kotlin-jupyter/tree/master#datalore"
/** Update instructions shown for the Kotlin Notebook client. */
private const val UPDATING_KOTLIN_NOTEBOOK_URL = "https://github.com/Kotlin/kotlin-jupyter#kotlin-notebook"
/** Update instructions shown for every other client. */
private const val UPDATING = "https://github.com/Kotlin/kotlin-jupyter/tree/master#updating"
/**
 * Builds the message telling the user that their kernel is outdated.
 *
 * @param kernelVersion the version currently in use, shown in the message.
 * @param minKernelVersion the version the user should update to (or newer).
 * @param clientType selects which update-instructions URL is appended.
 * @return a two-line message; each line ends with `\n`.
 */
internal fun getKernelUpdateMessage(
    kernelVersion: KotlinKernelVersion,
    minKernelVersion: String,
    clientType: JupyterClientType,
): String {
    val instructionsUrl = when (clientType) {
        DATALORE -> UPDATING_DATALORE_URL
        KOTLIN_NOTEBOOK -> UPDATING_KOTLIN_NOTEBOOK_URL
        else -> UPDATING
    }
    val outdatedNotice = "Your Kotlin Jupyter kernel version appears to be out of date (version $kernelVersion). "
    val updateRequest = "Please update it to version $minKernelVersion or newer to be able to use DataFrame."
    return outdatedNotice + updateRequest + "\n" +
        "Follow the instructions at: " + instructionsUrl + "\n"
}
@@ -0,0 +1,142 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.jupyter.api.Code
import org.junit.Test
/**
 * REPL tests checking that notebook snippets relying on generated column accessors
 * can be executed without errors.
 *
 * NOTE(review): in every test the local `val` only receives the `Unit` result of
 * [checkCompilation]; it presumably exists to carry the `@Language` annotation — confirm.
 */
class CodeGenerationTests : DataFrameJupyterTest() {
/** Passes each line of this snippet, one at a time and in order, to `execRendered`. */
private fun Code.checkCompilation() {
lines().forEach {
execRendered(it)
}
}
// Accessor `a` must be usable although the variable is declared as `Any?`.
@Test
fun `Type erased dataframe`() {
@Language("kts")
val a = """
fun create(): Any? = dataFrameOf("a")(1)
val df = create()
df.a
""".checkCompilation()
}
// Accessor `a` must be usable although the variable is declared as `AnyFrame?`.
@Test
fun `nullable dataframe`() {
@Language("kts")
val a = """
fun create(): AnyFrame? = dataFrameOf("a")(1)
val df = create()
df.a
""".checkCompilation()
}
// Same as above for a column group exposed as `AnyCol?`.
@Test
fun `nullable columnGroup`() {
@Language("kts")
val a = """
fun create(): AnyCol? = dataFrameOf("a")(1).asColumnGroup().asDataColumn()
val col = create()
col.a
""".checkCompilation()
}
// Same as above for a row exposed as `AnyRow?`.
@Test
fun `nullable dataRow`() {
@Language("kts")
val a = """
fun create(): AnyRow? = dataFrameOf("a")(1).single()
val row = create()
row.a
""".checkCompilation()
}
@Test
fun `interface without body compiled correctly`() {
@Language("kts")
val a = """
val a = dataFrameOf("a")(1, 2, 3)
val b = dataFrameOf("b")(1, 2, 3)
val ab = dataFrameOf("a", "b")(1, 2)
ab.a
""".checkCompilation()
}
@Test
fun `nested schema with isOpen = false is ignored in marker generation`() {
@Language("kts")
val a = """
val df = dataFrameOf("col" to listOf("a"), "leaf" to listOf(dataFrameOf("a", "b")(1, 2).first()))
val df1 = df.convert { leaf }.asFrame { it.add("c") { 3 } }
df1.leaf.c
""".checkCompilation()
}
// Issue #1222
// Both declaration orders (nullable sub-column first, and non-nullable first) are exercised.
@Test
fun `do not reuse marker with non-matching sub-schema`() {
@Language("kts")
val a = """
val df1 = dataFrameOf("group" to columnOf("a" to columnOf(1, null, 3)))
val df2 = dataFrameOf("group" to columnOf("a" to columnOf(1, 2, 3)))
df1.group.a
df2.group.a
""".checkCompilation()
@Language("kts")
val b = """
val df1 = dataFrameOf("group" to columnOf("a" to columnOf(1, 2, 3)))
val df2 = dataFrameOf("group" to columnOf("a" to columnOf(1, null, 3)))
df1.group.a
df2.group.a
""".checkCompilation()
}
// Issue #1221, #663
// Covers accessors inside aggregate, on `keys`, with renamed keys, and on columns added to a GroupBy.
@Test
fun `GroupBy code generation`() {
@Language("kts")
val a = """
val ab = dataFrameOf("a", "b")(1, 2)
ab.groupBy { a }.aggregate { sum { b } into "bSum" }
""".checkCompilation()
@Language("kts")
val b = """
val ab = dataFrameOf("a", "b")(1, 2)
val grouped = ab.groupBy { a }
grouped.aggregate { sum { b } into "bSum" }
""".checkCompilation()
@Language("kts")
val c = """
val grouped = dataFrameOf("a", "b")(1, 2).groupBy("a")
grouped.aggregate { sum { b } into "bSum" }
""".checkCompilation()
@Language("kts")
val d = """
val grouped = dataFrameOf("a", "b")(1, 2).groupBy("a")
grouped.keys.a
""".checkCompilation()
@Language("kts")
val e = """
val grouped = dataFrameOf("a", "b")(1, 2).groupBy { "a"<Int>() named "k" }
grouped.keys.k
""".checkCompilation()
@Language("kts")
val f = """
val groupBy = dataFrameOf("a")("1", "11", "2", "22").groupBy { expr { "a"<String>().length } named "k" }
groupBy.keys.k
""".checkCompilation()
@Language("kts")
val g = """
val groupBy = dataFrameOf("a")("1", "11", "2", "22").groupBy { expr { "a"<String>().length } named "k" }.add("newCol") { 42 }
groupBy.aggregate { newCol into "newCol" }
""".checkCompilation()
}
}
@@ -0,0 +1,58 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.jupyter.parser.notebook.Cell
import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase
import org.jetbrains.kotlinx.jupyter.testkit.ReplProvider
/**
 * Base class for REPL tests of this module: a [JupyterReplTestCase] whose REPL is created for
 * library testing with the `dataframe`, `kandy-geo` and `kandy` libraries and with the
 * `kotlin.uuid.ExperimentalUuidApi` opt-in passed as a compiler argument.
 */
abstract class DataFrameJupyterTest :
JupyterReplTestCase(
ReplProvider.forLibrariesTesting(
libraries = setOf("dataframe", "kandy-geo", "kandy"),
extraCompilerArguments = listOf(
"-Xopt-in=kotlin.uuid.ExperimentalUuidApi",
),
),
)
/** Transforms a piece of code into another piece of code. */
fun interface CodeReplacer {
    /** Returns the transformed version of [code]. */
    fun replace(code: String): String

    companion object {
        /** Replacer that returns the code unchanged. */
        val DEFAULT = CodeReplacer { code -> code }

        /**
         * Creates a replacer that applies every `key -> replacement` entry of [replacements]
         * in the map's iteration order; later entries see the output of earlier ones.
         */
        fun byMap(replacements: Map<String, String>): CodeReplacer =
            CodeReplacer { code ->
                var rewritten = code
                for ((key, replacement) in replacements) {
                    rewritten = rewritten.replace(key, replacement)
                }
                rewritten
            }

        /** Vararg convenience overload of [byMap]. */
        fun byMap(vararg replacements: Pair<String, String>): CodeReplacer = byMap(replacements.toMap())
    }
}
/** Predicate deciding whether a notebook [Cell] is accepted. */
fun interface CellClause {
    /** Returns `true` when [cell] is accepted by this clause. */
    fun isAccepted(cell: Cell): Boolean

    companion object {
        /** Accepts exactly the cells whose type is [Cell.Type.CODE]. */
        val IS_CODE = CellClause { cell -> cell.type == Cell.Type.CODE }
    }
}
/**
 * Combines two clauses into one that accepts a cell only when both accept it.
 * Both clauses are always evaluated for every cell, even when the first one rejects it.
 */
infix fun CellClause.and(other: CellClause): CellClause {
    val first = this
    return CellClause { cell ->
        // `Boolean.and` does not short-circuit, so `other` is consulted regardless of `first`.
        first.isAccepted(cell) and other.isAccepted(cell)
    }
}
/**
 * Creates a stateful clause that accepts cells until [breakClause] accepts one.
 * The cell that triggers [breakClause], and every cell after it, is rejected;
 * once triggered, [breakClause] is no longer consulted.
 */
fun CellClause.Companion.stopAfter(breakClause: CellClause) =
    object : CellClause {
        var clauseTriggered: Boolean = false

        override fun isAccepted(cell: Cell): Boolean {
            if (!clauseTriggered && breakClause.isAccepted(cell)) {
                clauseTriggered = true
            }
            return !clauseTriggered
        }
    }
@@ -0,0 +1,410 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import io.kotest.assertions.throwables.shouldNotThrowAny
import io.kotest.matchers.should
import io.kotest.matchers.shouldBe
import io.kotest.matchers.types.shouldBeInstanceOf
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.isNotEmpty
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.type
import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult
import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase
import org.junit.Test
import kotlin.reflect.typeOf
class JupyterCodegenTests : JupyterReplTestCase() {
/**
 * Defines a generic helper `addValue` in the REPL that adds a `value` column built from `listOf(value)`,
 * then checks that the resulting column of `df` has the type `List<Any?>`.
 */
@Test
fun `codegen adding column with generic type function`() {
@Language("kts")
val res1 = execRendered(
"""
fun <T> AnyFrame.addValue(value: T) = add("value") { listOf(value) }
val df = dataFrameOf("a")(1).addValue(2)
""".trimIndent(),
)
// The cell ends with a declaration, so nothing is rendered.
res1 shouldBe Unit
@Language("kts")
val res2 = execRaw("df") as AnyFrame
res2["value"].type shouldBe typeOf<List<Any?>>()
}
/**
 * Creates a frame with a `kotlin.time.Instant` column in one cell and accesses `df.a` in the next,
 * expecting a [DataColumn] whose type is `kotlin.time.Instant`.
 */
@Test
fun `opt in experimental Instant`() {
@Language("kts")
val res1 = execRaw(
"""
@file:OptIn(kotlin.time.ExperimentalTime::class)
val values: kotlin.time.Instant = kotlin.time.Clock.System.now()
val df = dataFrameOf("a" to columnOf(values))
""".trimIndent(),
)
// Second cell: uses the `a` accessor on the frame declared above.
@Language("kts")
val res2 = execRaw(
"""
@file:OptIn(kotlin.time.ExperimentalTime::class)
df.a
""".trimIndent(),
)
res2.shouldBeInstanceOf<DataColumn<*>>()
res2.type() shouldBe typeOf<kotlin.time.Instant>()
}
/**
 * Creates a frame with a `kotlin.uuid.Uuid` column in one cell and accesses `df.a` in the next,
 * expecting a [DataColumn] whose type is `kotlin.uuid.Uuid`.
 */
@Test
fun `opt in experimental Uuid`() {
@Language("kts")
val res1 = execRaw(
"""
@file:OptIn(kotlin.uuid.ExperimentalUuidApi::class)
val uuid: kotlin.uuid.Uuid = kotlin.uuid.Uuid.NIL
val df = dataFrameOf("a" to columnOf(uuid))
df
""".trimIndent(),
)
// Second cell: uses the `a` accessor on the frame declared above.
@Language("kts")
val res2 = execRaw(
"""
@file:OptIn(kotlin.uuid.ExperimentalUuidApi::class)
df.a
""".trimIndent(),
)
res2.shouldBeInstanceOf<DataColumn<*>>()
res2.type() shouldBe typeOf<kotlin.uuid.Uuid>()
}
/**
 * Declares a `@DataSchema` data class `A` with a property `a`, then creates a frame with columns
 * `a` and `b` in the next cell and checks that it evaluates to a non-empty frame.
 */
@Test
fun `Don't inherit from data class`() {
    @Language("kts")
    val res1 = execRendered(
"""
@DataSchema
data class A(val a: Int)
""".trimIndent(),
    )
    @Language("kts")
    val res2 = execRaw(
"""
val df = dataFrameOf("a", "b")(1, 2)
df
""".trimIndent(),
    )
    // `should { it.isNotEmpty() }` only runs the lambda and discards the Boolean, so it can never fail;
    // assert on the value explicitly instead.
    (res2 as AnyFrame).isNotEmpty() shouldBe true
}
/**
 * Declares a `@DataSchema` final class `A` with a property `a`, then creates a frame with columns
 * `a` and `b` in the next cell and checks that it evaluates to a non-empty frame.
 */
@Test
fun `Don't inherit from non open class`() {
    @Language("kts")
    val res1 = execRendered(
"""
@DataSchema
class A(val a: Int)
""".trimIndent(),
    )
    @Language("kts")
    val res2 = execRaw(
"""
val df = dataFrameOf("a", "b")(1, 2)
df
""".trimIndent(),
    )
    // `should { it.isNotEmpty() }` only runs the lambda and discards the Boolean, so it can never fail;
    // assert on the value explicitly instead.
    (res2 as AnyFrame).isNotEmpty() shouldBe true
}
/**
 * Declares a `@DataSchema` open class `A` with a property `a`, then creates a frame with columns
 * `a` and `b` in the next cell and checks that it evaluates to a non-empty frame.
 */
@Test
fun `Don't inherit from open class`() {
    @Language("kts")
    val res1 = execRendered(
"""
@DataSchema
open class A(val a: Int)
""".trimIndent(),
    )
    @Language("kts")
    val res2 = execRaw(
"""
val df = dataFrameOf("a", "b")(1, 2)
df
""".trimIndent(),
    )
    // `should { it.isNotEmpty() }` only runs the lambda and discards the Boolean, so it can never fail;
    // assert on the value explicitly instead.
    (res2 as AnyFrame).isNotEmpty() shouldBe true
}
/**
 * Declares a `@DataSchema` interface `A` with a property `a`, then creates a frame with columns
 * `a` and `b` in the next cell and checks that it evaluates to a non-empty frame.
 */
@Test
fun `Do inherit from open interface`() {
    @Language("kts")
    val res1 = execRendered(
"""
@DataSchema
interface A { val a: Int }
""".trimIndent(),
    )
    @Language("kts")
    val res2 = execRaw(
"""
val df = dataFrameOf("a", "b")(1, 2)
df
""".trimIndent(),
    )
    // `should { it.isNotEmpty() }` only runs the lambda and discards the Boolean, so it can never fail;
    // assert on the value explicitly instead.
    (res2 as AnyFrame).isNotEmpty() shouldBe true
}
/**
 * Builds a frame whose column names are the strings "0", "1" and "2" and checks that the column
 * named `1` is reachable through the backticked accessor `` df.`1` `` as a [ValueColumn].
 */
@Test
fun `codegen for enumerated frames`() {
@Language("kts")
val res1 = execRendered(
"""
val names = (0..2).map { it.toString() }
val df = dataFrameOf(names)(1, 2, 3)
""".trimIndent(),
)
res1 shouldBe Unit
@Language("kts")
val res2 = execRaw("df.`1`")
res2.shouldBeInstanceOf<ValueColumn<*>>()
}
/**
 * Reads delimited text with the headers `[a]`, `(b)` and `{c}` and checks that the values are
 * reachable through the accessors `{a}`, `(b)` and `{c}` (note that `[a]` is accessed as `{a}`).
 */
@Test
fun `codegen for complex column names`() {
@Language("kts")
val res1 = execRendered(
"""
val df = DataFrame.readDelimStr("[a], (b), {c}\n1, 2, 3")
df
""".trimIndent(),
)
res1.shouldBeInstanceOf<MimeTypedResult>()
@Language("kts")
val res2 = execRendered(
"""listOf(df.`{a}`[0], df.`(b)`[0], df.`{c}`[0])""",
)
res2 shouldBe listOf(1, 2, 3)
}
/**
 * Reads delimited text whose header is `$id` and checks that the value is reachable through the
 * accessor `` df.`$id` ``.
 */
@Test
fun `codegen for '$' that is interpolator in kotlin string literals`() {
@Language("kts")
val res1 = execRendered(
// `\${'$'}` puts a backslash followed by a literal `$` into the script, i.e. an escaped dollar sign there.
"""
val df = DataFrame.readDelimStr("\${'$'}id\n1")
df
""".trimIndent(),
)
res1.shouldBeInstanceOf<MimeTypedResult>()
@Language("kts")
val res2 = execRendered(
"listOf(df.`\$id`[0])",
)
res2 shouldBe listOf(1)
}
/**
 * Reads delimited text whose header contains a backtick (``Day`s``) and checks that the value is
 * reachable through the accessor `Day's`, i.e. with the backtick replaced by an apostrophe.
 */
@Test
fun `codegen for backtick that is forbidden in kotlin identifiers`() {
@Language("kts")
val res1 = execRendered(
"""
val df = DataFrame.readDelimStr("Day`s\n1")
df
""".trimIndent(),
)
res1.shouldBeInstanceOf<MimeTypedResult>()
// Prints the rendered entries to the test output (diagnostic only, not asserted on).
println(res1.entries.joinToString())
@Language("kts")
val res2 = execRendered(
"listOf(df.`Day's`[0])",
)
res2 shouldBe listOf(1)
}
/**
 * Reads delimited text whose header is `Test;` and checks that the value is reachable through the
 * accessor `Test ` (the `;` replaced by a space).
 */
@Test
fun `codegen for chars that is forbidden in JVM identifiers`() {
val forbiddenChar = ";"
@Language("kts")
val res1 = execRendered(
"""
val df = DataFrame.readDelimStr("Test$forbiddenChar\n1")
df
""".trimIndent(),
)
res1.shouldBeInstanceOf<MimeTypedResult>()
// Prints the rendered entries to the test output (diagnostic only, not asserted on).
println(res1.entries.joinToString())
@Language("kts")
val res2 = execRendered(
"listOf(df.`Test `[0])",
)
res2 shouldBe listOf(1)
}
/**
 * Same as the `;` variant, but the header ends with a backslash; the value is expected to be
 * reachable through the accessor `Test ` (the backslash replaced by a space).
 */
@Test
fun `codegen for chars that is forbidden in JVM identifiers 1`() {
// Two backslash characters here; inside the script's string literal they form one escaped backslash.
val forbiddenChar = "\\\\"
@Language("kts")
val res1 = execRendered(
"""
val df = DataFrame.readDelimStr("Test$forbiddenChar\n1")
df
""".trimIndent(),
)
res1.shouldBeInstanceOf<MimeTypedResult>()
// Prints the rendered entries to the test output (diagnostic only, not asserted on).
println(res1.entries.joinToString())
@Language("kts")
val res2 = execRendered(
"listOf(df.`Test `[0])",
)
res2 shouldBe listOf(1)
}
/**
 * Declares a generic `@DataSchema` interface and then, in a second cell, declares extension
 * properties that use the `field` accessor on `ColumnsContainer<Generic<T>>` and `DataRow<Generic<T>>`.
 * Both cells are expected to evaluate to `Unit`.
 */
@Test
fun `generic interface`() {
@Language("kts")
val res1 = execRendered(
"""
@DataSchema
interface Generic<T> {
val field: T
}
""".trimIndent(),
)
res1.shouldBeInstanceOf<Unit>()
@Language("kts")
val res2 = execRendered(
"""
val <T> ColumnsContainer<Generic<T>>.test1: DataColumn<T> get() = field
val <T> DataRow<Generic<T>>.test2: T get() = field
""".trimIndent(),
)
res2.shouldBeInstanceOf<Unit>()
}
/**
 * Variant of the generic-interface test in which the type parameter has the upper bound `String`.
 * Both cells are expected to evaluate to `Unit`.
 */
@Test
fun `generic interface with upper bound`() {
@Language("kts")
val res1 = execRendered(
"""
@DataSchema
interface Generic <T : String> {
val field: T
}
""".trimIndent(),
)
res1.shouldBeInstanceOf<Unit>()
@Language("kts")
val res2 = execRendered(
"""
val <T : String> ColumnsContainer<Generic<T>>.test1: DataColumn<T> get() = field
val <T : String> DataRow<Generic<T>>.test2: T get() = field
""".trimIndent(),
)
res2.shouldBeInstanceOf<Unit>()
}
/**
 * Variant of the generic-interface test in which the type parameter is declared `out` and bounded by
 * a user-defined interface, and the schema is annotated with `isOpen = false`.
 * Both cells are expected to evaluate to `Unit`.
 */
@Test
fun `generic interface with variance and user type in type parameters`() {
@Language("kts")
val res1 = execRendered(
"""
interface UpperBound
@DataSchema(isOpen = false)
interface Generic <out T : UpperBound> {
val field: T
}
""".trimIndent(),
)
res1.shouldBeInstanceOf<Unit>()
@Language("kts")
val res2 = execRendered(
"""
val <T : UpperBound> ColumnsContainer<Generic<T>>.test1: DataColumn<T> get() = field
val <T : UpperBound> DataRow<Generic<T>>.test2: T get() = field
""".trimIndent(),
)
res2.shouldBeInstanceOf<Unit>()
}
/**
 * Registers an additional low-priority field handler for `ByteArray` that executes
 * `<name>.toList()`, declares `val x = ByteArray(1)` and checks that evaluating `x` yields a [List].
 */
@Test
fun `type converter does not conflict with other type converters`() {
@Language("kts")
val anotherTypeConverter =
"""
notebook.fieldsHandlersProcessor.register(
FieldHandlerFactory.createUpdateHandler<ByteArray>(TypeDetection.RUNTIME) { _, prop ->
execute(prop.name + ".toList()").name
},
ProcessingPriority.LOW
)
""".trimIndent()
execEx(anotherTypeConverter)
execEx("val x = ByteArray(1)")
val res1 = execRaw("x")
res1.shouldBeInstanceOf<List<*>>()
}
/**
 * Converts a list of plain (non-`@DataSchema`) `Event` objects to a frame and checks that a following
 * cell can use the `toolId`, `state` and `timestamp` accessors on it without throwing.
 */
@Test
fun `generate a new marker when dataframe marker is not a data schema so that columns are accessible with extensions`() {
@Language("kts")
val a = execRendered(
"""
enum class State {
Idle, Productive, Maintenance
}
class Event(val toolId: String, val state: State, val timestamp: Long)
val tool1 = "tool_1"
val tool2 = "tool_2"
val tool3 = "tool_3"
val events = listOf(
Event(tool1, State.Idle, 0),
Event(tool1, State.Productive, 5),
Event(tool2, State.Idle, 0),
Event(tool2, State.Maintenance, 10),
Event(tool2, State.Idle, 20),
Event(tool3, State.Idle, 0),
Event(tool3, State.Productive, 25),
).toDataFrame()
""".trimIndent(),
)
// Only the absence of an exception is checked; the values of the accessors are not inspected.
shouldNotThrowAny {
@Language("kts")
val b = execRendered(
"""
events.toolId
events.state
events.timestamp
""".trimIndent(),
)
}
}
}
@@ -0,0 +1,651 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import io.kotest.assertions.throwables.shouldNotThrow
import io.kotest.matchers.comparables.shouldBeGreaterThan
import io.kotest.matchers.comparables.shouldBeLessThan
import io.kotest.matchers.shouldBe
import io.kotest.matchers.string.shouldContain
import io.kotest.matchers.string.shouldNotContain
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonArray
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.int
import kotlinx.serialization.json.jsonArray
import kotlinx.serialization.json.jsonObject
import kotlinx.serialization.json.jsonPrimitive
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.columnOf
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.format
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.DATA
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.KOTLIN_DATAFRAME
import org.jetbrains.kotlinx.dataframe.jupyter.SerializationKeys.METADATA
import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult
import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase
import org.junit.BeforeClass
import org.junit.Test
class RenderingTests : JupyterReplTestCase() {
/**
 * Renders a small frame to HTML and checks that a cell value appears in it; then registers an `Int`
 * renderer that doubles the value and checks that re-rendering `df` shows the doubled heights.
 */
@Test
fun `dataframe is rendered to html`() {
@Language("kts")
val html = execHtml(
"""
val name by column<String>()
val height by column<Int>()
val df = dataFrameOf(name, height)(
"Bill", 135,
"Charlie", 160
)
df
""".trimIndent(),
)
html shouldContain "Bill"
@Language("kts")
val useRes = execRendered(
"""
USE {
render<Int> { (it * 2).toString() }
}
""".trimIndent(),
)
useRes shouldBe Unit
// Rendering the same frame again must now go through the renderer registered above.
val html2 = execHtml("df")
html2 shouldContain (135 * 2).toString()
html2 shouldContain (160 * 2).toString()
}
/**
 * Renders a 70-row frame and expects the "top 20 of 70 rows" notice; after setting
 * `dataFrameConfig.display.rowsLimit = 50` the notice is expected to report the top 50 rows.
 */
@Test
fun `rendering options`() {
@Language("kts")
val html1 = execHtml(
"""
data class Person(val age: Int, val name: String)
val df = (1..70).map { Person(it, "A".repeat(it)) }.toDataFrame()
df
""".trimIndent(),
)
html1 shouldContain "showing only top 20 of 70 rows"
@Language("kts")
val html2 = execHtml(
"""
dataFrameConfig.display.rowsLimit = 50
df
""".trimIndent(),
)
html2 shouldContain "showing only top 50 of 70 rows"
}
/**
 * Renders the same frame before and after switching the notebook to `ColorScheme.DARK` and checks
 * that only the second rendering contains the `theme='dark'` attribute.
 */
@Test
fun `dark color scheme`() {
fun execSimpleDf() = execHtml("""dataFrameOf("a", "b")(1, 2, 3, 4)""")
val htmlLight = execSimpleDf()
// The trailing `; 1` gives the cell a value, which is asserted below.
val r1 = execRendered("notebook.changeColorScheme(ColorScheme.DARK); 1")
val htmlDark = execSimpleDf()
r1 shouldBe 1
val darkClassAttribute = """theme='dark'"""
htmlLight shouldNotContain darkClassAttribute
htmlDark shouldContain darkClassAttribute
}
/**
 * Calls `KotlinNotebookPluginUtils.getRowsSubsetForRendering(df, 20, 50)` on a 100-row frame with
 * ids 1..100 and expects 30 rows, the first with id 21 and the last with id 50.
 */
@Test
fun `test kotlin notebook plugin utils rows subset`() {
val json = executeScriptAndParseDataframeResult(
"""
data class Row(val id: Int)
val df = (1..100).map { Row(it) }.toDataFrame()
KotlinNotebookPluginUtils.getRowsSubsetForRendering(df, 20 , 50)
""".trimIndent(),
)
assertDataFrameDimensions(json, 30, 1)
val rows = json[KOTLIN_DATAFRAME]!!.jsonArray
rows.getObj(0)["id"]?.jsonPrimitive?.int shouldBe 21
rows.getObj(rows.lastIndex)["id"]?.jsonPrimitive?.int shouldBe 50
}
/**
 * Runs [script] in the REPL, expecting a [MimeTypedResult], and returns its DataFrame JSON
 * representation parsed into a [JsonObject].
 *
 * @param script the script to execute
 * @return the parsed DataFrame JSON of the script's result
 */
private fun executeScriptAndParseDataframeResult(
    @Language("kts") script: String,
): JsonObject = parseDataframeJson(execRendered<MimeTypedResult>(script))
/**
 * Asserts that the `nrow` and `ncol` entries of the metadata object in [json] equal
 * [expectedRows] and [expectedColumns].
 */
private fun assertDataFrameDimensions(json: JsonObject, expectedRows: Int, expectedColumns: Int) {
    val metadata = json[METADATA]!!.jsonObject
    val actualRows = metadata["nrow"]!!.jsonPrimitive.int
    actualRows shouldBe expectedRows
    val actualColumns = metadata["ncol"]!!.jsonPrimitive.int
    actualColumns shouldBe expectedColumns
}
/**
 * Decodes the `application/kotlindataframe+json` entry of [result] into a [JsonObject].
 * Fails with a null-pointer exception when the entry is absent.
 */
private fun parseDataframeJson(result: MimeTypedResult): JsonObject {
    val payload = result["application/kotlindataframe+json"]!!
    return Json.decodeFromString<JsonObject>(payload)
}
/** Returns the element at [index] as a JSON object. */
private fun JsonArray.getObj(index: Int): JsonObject {
    val element = get(index)
    return element.jsonObject
}
/**
 * Sorts a 100-row frame by `id` through `KotlinNotebookPluginUtils.sortByColumns` with the flag list
 * `listOf(false)` and expects the ids in ascending order.
 */
@Test
fun `test kotlin notebook plugin utils sort by one column asc`() {
val json = executeScriptAndParseDataframeResult(
"""
data class CustomRow(val id: Int, val category: String)
val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame()
KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(false))
""".trimIndent(),
)
assertDataFrameDimensions(json, 100, 2)
assertSortedById(json, false)
}
/**
 * Asserts that the `id` values of the rows in [json] are strictly monotonic.
 *
 * @param json DataFrame JSON whose rows are stored under the `KOTLIN_DATAFRAME` key
 * @param desc `true` to require strictly decreasing ids, `false` to require strictly increasing ids
 */
private fun assertSortedById(json: JsonObject, desc: Boolean) {
    // Convert each element explicitly instead of unchecked-casting the JsonArray to List<JsonObject>,
    // so a non-object row is reported here rather than as a ClassCastException inside the loop.
    val rows = json[KOTLIN_DATAFRAME]!!.jsonArray.map { it.jsonObject }
    // Sentinels just outside the 1..100 id range used by the callers' test data.
    var previousId = if (desc) 101 else 0
    rows.forEach { row ->
        val currentId = row["id"]!!.jsonPrimitive.int
        if (desc) currentId shouldBeLessThan previousId else currentId shouldBeGreaterThan previousId
        previousId = currentId
    }
}
/**
 * Sorts a 100-row frame by `id` through `KotlinNotebookPluginUtils.sortByColumns` with the flag list
 * `listOf(true)` and expects the ids in descending order.
 */
@Test
fun `test kotlin notebook plugin utils sort by one column desc`() {
val json = executeScriptAndParseDataframeResult(
"""
data class CustomRow(val id: Int, val category: String)
val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame()
KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id")), listOf(true))
""".trimIndent(),
)
assertDataFrameDimensions(json, 100, 2)
assertSortedById(json, true)
}
/**
 * Sorts a 100-row frame by `category` (flag `true`) and then by `id` (flag `false`), takes rows
 * 0..100 for rendering and checks both the category grouping and the id order within each category.
 */
@Test
fun `test kotlin notebook plugin utils sort by multiple columns`() {
    val json = executeScriptAndParseDataframeResult(
"""
data class CustomRow(val id: Int, val category: String)
val df = (1..100).map { CustomRow(it, if (it % 2 == 0) "even" else "odd") }.toDataFrame()
KotlinNotebookPluginUtils.getRowsSubsetForRendering(
KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("category"), listOf("id")), listOf(true, false)),
0, 100
)
""".trimIndent(),
    )
    assertDataFrameDimensions(json, 100, 2)
    // Convert each element explicitly instead of unchecked-casting the JsonArray to List<JsonObject>.
    val rows = json[KOTLIN_DATAFRAME]!!.jsonArray.map { it.jsonObject }
    assertSortedByCategory(rows)
    assertSortedById(rows)
}
/**
 * Asserts that the first 50 rows have the category `odd` and all following rows the category `even`.
 */
private fun assertSortedByCategory(rows: List<JsonObject>) {
    rows.forEachIndexed { index, row ->
        val actualCategory = row["category"]!!.jsonPrimitive.content
        val expectedCategory = if (index < 50) "odd" else "even"
        actualCategory shouldBe expectedCategory
    }
}
/**
 * Asserts the id order of [rows] that are grouped by category: ids must increase while the category
 * stays the same, and must drop at the switch from `odd` to `even`.
 */
private fun assertSortedById(rows: List<JsonObject>) {
    var lastCategory = "odd"
    var lastId = 0
    rows.forEach { row ->
        val category = row["category"]!!.jsonPrimitive.content
        val id = row["id"]!!.jsonPrimitive.int
        when {
            // Category boundary: the last `odd` id is larger than the first `even` id.
            lastCategory == "odd" && category == "even" -> lastId shouldBeGreaterThan id
            // Inside one category the ids are ascending.
            lastCategory == category -> lastId shouldBeLessThan id
        }
        lastCategory = category
        lastId = id
    }
}
/**
 * Serializes a frame that contains a column group (`group` made of `col1` and `col2`), a string
 * column with a null value (`col3`) and a frame column (`col4`) and compares the complete JSON
 * output, including the per-column metadata, with the expected document below.
 */
@Test
fun `json metadata contains schema metadata`() {
val json = executeScriptAndParseDataframeResult(
"""
val col1 by columnOf("a", "b", "c")
val col2 by columnOf(1, 2, 3)
val col3 by columnOf("Foo", "Bar", null)
val df2 = dataFrameOf(Pair("header", listOf("A", "B", "C")))
val col4 by columnOf(df2, df2, df2)
var df = dataFrameOf(col1, col2, col3, col4)
df.group(col1, col2).into("group")
""".trimIndent(),
)
// `${'$'}version` yields the literal key `$version`; a bare `$version` would be a string template.
val expectedOutput =
"""
{
"${'$'}version": "2.2.0",
"metadata": {
"columns": ["group", "col3", "col4"],
"types": [{
"kind": "ColumnGroup"
}, {
"kind": "ValueColumn",
"type": "kotlin.String?"
}, {
"kind": "FrameColumn"
}],
"nrow": 3,
"ncol": 3,
"is_formatted": false
},
"kotlin_dataframe": [{
"group": {
"data": {
"col1": "a",
"col2": 1
},
"metadata": {
"kind": "ColumnGroup",
"columns": ["col1", "col2"],
"types": [{
"kind": "ValueColumn",
"type": "kotlin.String"
}, {
"kind": "ValueColumn",
"type": "kotlin.Int"
}]
}
},
"col3": "Foo",
"col4": {
"data": [{
"header": "A"
}, {
"header": "B"
}, {
"header": "C"
}],
"metadata": {
"kind": "FrameColumn",
"columns": ["header"],
"types": [{
"kind": "ValueColumn",
"type": "kotlin.String"
}],
"ncol": 1,
"nrow": 3
}
}
}, {
"group": {
"data": {
"col1": "b",
"col2": 2
},
"metadata": {
"kind": "ColumnGroup",
"columns": ["col1", "col2"],
"types": [{
"kind": "ValueColumn",
"type": "kotlin.String"
}, {
"kind": "ValueColumn",
"type": "kotlin.Int"
}]
}
},
"col3": "Bar",
"col4": {
"data": [{
"header": "A"
}, {
"header": "B"
}, {
"header": "C"
}],
"metadata": {
"kind": "FrameColumn",
"columns": ["header"],
"types": [{
"kind": "ValueColumn",
"type": "kotlin.String"
}],
"ncol": 1,
"nrow": 3
}
}
}, {
"group": {
"data": {
"col1": "c",
"col2": 3
},
"metadata": {
"kind": "ColumnGroup",
"columns": ["col1", "col2"],
"types": [{
"kind": "ValueColumn",
"type": "kotlin.String"
}, {
"kind": "ValueColumn",
"type": "kotlin.Int"
}]
}
},
"col3": null,
"col4": {
"data": [{
"header": "A"
}, {
"header": "B"
}, {
"header": "C"
}],
"metadata": {
"kind": "FrameColumn",
"columns": ["header"],
"types": [{
"kind": "ValueColumn",
"type": "kotlin.String"
}],
"ncol": 1,
"nrow": 3
}
}
}]
}
""".trimIndent()
// Compared as parsed JSON, so the whitespace of the expected text does not matter.
json shouldBe Json.parseToJsonElement(expectedOutput)
}
/**
 * Converts `df.groupBy("group")` of a 20-row frame (ids 1..10 in group 1, 11..20 in group 2) with
 * `KotlinNotebookPluginUtils.convertToDataFrame` and expects two rows, each holding ten entries
 * under the `group1` key.
 */
@Test
fun `test kotlin dataframe conversion groupby`() {
val json = executeScriptAndParseDataframeResult(
"""
data class Row(val id: Int, val group: Int)
val df = (1..20).map { Row(it, if (it <= 10) 1 else 2) }.toDataFrame()
KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group"))
""".trimIndent(),
)
assertDataFrameDimensions(json, 2, 2)
val rows = json[KOTLIN_DATAFRAME]!!.jsonArray
// NOTE(review): the grouped rows are read from a column named `group1`, presumably because the
// name `group` is already taken by the key column; confirm against convertToDataFrame.
rows.getObj(0)["group1"]!!
.jsonObject[DATA]!!
.jsonArray.size shouldBe 10
rows.getObj(1)["group1"]!!
.jsonObject[DATA]!!
.jsonArray.size shouldBe 10
}
// Regression KTNB-424
/**
 * Converts `df.groupBy("group").first()` with `KotlinNotebookPluginUtils.convertToDataFrame` and
 * expects the conversion to complete without throwing and to produce a 2x2 frame.
 */
@Test
fun `test kotlin dataframe conversion ReducedGroupBy`() {
shouldNotThrow<Throwable> {
val json = executeScriptAndParseDataframeResult(
"""
data class Row(val id: Int, val group: Int)
val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame()
KotlinNotebookPluginUtils.convertToDataFrame(df.groupBy("group").first())
""".trimIndent(),
)
assertDataFrameDimensions(json, 2, 2)
}
}
/**
 * Sorts the column `nums` (5, 4, 3, 2, 1) with the flag `false` and expects 1 in the first row and
 * 5 in the last row.
 */
@Test
fun `test sortByColumns by int column`() {
val json = executeScriptAndParseDataframeResult(
"""
val df = dataFrameOf("nums")(5, 4, 3, 2, 1)
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("nums")), listOf(false))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
val rows = json[KOTLIN_DATAFRAME]!!.jsonArray
json.extractColumn<Int>(0, "nums") shouldBe 1
json.extractColumn<Int>(rows.size - 1, "nums") shouldBe 5
}
/**
 * Reads the primitive value of [fieldName] from the row at [index] of this DataFrame JSON.
 *
 * @param T the expected value type; only [String] and [Int] are supported
 * @throws IllegalArgumentException when [T] is neither [String] nor [Int]
 */
internal inline fun <reified T> JsonObject.extractColumn(index: Int, fieldName: String): T {
    val row = this[KOTLIN_DATAFRAME]!!.jsonArray[index].jsonObject
    val primitive = row[fieldName]!!.jsonPrimitive
    val value: Any = when (T::class) {
        String::class -> primitive.content
        Int::class -> primitive.int
        else -> throw IllegalArgumentException("Unsupported type")
    }
    return value as T
}
/**
 * Sorts by `a` with the flag `true` and by `b` with the flag `false`; expects the first row to be
 * (3, 5) and the last row to be (1, 2).
 */
@Test
fun `test sortByColumns by multiple int columns`() {
val json = executeScriptAndParseDataframeResult(
"""
data class Row(val a: Int, val b: Int)
val df = listOf(Row(1, 1), Row(1, 2), Row(2, 3), Row(2, 4), Row(3, 5), Row(3, 6)).toDataFrame()
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("a"), listOf("b")), listOf(true, false))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
json.extractColumn<Int>(0, "a") shouldBe 3
json.extractColumn<Int>(0, "b") shouldBe 5
json.extractColumn<Int>(5, "a") shouldBe 1
json.extractColumn<Int>(5, "b") shouldBe 2
}
/**
 * Sorts the column `letters` (e, d, c, b, a) with the flag `true` and expects "e" in the first row
 * and "a" in the last row.
 */
@Test
fun `test sortByColumns by single string column`() {
val json = executeScriptAndParseDataframeResult(
"""
val df = dataFrameOf("letters")("e", "d", "c", "b", "a")
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("letters")), listOf(true))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
json.extractColumn<String>(0, "letters") shouldBe "e"
json.extractColumn<String>(4, "letters") shouldBe "a"
}
/**
 * Sorts by `first` with the flag `false` and by `second` with the flag `true`; expects the first row
 * to be ("a", "b") and the last row to be ("b", "a").
 */
@Test
fun `test sortByColumns by multiple string columns`() {
val json = executeScriptAndParseDataframeResult(
"""
data class Row(val first: String, val second: String)
val df = listOf(Row("a", "b"), Row("a", "a"), Row("b", "b"), Row("b", "a")).toDataFrame()
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("first"), listOf("second")), listOf(false, true))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
json.extractColumn<String>(0, "first") shouldBe "a"
json.extractColumn<String>(0, "second") shouldBe "b"
json.extractColumn<String>(3, "first") shouldBe "b"
json.extractColumn<String>(3, "second") shouldBe "a"
}
/**
 * Sorts by the int column `num` with the flag `true` and by the string column `letter` with the flag
 * `false`; expects the first row to be (3, "a") and the last row to be (1, "b").
 */
@Test
fun `test sortByColumns by mix of int and string columns`() {
val json = executeScriptAndParseDataframeResult(
"""
data class Row(val num: Int, val letter: String)
val df = listOf(Row(1, "a"), Row(1, "b"), Row(2, "a"), Row(2, "b"), Row(3, "a")).toDataFrame()
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("num"), listOf("letter")), listOf(true, false))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
json.extractColumn<Int>(0, "num") shouldBe 3
json.extractColumn<String>(0, "letter") shouldBe "a"
json.extractColumn<Int>(4, "num") shouldBe 1
json.extractColumn<String>(4, "letter") shouldBe "b"
}
/**
 * Sorts by a URL column (flag `false`) and by a column of `Person` objects (flag `true`), where
 * `Person.toString()` returns the age. Expects the person value 12 in the first row and 11 in the
 * last row.
 */
@Test
fun `test sortByColumns by multiple non-comparable column`() {
val json = executeScriptAndParseDataframeResult(
"""
data class Person(val name: String, val age: Int) {
override fun toString(): String {
return age.toString()
}
}
val df = dataFrameOf("urls", "person")(
URI("https://example.com/a").toURL(), Person("Alice", 10),
URI("https://example.com/b").toURL(), Person("Bob", 11),
URI("https://example.com/a").toURL(), Person("Nick", 12),
URI("https://example.com/b").toURL(), Person("Guy", 13),
)
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("urls"), listOf("person")), listOf(false, true))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
// The person cells are read back as Int, i.e. the serialized value is the age from toString().
json.extractColumn<Int>(0, "person") shouldBe 12
json.extractColumn<Int>(3, "person") shouldBe 11
}
/**
 * Sorts by the int column `id` and by the URL column `urls`, both with the flag `true`; expects the
 * first row to be (".../b", 2) and the last row to be (".../a", 1).
 */
@Test
fun `test sortByColumns by mix of comparable and non-comparable columns`() {
val json = executeScriptAndParseDataframeResult(
"""
val df = dataFrameOf("urls", "id")(
URI("https://example.com/a").toURL(), 1,
URI("https://example.com/b").toURL(), 2,
URI("https://example.com/a").toURL(), 2,
URI("https://example.com/b").toURL(), 1,
)
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("id"), listOf("urls")), listOf(true, true))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
json.extractColumn<String>(0, "urls") shouldBe "https://example.com/b"
json.extractColumn<Int>(0, "id") shouldBe 2
json.extractColumn<String>(3, "urls") shouldBe "https://example.com/a"
json.extractColumn<Int>(3, "id") shouldBe 1
}
/**
 * Sorts a single URL column (a, c, b, d) with the flag `false` and expects the order a, b, c, d.
 */
@Test
fun `test sortByColumns by url column`() {
val json = executeScriptAndParseDataframeResult(
"""
val df = dataFrameOf("urls")(
URI("https://example.com/a").toURL(),
URI("https://example.com/c").toURL(),
URI("https://example.com/b").toURL(),
URI("https://example.com/d").toURL()
)
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("urls")), listOf(false))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
json.extractColumn<String>(0, "urls") shouldBe "https://example.com/a"
json.extractColumn<String>(1, "urls") shouldBe "https://example.com/b"
json.extractColumn<String>(2, "urls") shouldBe "https://example.com/c"
json.extractColumn<String>(3, "urls") shouldBe "https://example.com/d"
}
/**
 * Groups `a` and `b` into the column group `c`, sorts by the nested path `c/a` with the flag `false`
 * and checks the resulting order through the sibling column `b`, expected to be 5, 4, 3, 2, 1.
 */
@Test
fun `test sortByColumns by column group children`() {
val json = executeScriptAndParseDataframeResult(
"""
val df = dataFrameOf(
"a" to listOf(5, 4, 3, 2, 1),
"b" to listOf(1, 2, 3, 4, 5)
)
val res = KotlinNotebookPluginUtils.sortByColumns(df.group("a", "b").into("c"), listOf(listOf("c", "a")), listOf(false))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
// Collects the `b` value of every row from the `data` object of the serialized group `c`.
fun JsonObject.extractBFields(): List<Int> {
val dataframe = this[KOTLIN_DATAFRAME]!!.jsonArray
return dataframe.map { it.jsonObject["c"]!!.jsonObject["data"]!!.jsonObject["b"]!!.jsonPrimitive.int }
}
val bFields = json.extractBFields()
bFields shouldBe listOf(5, 4, 3, 2, 1)
}
/**
 * Sorts a column mixing `Int` and `String` values with the flag `true` and expects the order
 * 5, 4, 2, 10, 1.
 */
@Test
fun `test sortByColumns for column that contains string and int`() {
val json = executeScriptAndParseDataframeResult(
"""
val df = dataFrameOf("mixed")(
5,
"10",
2,
"4",
"1"
)
val res = KotlinNotebookPluginUtils.sortByColumns(df, listOf(listOf("mixed")), listOf(true))
KotlinNotebookPluginUtils.convertToDataFrame(res)
""".trimIndent(),
)
// NOTE(review): this order matches a descending comparison of the values' string forms
// ("2" sorts above "10"); confirm that this is the intended contract.
json.extractColumn<String>(0, "mixed") shouldBe "5"
json.extractColumn<String>(1, "mixed") shouldBe "4"
json.extractColumn<String>(2, "mixed") shouldBe "2"
json.extractColumn<String>(3, "mixed") shouldBe "10"
json.extractColumn<String>(4, "mixed") shouldBe "1"
}
// Issue #1546
/**
 * Checks that `hasFormattedColumns()` is `false` for a plain frame (including an all-null column
 * created by inference) and `true` for a frame whose column holds a formatted frame.
 */
@Test
fun `hasFormattedFrame false positive`() {
val df = dataFrameOf(
"a" to columnOf(1, 2, 3, null),
"b" to DataColumn.createByInference("", listOf(null, null, null, null)),
"c" to columnOf(7, 3, 2, 65),
)
df.hasFormattedColumns() shouldBe false
// Wrap a formatted version of `df` as the single value of column `a`.
val formatted = dataFrameOf("a" to columnOf(df.format { "c"() }.with { background(black) }))
formatted.hasFormattedColumns() shouldBe true
}
companion object {
/**
 * Sets the `KTNB_IDE_BUILD_NUMBER` system property once, before any test of this class runs.
 * The IDE version is needed for specific serialization testing purposes.
 */
@BeforeClass
@JvmStatic
internal fun setupOnce() {
System.setProperty("KTNB_IDE_BUILD_NUMBER", "IU;241;14015")
}
}
}
// region friend module error suppression
/**
 * Test-local aliases for the serialization key constants declared in
 * `org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys`.
 * The `INVISIBLE_REFERENCE` suppression allows referencing those declarations from this module.
 */
@Suppress("INVISIBLE_REFERENCE")
internal object SerializationKeys {
const val DATA = org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA
const val METADATA = org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
const val KOTLIN_DATAFRAME = org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
}
// endregion
@@ -0,0 +1,55 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.kotlinx.jupyter.api.EmbeddedKernelRunMode
import org.jetbrains.kotlinx.jupyter.config.DefaultKernelLoggerFactory
import org.jetbrains.kotlinx.jupyter.config.defaultRepositoriesCoordinates
import org.jetbrains.kotlinx.jupyter.libraries.LibraryResolver
import org.jetbrains.kotlinx.jupyter.libraries.createLibraryHttpUtil
import org.jetbrains.kotlinx.jupyter.repl.ReplForJupyter
import org.jetbrains.kotlinx.jupyter.repl.creating.createRepl
import org.jetbrains.kotlinx.jupyter.repl.embedded.NoOpInMemoryReplResultsHolder
import org.jetbrains.kotlinx.jupyter.testkit.ClasspathLibraryResolver
import org.jetbrains.kotlinx.jupyter.testkit.ReplProvider
import org.jetbrains.kotlinx.jupyter.testkit.ToEmptyLibraryResolver
/**
 * Mirrors [ReplProvider.forLibrariesTesting] but additionally accepts `extraCompilerArguments`,
 * which can be used to set opt-ins.
 *
 * @param libraries names of the libraries that are resolved to an empty definition
 * @param extraCompilerArguments additional compiler arguments forwarded to the created REPL
 */
@Suppress("unused")
fun ReplProvider.Companion.forLibrariesTesting(
    libraries: Collection<String>,
    extraCompilerArguments: List<String> = emptyList(),
): ReplProvider {
    val isListedLibrary: (String?) -> Boolean = { name -> name in libraries }
    return withDefaultClasspathResolution(
        shouldResolveToEmpty = isListedLibrary,
        extraCompilerArguments = extraCompilerArguments,
    )
}
/** HTTP utility created once for this file and shared by every REPL built by the providers below. */
private val httpUtil = createLibraryHttpUtil(DefaultKernelLoggerFactory)
/**
 * Creates a [ReplProvider] whose REPL uses a [ClasspathLibraryResolver] wrapped in a
 * [ToEmptyLibraryResolver], runs in [EmbeddedKernelRunMode] and is initialized with the libraries
 * found on the current classpath.
 *
 * @param shouldResolve predicate handed to the [ClasspathLibraryResolver]
 * @param shouldResolveToEmpty predicate handed to the [ToEmptyLibraryResolver]
 * @param extraCompilerArguments additional compiler arguments forwarded to `createRepl`
 */
fun withDefaultClasspathResolution(
    shouldResolve: (String?) -> Boolean = { true },
    shouldResolveToEmpty: (String?) -> Boolean = { false },
    extraCompilerArguments: List<String> = emptyList(),
) = ReplProvider { classpath ->
    val classpathResolver = ClasspathLibraryResolver(httpUtil.libraryDescriptorsManager, null, shouldResolve)
    val resolver: LibraryResolver = ToEmptyLibraryResolver(classpathResolver, shouldResolveToEmpty)

    val repl = createRepl(
        httpUtil = httpUtil,
        scriptClasspath = classpath,
        kernelRunMode = EmbeddedKernelRunMode,
        mavenRepositories = defaultRepositoriesCoordinates,
        libraryResolver = resolver,
        inMemoryReplResultsHolder = NoOpInMemoryReplResultsHolder,
        extraCompilerArguments = extraCompilerArguments,
    )
    repl.initializeWithCurrentClasspath()
    repl
}
/**
 * Registers the libraries discovered through the REPL's current class loader by calling
 * `librariesScanner.addLibrariesFromClassLoader` inside an `eval` block.
 */
private fun ReplForJupyter.initializeWithCurrentClasspath() {
eval { librariesScanner.addLibrariesFromClassLoader(currentClassLoader, this, notebook) }
}
@@ -0,0 +1,17 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import io.kotest.matchers.shouldNotBe
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.junit.Test
import java.io.InputStreamReader
/** Checks that the `/table.html` resource can be loaded through [DataFrame]'s class. */
class ResourcesTest {
    /**
     * Looks up `/table.html` and prints its content.
     *
     * The null check runs before the stream is read, so a missing resource is reported as an
     * assertion failure instead of a `NullPointerException` from the reader.
     */
    @Test
    fun `resources available`() {
        val res = DataFrame::class.java.getResourceAsStream("/table.html")
        res shouldNotBe null
        // `use` closes the reader, and with it the underlying stream, once the text has been read.
        println(InputStreamReader(res).use { it.readText() })
    }
}
@@ -0,0 +1,190 @@
package org.jetbrains.kotlinx.dataframe.jupyter
import org.jetbrains.jupyter.parser.JupyterParser
import org.jetbrains.jupyter.parser.notebook.CodeCell
import org.jetbrains.jupyter.parser.notebook.Output
import org.jetbrains.kotlinx.dataframe.BuildConfig
import org.jetbrains.kotlinx.jupyter.exceptions.causesSequence
import org.jetbrains.kotlinx.jupyter.repl.result.EvalResultEx
import org.junit.Assume
import org.junit.AssumptionViolatedException
import org.junit.Before
import org.junit.Test
import java.io.File
import java.util.Locale
/**
* Runs the example notebooks in the dev folder (`/examples/notebooks/dev`), only when
* debug mode is on:
* `kotlin.dataframe.debug=true`
*/
class SampleNotebooksTests : DataFrameJupyterTest() {
/**
 * Runs before every test and marks it as skipped, through a JUnit assumption, unless
 * [BuildConfig.DEBUG] is `true`.
 */
@Suppress("KotlinConstantConditions")
@Before
fun checkDebugMode() {
    Assume.assumeTrue(BuildConfig.DEBUG)
}
/** Runs the notebook `40 puzzles.ipynb` from the `puzzles` directory. */
@Test
fun puzzles() = exampleTest("puzzles", "40 puzzles")
/**
 * Runs `github/github.ipynb` up to, but not including, the first cell whose source mentions
 * "personal access token", and deletes `jetbrains.json` from the working directory afterwards.
 */
@Test
fun github() =
exampleTest(
dir = "github",
cellClause = CellClause.stopAfter { cell ->
"personal access token" in cell.source
},
cleanup = {
File("jetbrains.json").delete()
},
)
/** Runs `titanic/Titanic.ipynb`, replacing `titanic.csv` in the cell code with its path in the examples folder. */
@Test
fun titanic() =
exampleTest(
dir = "titanic",
notebookName = "Titanic",
replacer = CodeReplacer.byMap(
testFile("titanic", "titanic.csv"),
),
)
/**
 * Runs `wine/WineNetWIthKotlinDL.ipynb`, replacing `winequality-red.csv` in the cell code with its
 * path in the examples folder.
 */
@Test
fun wine() =
exampleTest(
dir = "wine",
notebookName = "WineNetWIthKotlinDL",
replacer = CodeReplacer.byMap(
testFile("wine", "winequality-red.csv"),
),
)
/**
 * Runs `netflix/netflix.ipynb` with the default locale temporarily set to `en-US`; the previous
 * default locale is restored afterwards, even when the test fails.
 */
@Test
fun netflix() {
val currentLocale = Locale.getDefault()
try {
// Set an explicit locale because the test data contains locale-dependent values (dates to be parsed)
Locale.setDefault(Locale.forLanguageTag("en-US"))
exampleTest(
dir = "netflix",
replacer = CodeReplacer.byMap(
testFile("netflix", "country_codes.csv"),
testFile("netflix", "netflix_titles.csv"),
),
)
} finally {
Locale.setDefault(currentLocale)
}
}
/**
 * Runs `movies/movies.ipynb`, replacing `movies.csv` with its path in the examples folder, up to,
 * but not including, the first cell whose source mentions `tags.csv`.
 */
@Test
fun movies() =
exampleTest(
dir = "movies",
replacer = CodeReplacer.byMap(
testFile("movies", "movies.csv"),
),
// There is no tags data in the repository
cellClause = CellClause.stopAfter { cell ->
"tags.csv" in cell.source
},
)
/**
 * Runs `top_12_german_companies/top_12_german_companies.ipynb`, replacing the CSV file name in the
 * cell code with its path in the examples folder.
 */
@Test
fun top12GermanCompanies() =
exampleTest(
dir = "top_12_german_companies",
replacer = CodeReplacer.byMap(
testFile("top_12_german_companies", "top_12_german_companies.csv"),
),
)
/**
 * Runs `json/KeyValueAndOpenApi.ipynb`, skipping every cell tagged `skiptest` and replacing the
 * three data file names in the cell code with their paths in the examples folder.
 */
@Test
fun json() =
exampleTest(
dir = "json",
notebookName = "KeyValueAndOpenApi",
cellClause = CellClause {
// skip OOM cells
it.metadata.tags?.contains("skiptest") != true
},
replacer = CodeReplacer.byMap(
testFile("json", "api_guru_list.json"),
testFile("json", "apiGuruMetrics.json"),
testFile("json", "ApiGuruOpenApi.yaml"),
),
)
/** Runs `quickstart/quickstart.ipynb` without replacements or cell filtering. */
@Test
fun quickstart() =
exampleTest(
dir = "quickstart",
)
/**
 * Parses the notebook at [notebookPath] and executes its accepted code cells in order.
 *
 * A cell is executed when it is a code cell and [cellClause] accepts it; its source is passed through
 * [replacer] first. An error result is rethrown, except that a failure caused by issue #1116 is
 * turned into a violated assumption so the test counts as skipped. [cleanup] always runs at the end.
 *
 * @param notebookPath path of the `.ipynb` file
 * @param replacer rewrites the code of each cell before execution
 * @param cellClause selects the cells to execute
 * @param cleanup action run after the cells, whether or not they succeeded
 */
private fun doTest(
    notebookPath: String,
    replacer: CodeReplacer,
    cellClause: CellClause,
    cleanup: () -> Unit = {},
) {
    val parsedNotebook = JupyterParser.parse(File(notebookPath))
    val acceptedCells = cellClause and CellClause.IS_CODE
    val cellsToRun = parsedNotebook.cells
        .filter { cell -> acceptedCells.isAccepted(cell) }
        .map { cell -> CodeCellData(cell.source, (cell as? CodeCell)?.outputs.orEmpty()) }

    try {
        cellsToRun.forEach { cellData ->
            val result = execEx(replacer.replace(cellData.code))
            if (result is EvalResultEx.AbstractError) {
                throw result.error
            }
            require(result is EvalResultEx.Success)
        }
    } catch (e: Exception) {
        val causedByIssue1116 = e.causesSequence()
            .filterIsInstance<IllegalStateException>()
            .any { it.message?.contains("null DefinitelyNotNullType for") == true }
        if (!causedByIssue1116) {
            throw e
        }
        // Issue #1116: https://github.com/Kotlin/dataframe/issues/1116
        // Cannot finish test until this is solved, so treat this test as "ignored"
        throw AssumptionViolatedException("Test skipped due to issue #1116")
    } finally {
        cleanup()
    }
}
/**
 * Runs the example notebook located in the directory [dir] below `NOTEBOOK_EXAMPLES_PATH`.
 *
 * @param dir directory of the notebook
 * @param notebookName file name without the `.ipynb` extension; defaults to [dir] when `null`
 * @param replacer rewrites the code of each cell before execution
 * @param cellClause selects the cells to execute
 * @param cleanup action run after the notebook has been executed
 */
private fun exampleTest(
    dir: String,
    notebookName: String? = null,
    replacer: CodeReplacer = CodeReplacer.DEFAULT,
    cellClause: CellClause = CellClause { true },
    cleanup: () -> Unit = {},
) {
    val notebookFile = "${notebookName ?: dir}.ipynb"
    doTest(
        notebookPath = "$NOTEBOOK_EXAMPLES_PATH/$dir/$notebookFile",
        replacer = replacer,
        cellClause = cellClause,
        cleanup = cleanup,
    )
}
/** Source code of a notebook cell together with the outputs stored for it in the notebook file. */
data class CodeCellData(val code: String, val outputs: List<Output>)
companion object {
/** Relative path of the folder that contains the example notebooks. */
const val NOTEBOOK_EXAMPLES_PATH = "../examples/notebooks/dev"
/**
 * Builds a replacement pair that maps the bare [fileName] to its path inside [folder] of the
 * examples folder, for use with `CodeReplacer.byMap`.
 */
fun testFile(folder: String, fileName: String) = fileName to "$NOTEBOOK_EXAMPLES_PATH/$folder/$fileName"
}
}
@@ -0,0 +1,21 @@
<configuration>
<!-- Console appender that writes every log line to standard error. -->
<appender name="STDERR" class="ch.qos.logback.core.ConsoleAppender">
<!-- encoders are assigned the type
ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
<target>System.err</target>
<encoder>
<pattern>%-4relative [%thread] %-5level %logger{35} - %msg %n</pattern>
</encoder>
</appender>
<!-- Everything is logged at DEBUG unless a logger below overrides the level. -->
<root level="DEBUG">
<appender-ref ref="STDERR"/>
</root>
<!-- HTTP client loggers, in their original and Kotlin-relocated packages, are limited to errors. -->
<logger name="org.apache" level="ERROR"/>
<logger name="org.jetbrains.kotlin.org.apache" level="ERROR"/>
<logger name="httpclient" level="ERROR"/>
<logger name="org.jetbrains.kotlin.httpclient" level="ERROR"/>
<!-- Aether loggers, in their original and Kotlin-relocated packages, are set to DEBUG explicitly. -->
<logger name="org.eclipse.aether" level="DEBUG"/>
<logger name="org.jetbrains.kotlin.org.eclipse.aether" level="DEBUG"/>
</configuration>