init research

2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,39 @@
+## :samples
+
+Code samples, as well as DataFrame iframes and Kandy plot images, for the 
+[documentation website](https://github.com/Kotlin/dataframe).
+
+### Korro
+
+Saves code samples using [Korro](https://github.com/devcrocod/korro).
+
+To save or update samples:
+* Run the `korroClean` and `korro` Gradle tasks.
+
+**Important**: Korro may not work correctly until the 
+[migration from `:core` is finished](https://github.com/Kotlin/dataframe/issues/898).
+In the meantime, run the Korro tasks for the whole project.
+
+### SampleHelper
+
+[`SampleHelper`](https://github.com/Kotlin/kandy/blob/samples_util/util/kandy-samples-utils/README.md)
+allows you to save the resulting Kandy plots as SVG images and DataFrames as iframes.
+
+Running tests in this module will save or update these samples.
+
+**Important**:
+
+1) If a sample has changed, verify that the change is intentional and correct.
+You can track it with the Git file changes tracker in IDEA.
+2) Add all iframes as resources in [this file](../docs/StardustDocs/topics/_shadow_resources.md).
+Run [this script](https://github.com/Kotlin/kandy/blob/samples_util/util/kandy-samples-utils/README.md#how-to-use)
+to update them.
+
+### Notebook-To-Doc
+
+A Kotlin notebook can be easily converted to documentation using 
+[this script](https://github.com/Kotlin/kandy/blob/samples_util/util/kandy-samples-utils/README.md#how-to-use).
+It produces two files: `.kt` and `.md`.
+* Place the `.kt` file in the tests of this module and run it.
+* Place the `.md` file in the [docs topics directory](../docs/StardustDocs/topics).
+* Run the Korro tasks.
@@ -0,0 +1,142 @@
+import org.gradle.kotlin.dsl.dependencies
+import org.gradle.kotlin.dsl.exclude
+import org.gradle.kotlin.dsl.implementation
+import org.gradle.kotlin.dsl.invoke
+import org.gradle.kotlin.dsl.java
+import org.gradle.kotlin.dsl.korro
+import org.gradle.kotlin.dsl.kotlin
+import org.gradle.kotlin.dsl.libs
+import org.gradle.kotlin.dsl.main
+import org.gradle.kotlin.dsl.projects
+import org.gradle.kotlin.dsl.repositories
+import org.gradle.kotlin.dsl.runKtlintCheckOverMainSourceSet
+import org.gradle.kotlin.dsl.runKtlintCheckOverTestSourceSet
+import org.gradle.kotlin.dsl.runKtlintFormatOverMainSourceSet
+import org.gradle.kotlin.dsl.runKtlintFormatOverTestSourceSet
+import org.gradle.kotlin.dsl.sourceSets
+import org.gradle.kotlin.dsl.test
+import org.gradle.kotlin.dsl.testImplementation
+import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
+
+// Plugins are applied in the same order as before: the two convention plugins first,
+// then Korro and the DataFrame compiler plugin from the `libs` version catalog.
+plugins {
+    alias(convention.plugins.kotlinJvm11)
+    alias(convention.plugins.ktlint)
+    alias(libs.plugins.korro)
+    alias(libs.plugins.dataframe.compiler.plugin)
+}
+
+// Sibling modules whose jars and `api` dependencies are consumed below,
+// resolved from their type-safe accessors to Project instances.
+val dependentProjects = listOf(
+    projects.core,
+    projects.dataframeArrow,
+    projects.dataframeExcel,
+    projects.dataframeJdbc,
+    projects.dataframeCsv,
+    projects.dataframeJson,
+).map { accessor -> project(accessor.path) }
+
+// Every Kotlin compilation in this module depends on the `jar` task of each dependent project,
+// because those jars are put on the compile classpath in the `dependencies` block.
+// `configureEach` registers the action lazily; the former `withType<T> { }` form is the eager
+// overload and forces all KotlinCompile tasks to be created during configuration.
+tasks.withType<KotlinCompile>().configureEach {
+    dependentProjects.forEach { dependentProject ->
+        dependsOn("${dependentProject.path}:jar")
+    }
+}
+
+// Adds the core module's project directory to the friend paths of every Kotlin compilation here.
+tasks.withType<KotlinCompile>().configureEach {
+    val coreProject = project(projects.core.path)
+    friendPaths.from(coreProject.projectDir)
+}
+
+// Jar files of the compiled dependent modules, taken from each module's "instrumentedJars"
+// configuration (all modules with a jar task have this artifact in the DataFrame project).
+// `single()` requires exactly one artifact per module; paths are absolute and use '/' separators.
+val dependentProjectJarPaths = dependentProjects.map { dependentProject ->
+    val instrumentedJars = dependentProject.configurations.getByName("instrumentedJars")
+    val jarFile = instrumentedJars.artifacts.single().file
+    jarFile.absolutePath.replace(File.separatorChar, '/')
+}
+
+dependencies {
+    runtimeOnly(projects.dataframe)
+    // Must depend on jars for the compiler plugin to work!
+    implementation(files(dependentProjectJarPaths))
+
+    // The jars do not include the `api` dependencies of the dependent projects,
+    // so every external module declared there is re-declared here.
+    for (dependentProject in dependentProjects) {
+        dependentProject.configurations.getByName("api").dependencies
+            .filterIsInstance<ExternalModuleDependency>()
+            .forEach { external ->
+                // Fall back to a dynamic version when the dependency declares none.
+                val version = external.version ?: "+"
+                implementation("${external.group}:${external.name}:$version")
+            }
+    }
+
+    testImplementation(libs.junit)
+    testImplementation(libs.kotestAssertions) {
+        exclude(group = "org.jetbrains.kotlin", module = "kotlin-stdlib-jdk8")
+    }
+    // Kandy artifacts are used without the published DataFrame artifact they would pull in.
+    testImplementation(libs.kandy) {
+        exclude(group = "org.jetbrains.kotlinx", module = "dataframe")
+    }
+    testImplementation(libs.kandy.samples.utils) {
+        exclude(group = "org.jetbrains.kotlinx", module = "dataframe")
+    }
+    testImplementation(libs.kotlin.datetimeJvm)
+    testImplementation(libs.poi)
+    testImplementation(libs.arrow.vector)
+}
+
+korro {
+    // Documentation topics handed to Korro, relative to the repository root.
+    docs = fileTree(rootProject.rootDir) {
+        include(
+            "docs/StardustDocs/topics/DataSchema-Data-Classes-Generation.md",
+            "docs/StardustDocs/topics/read.md",
+            "docs/StardustDocs/topics/write.md",
+            "docs/StardustDocs/topics/rename.md",
+            "docs/StardustDocs/topics/format.md",
+            "docs/StardustDocs/topics/toHTML.md",
+            "docs/StardustDocs/topics/guides/*.md",
+            "docs/StardustDocs/topics/operations/utils/*.md",
+            "docs/StardustDocs/topics/operations/multiple/*.md",
+            "docs/StardustDocs/topics/operations/column/*.md",
+            "docs/StardustDocs/topics/collectionsInterop/*.md",
+            "docs/StardustDocs/topics/dataSources/sql/*.md",
+            "docs/StardustDocs/topics/info/*.md",
+        )
+    }
+
+    // Kotlin sample sources handed to Korro, relative to this module's directory.
+    samples = fileTree(project.projectDir) {
+        include(
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/utils/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/multiple/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/render/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/column/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/info/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/*.kt",
+            "src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/io/*.kt",
+        )
+    }
+
+    groupSamples {
+        // A group is wrapped in <tabs>; each sample in it becomes one <tab> whose title
+        // starts as the NAME placeholder and is replaced according to the function suffix.
+        beforeGroup = "<tabs>\n"
+        afterGroup = "</tabs>"
+
+        beforeSample = "<tab title=\"NAME\">\n"
+        afterSample = "\n</tab>"
+
+        val tabTitlesBySuffix = mapOf(
+            "_properties" to "Properties",
+            "_accessors" to "Accessors",
+            "_strings" to "Strings",
+        )
+        tabTitlesBySuffix.forEach { (suffix, title) ->
+            funSuffix(suffix) {
+                replaceText("NAME", title)
+            }
+        }
+    }
+}
+
+// Runs the test JVM with the java.nio package of java.base opened to unnamed modules.
+tasks.test {
+    val addOpensJavaNio = listOf("--add-opens", "java.base/java.nio=ALL-UNNAMED")
+    jvmArgs = addOpensJavaNio
+}
@@ -0,0 +1,20 @@
+package org.jetbrains.kotlinx.dataframe.samples
+
+import org.jetbrains.kotlinx.dataframe.DataColumn
+import org.jetbrains.kotlinx.dataframe.api.toDataFrame
+import org.jetbrains.kotlinx.dataframe.samples.api.TestBase
+import org.jetbrains.kotlinx.kandy.letsplot.samples.SampleHelper
+
+/**
+ * Base class for DataFrame documentation samples.
+ *
+ * Passes [sampleName] and [subFolder] on to [SampleHelper] together with the fixed relative
+ * paths `../docs/StardustDocs/images` and `../docs/StardustDocs/resources`,
+ * and mixes in [TestBase].
+ */
+abstract class DataFrameSampleHelper(sampleName: String, subFolder: String = "samples") :
+    SampleHelper(
+        sampleName,
+        subFolder,
+        "../docs/StardustDocs/images",
+        "../docs/StardustDocs/resources",
+    ),
+    TestBase {
+
+    /** Converts this column to a DataFrame with [toDataFrame] and saves that frame as an HTML sample. */
+    fun DataColumn<*>.saveDfHtmlSample() {
+        val columnAsFrame = toDataFrame()
+        columnAsFrame.saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,103 @@
+@file:Suppress("UNUSED_VARIABLE", "unused", "UNCHECKED_CAST", "ktlint", "ClassName")
+
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.add
+import org.jetbrains.kotlinx.dataframe.api.all
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.filter
+import org.jetbrains.kotlinx.dataframe.api.generateDataClasses
+import org.jetbrains.kotlinx.dataframe.api.generateInterfaces
+import org.jetbrains.kotlinx.dataframe.api.into
+import org.jetbrains.kotlinx.dataframe.api.rename
+import org.jetbrains.kotlinx.dataframe.api.sumOf
+import org.jetbrains.kotlinx.dataframe.api.toList
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Samples registered with [DataFrameSampleHelper] under the sample name `generate_docs`
+ * in the `api` sub-folder.
+ *
+ * NOTE(review): the code between `// SampleStart` and `// SampleEnd` is presumably what Korro
+ * copies into the documentation — avoid editing those regions unless a docs change is intended.
+ */
+class Generate : DataFrameSampleHelper("generate_docs", "api") {
+
+    /** Schema of one order row. */
+    @DataSchema
+    data class Orders(
+        val orderId: Int,
+        val amount: Double,
+    )
+
+    // Two orders, used as the nested "orders" value of the "Alice" row in `df`.
+    private val ordersAlice = dataFrameOf(
+        "orderId" to listOf(101, 102),
+        "amount" to listOf(50.0, 75.5),
+    ).cast<Orders>()
+
+    // Three orders, used as the nested "orders" value of the "Bob" row in `df`.
+    private val ordersBob = dataFrameOf(
+        "orderId" to listOf(103, 104, 105),
+        "amount" to listOf(20.0, 30.0, 25.0),
+    ).cast<Orders>()
+
+    /** Schema of one customer row: a user name plus that user's orders. */
+    @DataSchema
+    data class Customer(
+        val user: String,
+        val orders: List<Orders>,
+    )
+
+    // Two-row frame ("Alice", "Bob") whose "orders" column holds the frames defined above.
+    private val df = dataFrameOf(
+        "user" to listOf("Alice", "Bob"),
+        "orders" to listOf(ordersAlice, ordersBob),
+    ).cast<Customer>()
+
+    /** Saves `df` itself as an HTML sample. */
+    @Test
+    fun notebook_test_generate_docs_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Calls `generateInterfaces()` on `df`; the result is not inspected or saved. */
+    @Test
+    fun notebook_test_generate_docs_2() {
+        // SampleStart
+        df.generateInterfaces()
+        // SampleEnd
+    }
+
+    /** Filters `df` on a condition over the nested orders; saving the result is commented out. */
+    @Test
+    fun notebook_test_generate_docs_3() {
+        // SampleStart
+        df.filter { orders.all { orderId >= 102 } }
+        // SampleEnd
+        // .saveDfHtmlSample()
+    }
+
+    /** Calls `generateDataClasses("Customer")` on `df`; the result is not inspected or saved. */
+    @Test
+    fun notebook_test_generate_docs_4() {
+        // SampleStart
+        df.generateDataClasses("Customer")
+        // SampleEnd
+    }
+
+    /** Converts `df` to a `List<Customer>`; the list is not inspected. */
+    @Test
+    fun notebook_test_generate_docs_5() {
+        // SampleStart
+        val customers: List<Customer> = df.cast<Customer>().toList()
+        // SampleEnd
+    }
+
+    /** Calls `generateInterfaces` with `markerName = "Customer"`; the result is not inspected or saved. */
+    @Test
+    fun notebook_test_generate_docs_6() {
+        // SampleStart
+        df.generateInterfaces(markerName = "Customer")
+        // SampleEnd
+    }
+
+    /** Chains add / filter / rename on the typed frame; saving the result is commented out. */
+    @Test
+    fun notebook_test_generate_docs_7() {
+        // SampleStart
+        df.cast<Customer>()
+            .add("ordersTotal") { orders.sumOf { it.amount } }
+            .filter { user.startsWith("A") }
+            .rename { user }.into("customer")
+        // SampleEnd
+        //   .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,99 @@
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import org.jetbrains.kotlinx.dataframe.DataColumn
+import org.jetbrains.kotlinx.dataframe.api.FormattingDsl
+import org.jetbrains.kotlinx.dataframe.api.and
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.format
+import org.jetbrains.kotlinx.dataframe.api.getColumnIndex
+import org.jetbrains.kotlinx.dataframe.api.linearBg
+import org.jetbrains.kotlinx.dataframe.api.max
+import org.jetbrains.kotlinx.dataframe.api.min
+import org.jetbrains.kotlinx.dataframe.api.notNull
+import org.jetbrains.kotlinx.dataframe.api.perRowCol
+import org.jetbrains.kotlinx.dataframe.api.with
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Formatting samples registered with [DataFrameSampleHelper] under the sample name `operations`
+ * in the `modify` sub-folder. Each test ends by saving the formatted frame as an HTML sample.
+ *
+ * NOTE(review): the code between `// SampleStart` and `// SampleEnd` is presumably what Korro
+ * copies into the documentation — avoid editing those regions unless a docs change is intended.
+ */
+@Suppress("ktlint:standard:argument-list-wrapping")
+class Modify : DataFrameSampleHelper("operations", "modify") {
+
+    // The shared people frame provided by TestBase.
+    val df = peopleDf
+
+    // 10 columns ("col1".."col10") x 10 rows of integers; values are listed row by row.
+    private val df2 = dataFrameOf(
+        "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10",
+    )(
+        45, 12, 78, 34, 90, 23, 67, 89, 56, 43,
+        87, 34, 56, 78, 12, 45, 90, 23, 67, 89,
+        23, 67, 89, 45, 78, 90, 12, 56, 34, 78,
+        90, 45, 23, 67, 34, 78, 89, 12, 56, 23,
+        12, 89, 45, 90, 56, 34, 78, 67, 23, 90,
+        78, 56, 12, 23, 89, 67, 34, 90, 45, 12,
+        34, 90, 67, 12, 45, 23, 56, 78, 89, 67,
+        56, 23, 34, 89, 67, 12, 45, 34, 78, 90,
+        89, 78, 90, 56, 23, 89, 67, 45, 12, 34,
+        67, 45, 78, 12, 90, 56, 23, 89, 34, 78,
+    )
+
+    /**
+     * Formatting example that addresses columns by string name, which is why cell values
+     * and the column are cast explicitly inside the lambdas.
+     * The `_strings` suffix matches a `funSuffix` entry in the module's Korro configuration.
+     */
+    @Suppress("UNCHECKED_CAST")
+    @Test
+    fun formatExample_strings() {
+        // SampleStart
+        val ageMin = df.min { "age"<Int>() }
+        val ageMax = df.max { "age"<Int>() }
+
+        df
+            .format().with { bold and textColor(black) and background(white) }
+            .format("name").with { underline }
+            .format { "name"["lastName"] }.with { italic }
+            .format("isHappy").with {
+                background(if (it as Boolean) green else red)
+            }
+            .format("weight").notNull().with { linearBg(it as Int, 50 to blue, 90 to red) }
+            .format("age").perRowCol { row, col ->
+                col as DataColumn<Int>
+                textColor(
+                    linear(value = col[row], from = ageMin to blue, to = ageMax to green),
+                )
+            }
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /**
+     * Same formatting example as [formatExample_strings], written with typed column properties.
+     * The `_properties` suffix matches a `funSuffix` entry in the module's Korro configuration.
+     */
+    @Test
+    fun formatExample_properties() {
+        // SampleStart
+        val ageMin = df.age.min()
+        val ageMax = df.age.max()
+
+        df
+            .format().with { bold and textColor(black) and background(white) }
+            .format { name }.with { underline }
+            .format { name.lastName }.with { italic }
+            .format { isHappy }.with { background(if (it) green else red) }
+            .format { weight }.notNull().linearBg(50 to FormattingDsl.blue, 90 to FormattingDsl.red)
+            .format { age }.perRowCol { row, col ->
+                textColor(
+                    linear(value = col[row], from = ageMin to blue, to = ageMax to green),
+                )
+            }
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /**
+     * Formats `df2` per cell: cells where (row index - column index) is divisible by 3 get a
+     * dark-gray background with white text, all other cells a white background with black text.
+     */
+    @Test
+    fun formatExampleNumbers() {
+        // SampleStart
+        df2.format().perRowCol { row, col ->
+            val rowIndex = row.index()
+            val colIndex = row.df().getColumnIndex(col)
+            if ((rowIndex - colIndex) % 3 == 0) {
+                background(darkGray) and textColor(white)
+            } else {
+                background(white) and textColor(black)
+            }
+        }
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,156 @@
+@file:Suppress("ktlint")
+
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import io.deephaven.csv.parsers.Parsers
+import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.DataRow
+import org.jetbrains.kotlinx.dataframe.api.ParserOptions
+import org.jetbrains.kotlinx.dataframe.api.columnNames
+import org.jetbrains.kotlinx.dataframe.api.columnTypes
+import org.jetbrains.kotlinx.dataframe.io.ColType
+import org.jetbrains.kotlinx.dataframe.io.StringColumns
+import org.jetbrains.kotlinx.dataframe.io.readArrowFeather
+import org.jetbrains.kotlinx.dataframe.io.readCsv
+import org.jetbrains.kotlinx.dataframe.io.readExcel
+import org.jetbrains.kotlinx.dataframe.io.readJson
+import org.jetbrains.kotlinx.dataframe.testArrowFeather
+import org.jetbrains.kotlinx.dataframe.testCsv
+import org.jetbrains.kotlinx.dataframe.testJson
+import org.junit.Ignore
+import org.junit.Test
+import java.time.format.DateTimeFormatter
+import java.util.Locale
+import kotlin.reflect.typeOf
+
+/**
+ * Samples for reading data (CSV, JSON, Excel, Arrow Feather). Unlike the other sample classes
+ * in this package it does not extend `DataFrameSampleHelper`, so nothing is saved as HTML.
+ *
+ * NOTE(review): the code between `// SampleStart` and `// SampleEnd` is presumably what Korro
+ * copies into the documentation — avoid editing those regions unless a docs change is intended.
+ */
+class Read {
+    /** Reads a '|'-delimited CSV with an explicit header and a custom null string, then checks shape and types. */
+    @Test
+    fun readCsvCustom() {
+        val file = testCsv("syntheticSample")
+        // SampleStart
+        val df = DataFrame.readCsv(
+            file,
+            delimiter = '|',
+            header = listOf("A", "B", "C", "D"),
+            parserOptions = ParserOptions(nullStrings = setOf("not assigned")),
+        )
+        // SampleEnd
+        df.rowsCount() shouldBe 3
+        df.columnNames() shouldBe listOf("A", "B", "C", "D")
+        df["A"].type() shouldBe typeOf<Int>()
+        df["D"].type() shouldBe typeOf<Boolean?>()
+    }
+
+    /** Reads a JSON file into a DataFrame and checks row count, column names and inferred types. */
+    @Test
+    fun readJson() {
+        val file = testJson("synthetic")
+        // SampleStart
+        val df = DataFrame.readJson(file)
+        // SampleEnd
+        df.rowsCount() shouldBe 4
+        df.columnNames() shouldBe listOf("A", "B", "C", "D")
+        df["A"].type() shouldBe typeOf<String>()
+        df["B"].type() shouldBe typeOf<Int>()
+        df["D"].type() shouldBe typeOf<Boolean?>()
+    }
+
+    /** Reads a JSON file into a single DataRow and checks column names and types. */
+    @Test
+    fun readJsonRow() {
+        val file = testJson("syntheticObj")
+        // SampleStart
+        val row = DataRow.readJson(file)
+        // SampleEnd
+        row.columnNames() shouldBe listOf("A", "B", "C", "D")
+        row.columnTypes() shouldBe listOf(typeOf<String>(), typeOf<Int>(), typeOf<Float>(), typeOf<Boolean>())
+    }
+
+    /** Ignored test: the sample reads "mixed_column.xlsx" by a bare relative path. */
+    @Test
+    @Ignore
+    fun fixMixedColumn() {
+        // SampleStart
+        val df = DataFrame.readExcel("mixed_column.xlsx", stringColumns = StringColumns("A"))
+        // SampleEnd
+    }
+
+    /** Reads an Arrow Feather file and checks that it has 1 row and 4 columns. */
+    @Test
+    fun readArrowFeather() {
+        val file = testArrowFeather("data-arrow_2.0.0_uncompressed")
+        // SampleStart
+        val df = DataFrame.readArrowFeather(file)
+        // SampleEnd
+        df.rowsCount() shouldBe 1
+        df.columnsCount() shouldBe 4
+    }
+
+    /** Reads the "numbers" CSV with `Locale.UK` parser options; the result is not asserted on. */
+    @Test
+    fun readNumbersWithSpecificLocale() {
+        val file = testCsv("numbers")
+        // SampleStart
+        val df = DataFrame.readCsv(
+            file,
+            parserOptions = ParserOptions(locale = Locale.UK),
+        )
+        // SampleEnd
+    }
+
+    /** Reads the "numbers" CSV with a per-column `colTypes` mapping; the result is not asserted on. */
+    @Test
+    fun readNumbersWithColType() {
+        val file = testCsv("numbers")
+        // SampleStart
+        val df = DataFrame.readCsv(
+            file,
+            colTypes = mapOf("colName" to ColType.String),
+        )
+        // SampleEnd
+    }
+
+    /** Reads the "dates" CSV with a date-time pattern string; the result is not asserted on. */
+    @Test
+    fun readDatesWithSpecificDateTimePattern() {
+        val file = testCsv("dates")
+        // SampleStart
+        val df = DataFrame.readCsv(
+            file,
+            parserOptions = ParserOptions(dateTimePattern = "dd/MMM/yy h:mm a")
+        )
+        // SampleEnd
+    }
+
+    /** Reads the "dates" CSV with a `DateTimeFormatter` built from the same pattern; the result is not asserted on. */
+    @Test
+    fun readDatesWithSpecificDateTimeFormatter() {
+        val file = testCsv("dates")
+        // SampleStart
+        val df = DataFrame.readCsv(
+            file,
+            parserOptions = ParserOptions(dateTimeFormatter = DateTimeFormatter.ofPattern("dd/MMM/yy h:mm a"))
+        )
+        // SampleEnd
+    }
+
+    /** Reads the "dates" CSV mapping `ColType.DEFAULT` to `ColType.String`; the result is not asserted on. */
+    @Test
+    fun readDatesWithDefaultType() {
+        val file = testCsv("dates")
+        // SampleStart
+        val df = DataFrame.readCsv(
+            file,
+            colTypes = mapOf(ColType.DEFAULT to ColType.String),
+        )
+        // SampleEnd
+    }
+
+    /**
+     * Reads the "dates" CSV from an input stream, registering a Deephaven parser for the
+     * "date" column through `adjustCsvSpecs`.
+     *
+     * NOTE(review): every `Exception` is swallowed by the empty `catch`, so this test passes
+     * even when reading fails — confirm that this is intentional.
+     */
+    @Test
+    fun readDatesWithDeephavenDateTimeParser() {
+        val file = testCsv("dates")
+        try {
+            // SampleStart
+            val df = DataFrame.readCsv(
+                inputStream = file.openStream(),
+                adjustCsvSpecs = { // it: CsvSpecs.Builder
+                    it.putParserForName("date", Parsers.DATETIME)
+                },
+            )
+            // SampleEnd
+        } catch (_: Exception) {
+        }
+    }
+}
@@ -0,0 +1,42 @@
+@file:Suppress("PropertyName", "unused", "ktlint")
+
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import org.jetbrains.kotlinx.dataframe.api.column
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.rename
+import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase
+import org.jetbrains.kotlinx.dataframe.api.toCamelCase
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Rename-to-camel-case samples registered with [DataFrameSampleHelper] under the sample name
+ * `rename` in the `api` sub-folder. Each test saves its resulting frame as an HTML sample.
+ *
+ * NOTE(review): the code between `// SampleStart` and `// SampleEnd` is presumably what Korro
+ * copies into the documentation — avoid editing those regions unless a docs change is intended.
+ */
+class RenameToCamelCase : DataFrameSampleHelper("rename", "api") {
+    // Two rows with three differently styled column names: "ColumnA", "column_b", "COLUMN-C".
+    private val df = dataFrameOf("ColumnA", "column_b", "COLUMN-C")(1, "a", true, 2, "b", false)
+
+    // Column accessors named after the original columns, used in the rename selection below.
+    // NOTE(review): both are declared as column<String>() while the data above holds
+    // Int and Boolean values in those columns — confirm whether the type argument matters here.
+    val ColumnA by column<String>()
+    val `COLUMN-C` by column<String>()
+
+    /** Saves the unmodified frame. */
+    @Test
+    fun notebook_test_rename_3() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Applies `toCamelCase()` to the two selected columns only and saves the result. */
+    @Test
+    fun notebook_test_rename_4() {
+        // SampleStart
+        df.rename { ColumnA and `COLUMN-C` }.toCamelCase()
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Applies `renameToCamelCase()` to the whole frame and saves the result. */
+    @Test
+    fun notebook_test_rename_5() {
+        // SampleStart
+        df.renameToCamelCase()
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,273 @@
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import org.jetbrains.kotlinx.dataframe.AnyFrame
+import org.jetbrains.kotlinx.dataframe.api.chunked
+import org.jetbrains.kotlinx.dataframe.api.columnOf
+import org.jetbrains.kotlinx.dataframe.api.convert
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.duplicateRows
+import org.jetbrains.kotlinx.dataframe.api.forEachIndexed
+import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName
+import org.jetbrains.kotlinx.dataframe.api.sortBy
+import org.jetbrains.kotlinx.dataframe.api.sortByDesc
+import org.jetbrains.kotlinx.dataframe.api.with
+import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
+import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
+import org.jetbrains.kotlinx.dataframe.indices
+import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData
+import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration
+import org.jetbrains.kotlinx.dataframe.io.toHtml
+import org.jetbrains.kotlinx.dataframe.io.toStandaloneHtml
+import org.jetbrains.kotlinx.dataframe.jupyter.ChainedCellRenderer
+import org.jetbrains.kotlinx.dataframe.jupyter.DefaultCellRenderer
+import org.jetbrains.kotlinx.dataframe.jupyter.RenderedContent
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Ignore
+import org.junit.Test
+import java.io.File
+import java.net.URI
+import kotlin.io.path.Path
+
+/**
+ * HTML rendering samples registered with [DataFrameSampleHelper] under the sample name `toHTML`
+ * in the `api` sub-folder. None of the tests here saves an HTML sample; the `.openInBrowser()`
+ * calls after the samples are commented out.
+ *
+ * NOTE(review): the code between `// SampleStart` and `// SampleEnd` is presumably what Korro
+ * copies into the documentation — avoid editing those regions unless a docs change is intended.
+ */
+class Render : DataFrameSampleHelper("toHTML", "api") {
+
+    // Seven-row people frame with a nested "name" column group; typed as AnyFrame.
+    private val df: AnyFrame = dataFrameOf(
+        "name" to columnOf(
+            "firstName" to columnOf("Alice", "Bob", "Charlie", "Charlie", "Bob", "Alice", "Charlie"),
+            "lastName" to columnOf("Cooper", "Dylan", "Daniels", "Chaplin", "Marley", "Wolf", "Byrd"),
+        ),
+        "age" to columnOf(15, 45, 20, 40, 30, 20, 30),
+        "city" to columnOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"),
+        "weight" to columnOf(54, 87, null, null, 68, 55, 90),
+        "isHappy" to columnOf(true, true, false, true, true, false, true),
+    )
+
+    /** Ignored test: the sample opens a browser and writes to the placeholder path "/path/to/file". */
+    @Test
+    @Ignore
+    fun useRenderingResult() {
+        // SampleStart
+        val configuration = DisplayConfiguration(rowsLimit = null)
+        df.toStandaloneHtml(configuration).openInBrowser()
+        df.toStandaloneHtml(configuration).writeHtml(File("/path/to/file"))
+        df.toStandaloneHtml(configuration).writeHtml(Path("/path/to/file"))
+        // SampleEnd
+    }
+
+    /**
+     * Combines the HTML of three differently ordered frames, addressing columns by string name.
+     * A local `df` with the same data shadows the class-level one, which is typed as AnyFrame.
+     */
+    @Test
+    fun composeTables_strings() {
+        val df = dataFrameOf(
+            "name" to columnOf(
+                "firstName" to columnOf("Alice", "Bob", "Charlie", "Charlie", "Bob", "Alice", "Charlie"),
+                "lastName" to columnOf("Cooper", "Dylan", "Daniels", "Chaplin", "Marley", "Wolf", "Byrd"),
+            ),
+            "age" to columnOf(15, 45, 20, 40, 30, 20, 30),
+            "city" to columnOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"),
+            "weight" to columnOf(54, 87, null, null, 68, 55, 90),
+            "isHappy" to columnOf(true, true, false, true, true, false, true),
+        )
+        // SampleStart
+        val df1 = df.reorderColumnsByName()
+        val df2 = df.sortBy("age")
+        val df3 = df.sortByDesc("age")
+
+        listOf(df1, df2, df3).fold(DataFrameHtmlData.tableDefinitions()) { acc, df ->
+            acc + df.toHtml()
+        }
+        // SampleEnd
+    }
+
+    /** Same as [composeTables_strings], written with typed column properties (`age`). */
+    @Test
+    fun composeTables_properties() {
+        val df = dataFrameOf(
+            "name" to columnOf(
+                "firstName" to columnOf("Alice", "Bob", "Charlie", "Charlie", "Bob", "Alice", "Charlie"),
+                "lastName" to columnOf("Cooper", "Dylan", "Daniels", "Chaplin", "Marley", "Wolf", "Byrd"),
+            ),
+            "age" to columnOf(15, 45, 20, 40, 30, 20, 30),
+            "city" to columnOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"),
+            "weight" to columnOf(54, 87, null, null, 68, 55, 90),
+            "isHappy" to columnOf(true, true, false, true, true, false, true),
+        )
+        // SampleStart
+        val df1 = df.reorderColumnsByName()
+        val df2 = df.sortBy { age }
+        val df3 = df.sortByDesc { age }
+
+        listOf(df1, df2, df3).fold(DataFrameHtmlData.tableDefinitions()) { acc, df -> acc + df.toHtml() }
+        // SampleEnd
+    }
+
+    /** Renders `df` with `cellContentLimit = -1`; the result is not inspected. */
+    @Test
+    fun configureCellOutput() {
+        // SampleStart
+        df.toHtml(DisplayConfiguration(cellContentLimit = -1))
+        // SampleEnd
+    }
+
+    /** Builds standalone HTML for a frame holding a single `IMG` value. */
+    @Test
+    fun displayImg() {
+        // SampleStart
+        val htmlData = dataFrameOf(
+            "kotlinLogo" to columnOf(
+                IMG("https://kotlin.github.io/dataframe/images/kotlin-logo.svg"),
+            ),
+        ).toStandaloneHtml()
+        // SampleEnd
+        // .openInBrowser()
+    }
+
+    /** Builds standalone HTML for a frame holding a single `IFRAME` value. */
+    @Test
+    fun displayIFrame() {
+        // SampleStart
+        val htmlData = dataFrameOf(
+            "documentationPages" to columnOf(
+                IFRAME(
+                    src = "https://kotlin.github.io/dataframe/tohtml.html",
+                    width = 850,
+                    height = 500,
+                ),
+            ),
+        ).toStandaloneHtml()
+        // SampleEnd
+        // .openInBrowser()
+    }
+
+    /** Builds standalone HTML for a frame holding a column of `URL` values. */
+    @Test
+    fun displayURL() {
+        // SampleStart
+        val htmlData = dataFrameOf(
+            "documentationPages" to columnOf(
+                URI("https://kotlin.github.io/dataframe/format.html").toURL(),
+                URI("https://kotlin.github.io/dataframe/tohtml.html").toURL(),
+                URI("https://kotlin.github.io/dataframe/jupyterrendering.html").toURL(),
+            ),
+        ).toStandaloneHtml()
+        // SampleEnd
+        // .openInBrowser()
+    }
+
+    /** Converts a string column into `RenderedContent.media` anchors, addressing the column by string name. */
+    @Test
+    fun displayMediaContent_strings() {
+        // SampleStart
+        val htmlData = dataFrameOf(
+            "documentationPages" to columnOf(
+                "https://kotlin.github.io/dataframe/format.html",
+                "https://kotlin.github.io/dataframe/tohtml.html",
+                "https://kotlin.github.io/dataframe/jupyterrendering.html",
+            ),
+        )
+            .convert { "documentationPages"<String>() }.with {
+                val uri = URI(it)
+                RenderedContent.media("""<a href='$uri'>${uri.path}</a>""")
+            }
+            .toStandaloneHtml()
+        // SampleEnd
+        // .openInBrowser()
+    }
+
+    /** Same as [displayMediaContent_strings], written with the typed `documentationPages` property. */
+    @Test
+    fun displayMediaContent_properties() {
+        // SampleStart
+        val htmlData = dataFrameOf(
+            "documentationPages" to columnOf(
+                "https://kotlin.github.io/dataframe/format.html",
+                "https://kotlin.github.io/dataframe/tohtml.html",
+                "https://kotlin.github.io/dataframe/jupyterrendering.html",
+            ),
+        )
+            .convert { documentationPages }.with {
+                val uri = URI(it)
+                RenderedContent.media("""<a href='$uri'>${uri.path}</a>""")
+            }
+            .toStandaloneHtml()
+        // SampleEnd
+        // .openInBrowser()
+    }
+
+    /**
+     * Renders `df` with a custom cell renderer chained onto `DefaultCellRenderer`.
+     * Despite its name, `CustomArrayCellRenderer` only special-cases `Boolean` values.
+     */
+    @Test
+    fun cellRenderer() {
+        // SampleStart
+        class CustomArrayCellRenderer : ChainedCellRenderer(DefaultCellRenderer) {
+            override fun maybeContent(value: Any?, configuration: DisplayConfiguration): RenderedContent? {
+                if (value is Boolean) {
+                    return RenderedContent.text(if (value) "✓" else "✗")
+                }
+                // return null to delegate work to parent renderer: DefaultCellRenderer
+                return null
+            }
+
+            override fun maybeTooltip(value: Any?, configuration: DisplayConfiguration): String? {
+                // return null to delegate work to parent renderer: DefaultCellRenderer
+                return null
+            }
+        }
+
+        val htmlData = df.toStandaloneHtml(cellRenderer = CustomArrayCellRenderer())
+        // SampleEnd
+        // .openInBrowser()
+    }
+
+    /** Sample showing how the example frame is constructed; the local `df` is not used further. */
+    @Test
+    fun df() {
+        // SampleStart
+        val df = dataFrameOf(
+            "name" to columnOf(
+                "firstName" to columnOf("Alice", "Bob", "Charlie", "Charlie", "Bob", "Alice", "Charlie"),
+                "lastName" to columnOf("Cooper", "Dylan", "Daniels", "Chaplin", "Marley", "Wolf", "Byrd"),
+            ),
+            "age" to columnOf(15, 45, 20, 40, 30, 20, 30),
+            "city" to columnOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"),
+            "weight" to columnOf(54, 87, null, null, 68, 55, 90),
+            "isHappy" to columnOf(true, true, false, true, true, false, true),
+        )
+        // SampleEnd
+    }
+
+    /**
+     * Splits a row-duplicated copy of `df` into chunks of 20 and appends a navigation-link body
+     * to each page's HTML. Writing the pages is commented out, so no files are created and
+     * `output` is intentionally unused.
+     */
+    @Test
+    fun appendCustomHtml() {
+        // SampleStart
+        val pages = df.duplicateRows(10).chunked(20)
+        val files = pages.indices.map { i -> File("page$i.html") }
+        val navLinks = files.mapIndexed { i, file ->
+            """<a href="${file.name}">Page ${i + 1}</a>"""
+        }.joinToString(" | ")
+
+        pages.forEachIndexed { i, page ->
+            val output = files[i]
+            page.toStandaloneHtml().plus(DataFrameHtmlData(body = navLinks))
+            // uncomment
+            // .writeHtml(output)
+        }
+        // SampleEnd
+    }
+
+    /** Appends custom CSS (hover highlight) and JS (click shows cell text in an alert) to the HTML of `df`. */
+    @Test
+    fun interactiveJs() {
+        // SampleStart
+        val selectCellInteraction = DataFrameHtmlData(
+            style =
+                """
+                td:hover {
+                    background-color: rgba(0, 123, 255, 0.15);
+                    cursor: pointer;
+                }
+                """.trimIndent(),
+            script =
+                """
+                (function() {
+                    let cells = document.querySelectorAll('td');
+                    cells.forEach(function(cell) {
+                        cell.addEventListener('click', function(e) {
+                            let content = cell.textContent;
+                            alert(content);
+                        });
+                    });
+                })();
+                """.trimIndent(),
+        )
+
+        // keep in mind JS script initialization order.
+        val htmlData = df.toStandaloneHtml().plus(selectCellInteraction)
+        // SampleEnd
+        // .openInBrowser()
+    }
+}
@@ -0,0 +1,47 @@
+@file:Suppress("ktlint")
+
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.DataRow
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.group
+import org.jetbrains.kotlinx.dataframe.api.into
+
+/**
+ * Shared fixture for the sample tests: a small people frame, its schemas,
+ * and an infix assertion helper.
+ */
+@Suppress("ktlint:standard:argument-list-wrapping")
+interface TestBase {
+
+    /**
+     * Seven-row people frame. `firstName` and `lastName` are grouped into a `name` column group
+     * and the result is cast to [Person] with `verify = false`.
+     *
+     * This is a getter without a backing field, so a new frame is built on every access.
+     */
+    val peopleDf: DataFrame<Person>
+        get() = run {
+            // Values are listed row by row, in the order of the column names above them.
+            dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")(
+                "Alice", "Cooper", 15, "London", 54, true,
+                "Bob", "Dylan", 45, "Dubai", 87, true,
+                "Charlie", "Daniels", 20, "Moscow", null, false,
+                "Charlie", "Chaplin", 40, "Milan", null, true,
+                "Bob", "Marley", 30, "Tokyo", 68, true,
+                "Alice", "Wolf", 20, null, 55, false,
+                "Charlie", "Byrd", 30, "Moscow", 90, true,
+            ).group { firstName and lastName }.into("name")
+                .cast<Person>(verify = false)
+        }
+
+    /** Schema of the nested `name` column group. */
+    @DataSchema
+    interface Name {
+        val firstName: String
+        val lastName: String
+    }
+
+    /** Schema of one row of [peopleDf]; `city` and `weight` are nullable. */
+    @DataSchema
+    interface Person {
+        val age: Int
+        val city: String?
+        val name: DataRow<Name> // TODO Requires https://code.jetbrains.team/p/kt/repositories/kotlin/reviews/23694 to be merged
+        val weight: Int?
+        val isHappy: Boolean
+    }
+
+    /** Infix alias for kotest's `shouldBe` that constrains [expected] to a subtype of the receiver's type. */
+    infix fun <T, U : T> T.willBe(expected: U?) = shouldBe(expected)
+}
@@ -0,0 +1,250 @@
+@file:Suppress("ktlint")
+
+package org.jetbrains.kotlinx.dataframe.samples.api
+
+import io.kotest.matchers.string.shouldStartWith
+import org.apache.arrow.vector.types.pojo.Schema
+import org.apache.poi.ss.usermodel.Sheet
+import org.apache.poi.ss.usermodel.WorkbookFactory
+import org.jetbrains.kotlinx.dataframe.api.filter
+import org.jetbrains.kotlinx.dataframe.api.remove
+import org.jetbrains.kotlinx.dataframe.io.ArrowWriter
+import org.jetbrains.kotlinx.dataframe.io.arrowWriter
+import org.jetbrains.kotlinx.dataframe.io.saveArrowFeatherToByteArray
+import org.jetbrains.kotlinx.dataframe.io.saveArrowIPCToByteArray
+import org.jetbrains.kotlinx.dataframe.io.toCsvStr
+import org.jetbrains.kotlinx.dataframe.io.toJson
+import org.jetbrains.kotlinx.dataframe.io.writeArrowFeather
+import org.jetbrains.kotlinx.dataframe.io.writeArrowIPC
+import org.jetbrains.kotlinx.dataframe.io.writeCsv
+import org.jetbrains.kotlinx.dataframe.io.writeExcel
+import org.jetbrains.kotlinx.dataframe.io.writeJson
+import org.jetbrains.kotlinx.dataframe.io.writeMismatchMessage
+import org.junit.Test
+import java.io.File
+import kotlin.io.path.deleteExisting
+
+/**
+ * Samples for writing a dataframe to CSV, JSON, Excel and Apache Arrow.
+ *
+ * The code between `// SampleStart` and `// SampleEnd` is what ends up in the documentation,
+ * so it must stay self-explanatory; anything test-only lives outside of the markers.
+ */
+class Write : TestBase {
+
+    /** Dataframe written by every sample; taken once from [TestBase.peopleDf]. */
+    val df = peopleDf
+
+    /** Sample: write the dataframe to a CSV file. */
+    @Test
+    fun writeCsv() {
+        useTempFile { file ->
+            // SampleStart
+            df.writeCsv(file)
+            // SampleEnd
+        }
+    }
+
+    /** Sample: write the dataframe to a JSON file. */
+    @Test
+    fun writeJson() {
+        useTempFile { file ->
+            // SampleStart
+            df.writeJson(file)
+            // SampleEnd
+        }
+    }
+
+    /**
+     * Sample: render the dataframe as CSV text with `;` as delimiter.
+     *
+     * The assertion checks the header and the first record; the `name` column group is
+     * expected to appear as a quoted JSON object.
+     */
+    @Test
+    fun writeCsvStr() {
+        // SampleStart
+        val csvStr = df.toCsvStr(delimiter = ';', recordSeparator = System.lineSeparator())
+        // SampleEnd
+        csvStr shouldStartWith """
+            name;age;city;weight;isHappy
+            "{""firstName"":""Alice"",""lastName"":""Cooper""}";15;London;54;true
+        """.trimIndent().lines().joinToString(System.lineSeparator())
+    }
+
+    /** Sample: render the dataframe as pretty-printed JSON text; only the first record is asserted. */
+    @Test
+    fun writeJsonStr() {
+        // SampleStart
+        val jsonStr = df.toJson(prettyPrint = true)
+        // SampleEnd
+        jsonStr shouldStartWith """
+            [
+                {
+                    "name": {
+                        "firstName": "Alice",
+                        "lastName": "Cooper"
+                    },
+                    "age": 15,
+                    "city": "London",
+                    "weight": 54,
+                    "isHappy": true
+        """.trimIndent()
+    }
+
+    /** Sample: write the dataframe to an Excel file. */
+    @Test
+    fun writeXls() {
+        useTempFile { file ->
+            // SampleStart
+            df.writeExcel(file)
+            // SampleEnd
+        }
+    }
+
+    /**
+     * Sample: write several dataframes as sheets of one POI workbook, style two of the
+     * sheets through the POI API, and save the workbook to a file.
+     */
+    @Test
+    fun writeXlsAppendAndPostProcessing() {
+        useTempFile { file ->
+            // SampleStart
+            /**
+             * Do something with generated sheets. Here we set bold style for headers and italic style for first data column
+             */
+            fun setStyles(sheet: Sheet) {
+                val headerFont = sheet.workbook.createFont()
+                headerFont.bold = true
+                val headerStyle = sheet.workbook.createCellStyle()
+                headerStyle.setFont(headerFont)
+
+                val indexFont = sheet.workbook.createFont()
+                indexFont.italic = true
+                val indexStyle = sheet.workbook.createCellStyle()
+                indexStyle.setFont(indexFont)
+
+                sheet.forEachIndexed { index, row ->
+                    if (index == 0) {
+                        for (cell in row) {
+                            cell.cellStyle = headerStyle
+                        }
+                    } else {
+                        row.first().cellStyle = indexStyle
+                    }
+                }
+            }
+
+            // Create a workbook (or use existing)
+            val wb = WorkbookFactory.create(true)
+
+            // Create different sheets from different dataframes in the workbook
+            val allPersonsSheet = df.writeExcel(wb, sheetName = "allPersons")
+            val happyPersonsSheet =
+                df.filter { person -> person.isHappy }.remove("isHappy").writeExcel(wb, sheetName = "happyPersons")
+            val unhappyPersonsSheet =
+                df.filter { person -> !person.isHappy }.remove("isHappy").writeExcel(wb, sheetName = "unhappyPersons")
+
+            // Do anything you want by POI
+            listOf(happyPersonsSheet, unhappyPersonsSheet).forEach { setStyles(it) }
+
+            // Save the result
+            file.outputStream().use { wb.write(it) }
+            wb.close()
+            // SampleEnd
+        }
+    }
+
+    /** Sample: write the dataframe to a file in Arrow IPC format and then in Arrow Feather format. */
+    @Test
+    fun writeArrowFile() {
+        useTempFile { file ->
+            // SampleStart
+            df.writeArrowIPC(file)
+            // or
+            df.writeArrowFeather(file)
+            // SampleEnd
+        }
+    }
+
+    /** Sample: serialize the dataframe to in-memory Arrow IPC and Arrow Feather byte arrays. */
+    @Test
+    fun writeArrowByteArray() {
+        // SampleStart
+        val ipcByteArray: ByteArray = df.saveArrowIPCToByteArray()
+        // or
+        val featherByteArray: ByteArray = df.saveArrowFeatherToByteArray()
+        // SampleEnd
+    }
+
+    /**
+     * Sample: write the dataframe through an [ArrowWriter] configured with an explicit target
+     * schema, a conversion mode and a mismatch subscriber.
+     *
+     * The target schema is kept outside of the sample markers; it declares the fields
+     * `name`, `age`, `city` and `weight`.
+     */
+    @Test
+    fun writeArrowPerSchema() {
+        useTempFile { file ->
+            val schemaJson =
+                """{
+                  "fields" : [ {
+                    "name" : "name",
+                    "nullable" : true,
+                    "type" : {
+                      "name" : "utf8"
+                    },
+                    "children" : [ ]
+                  }, {
+                    "name" : "age",
+                    "nullable" : false,
+                    "type" : {
+                      "name" : "int",
+                      "bitWidth" : 32,
+                      "isSigned" : true
+                    },
+                    "children" : [ ]
+                  }, {
+                    "name" : "city",
+                    "nullable" : false,
+                    "type" : {
+                      "name" : "utf8"
+                    },
+                    "children" : [ ]
+                  }, {
+                    "name" : "weight",
+                    "nullable" : true,
+                    "type" : {
+                      "name" : "floatingpoint",
+                      "precision" : "DOUBLE"
+                    },
+                    "children" : [ ]
+                  } ]
+                }
+                """
+
+            // SampleStart
+            // Get schema from anywhere you want. It can be deserialized from JSON, generated from another dataset
+            // (including the DataFrame.columns().toArrowSchema() method), created manually, and so on.
+            val schema = Schema.fromJSON(schemaJson)
+
+            df.arrowWriter(
+
+                // Specify your schema
+                targetSchema = schema,
+
+                // Specify desired behavior mode
+                mode = ArrowWriter.Mode(
+                    restrictWidening = true,
+                    restrictNarrowing = true,
+                    strictType = true,
+                    strictNullable = false,
+                ),
+
+                // Specify mismatch subscriber
+                mismatchSubscriber = writeMismatchMessage,
+
+                ).use { writer: ArrowWriter ->
+
+                // Save to any format and sink, like in the previous example
+                writer.writeArrowFeather(file)
+            }
+            // SampleEnd
+        }
+    }
+
+    /** Sample: build an Excel file with three sheets by appending to the same file with `keepFile = true`. */
+    @Test
+    fun writeXlsWithMultipleSheets() {
+        useTempFile { file ->
+            // SampleStart
+            // Create a new Excel workbook with a single sheet called "allPersons", replacing the file if it already exists -> Current sheets: allPersons
+            df.writeExcel(file, sheetName = "allPersons")
+            // Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons
+            df.filter { person -> person.isHappy }.remove("isHappy")
+                .writeExcel(file, sheetName = "happyPersons", keepFile = true)
+            // Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons, unhappyPersons
+            df.filter { person -> !person.isHappy }.remove("isHappy")
+                .writeExcel(file, sheetName = "unhappyPersons", keepFile = true)
+            // SampleEnd
+        }
+    }
+
+    companion object {
+        /**
+         * Creates a temporary file, hands it to [action] and deletes it afterwards.
+         *
+         * The deletion happens in `finally`, so a failing sample (an exception or a failed
+         * assertion inside [action]) no longer leaves the temporary file behind.
+         */
+        private fun useTempFile(action: (File) -> Unit) {
+            val file = kotlin.io.path.createTempFile("dataframeWriteTest")
+            try {
+                action(file.toFile())
+            } finally {
+                file.deleteExisting()
+            }
+        }
+    }
+}
@@ -0,0 +1,28 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.collectionsInterop
+
+import org.jetbrains.kotlinx.dataframe.api.associateBy
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.jetbrains.kotlinx.dataframe.samples.api.firstName
+import org.jetbrains.kotlinx.dataframe.samples.api.lastName
+import org.jetbrains.kotlinx.dataframe.samples.api.name
+import org.junit.Test
+
+/**
+ * Documentation samples for `associateBy`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class AssociateBySamples : DataFrameSampleHelper("associateBy", "api/collectionsInterop") {
+
+    private val df = peopleDf
+
+    /** Renders the input dataframe used by the `associateBy` sample. */
+    @Test
+    fun notebook_test_associateBy_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: key the rows by a "firstName lastName" string; the result is not rendered. */
+    @Test
+    fun notebook_test_associateBy_2() {
+        // SampleStart
+        df.associateBy { "${name.firstName} ${name.lastName}" }
+        // SampleEnd
+    }
+}
@@ -0,0 +1,29 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.collectionsInterop
+
+import org.jetbrains.kotlinx.dataframe.api.associate
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.jetbrains.kotlinx.dataframe.samples.api.age
+import org.jetbrains.kotlinx.dataframe.samples.api.firstName
+import org.jetbrains.kotlinx.dataframe.samples.api.lastName
+import org.jetbrains.kotlinx.dataframe.samples.api.name
+import org.junit.Test
+
+/**
+ * Documentation samples for `associate`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class AssociateSamples : DataFrameSampleHelper("associate", "api/collectionsInterop") {
+
+    private val df = peopleDf
+
+    /** Renders the input dataframe used by the `associate` sample. */
+    @Test
+    fun notebook_test_associate_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: map each row to a "firstName lastName" to age pair; the result is not rendered. */
+    @Test
+    fun notebook_test_associate_2() {
+        // SampleStart
+        df.associate { "${name.firstName} ${name.lastName}" to age }
+        // SampleEnd
+    }
+}
@@ -0,0 +1,46 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.column
+
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.between
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Documentation samples for the column operation `between`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the rendered result as an HTML sample when the test runs.
+ */
+class BetweenSamples : DataFrameSampleHelper("between", "api") {
+
+    /** Schema of the sample dataframe: a name with an age. */
+    @DataSchema
+    interface SimplePerson {
+        val name: String
+        val age: Int
+    }
+
+    // Four people with ages 15, 20, 25 and 30, so the 18..25 range used below has hits and misses.
+    private val df = dataFrameOf(
+        "name" to listOf("Alice", "Bob", "Charlie", "Diana"),
+        "age" to listOf(15, 20, 25, 30),
+    ).cast<SimplePerson>()
+
+    /** Renders the input dataframe used by the `between` samples. */
+    @Test
+    fun notebook_test_between_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `between` on the `age` column with the default boundary handling. */
+    @Test
+    fun notebook_test_between_2() {
+        // SampleStart
+        df.age.between(left = 18, right = 25)
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `between` on the `age` column with `includeBoundaries = false`. */
+    @Test
+    fun notebook_test_between_3() {
+        // SampleStart
+        df.age.between(left = 18, right = 25, includeBoundaries = false)
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,34 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.info
+
+import org.jetbrains.kotlinx.dataframe.api.tail
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Documentation samples for `tail`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class TailSamples : DataFrameSampleHelper("tail", "api") {
+
+    private val df = peopleDf
+
+    /** Renders the input dataframe used by the `tail` samples. */
+    @Test
+    fun notebook_test_tail_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `tail` with its default number of rows. */
+    @Test
+    fun notebook_test_tail_2() {
+        // SampleStart
+        df.tail()
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `tail` with an explicit `numRows = 2`. */
+    @Test
+    fun notebook_test_tail_3() {
+        // SampleStart
+        df.tail(numRows = 2)
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,315 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.multiple
+
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.RgbColor
+import org.jetbrains.kotlinx.dataframe.api.and
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.excludeJoin
+import org.jetbrains.kotlinx.dataframe.api.filterJoin
+import org.jetbrains.kotlinx.dataframe.api.format
+import org.jetbrains.kotlinx.dataframe.api.fullJoin
+import org.jetbrains.kotlinx.dataframe.api.innerJoin
+import org.jetbrains.kotlinx.dataframe.api.join
+import org.jetbrains.kotlinx.dataframe.api.leftJoin
+import org.jetbrains.kotlinx.dataframe.api.perRowCol
+import org.jetbrains.kotlinx.dataframe.api.rightJoin
+import org.jetbrains.kotlinx.dataframe.api.with
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.jetbrains.kotlinx.dataframe.util.defaultHeaderFormatting
+import org.junit.Test
+
+/**
+ * Documentation samples for the `join` family of operations.
+ *
+ * Each test renders one dataframe as an HTML sample. Only the code between `// SampleStart`
+ * and `// SampleEnd` is published; the formatting applied after `// SampleEnd` merely colors
+ * the rendered table so that matching rows are easy to spot.
+ */
+class JoinSamples : DataFrameSampleHelper("join", "api") {
+
+    /** Schema of [dfAges]. */
+    @DataSchema
+    interface DfAges {
+        val age: Int
+        val firstName: String
+    }
+
+    // Left-hand frame of the "differently named keys" example: keyed by `firstName`.
+    private val dfAges = dataFrameOf(
+        "firstName" to listOf("Alice", "Bob", "Charlie"),
+        "age" to listOf(14, 45, 20),
+    ).cast<DfAges>()
+
+    /** Schema of [dfCities]. */
+    @DataSchema
+    interface DfCities {
+        val city: String
+        val name: String
+    }
+
+    // Right-hand frame of the "differently named keys" example: keyed by `name`.
+    private val dfCities = dataFrameOf(
+        "name" to listOf("Bob", "Alice", "Charlie"),
+        "city" to listOf("London", "Dubai", "Moscow"),
+    ).cast<DfCities>()
+
+    /** Columns shared by [DfLeft] and [DfRight]; these are the join keys of most samples. */
+    @DataSchema
+    interface DfWithNameAndCity {
+        val name: String
+        val city: String?
+    }
+
+    /** Schema of [dfLeft]; narrows `city` to non-null. */
+    @DataSchema
+    interface DfLeft : DfWithNameAndCity {
+        val age: Int
+        override val city: String
+        override val name: String
+    }
+
+    // Left frame: "Charlie" appears twice, with different cities.
+    private val dfLeft = dataFrameOf(
+        "name" to listOf("Alice", "Bob", "Charlie", "Charlie"),
+        "age" to listOf(15, 45, 20, 40),
+        "city" to listOf("London", "Dubai", "Moscow", "Tokyo"),
+    ).cast<DfLeft>()
+
+    /** Schema of [dfRight]; `city` stays nullable. */
+    @DataSchema
+    interface DfRight : DfWithNameAndCity {
+        override val city: String?
+        val isBusy: Boolean
+        override val name: String
+    }
+
+    // Right frame: "Alice" appears twice, once with a `null` city.
+    private val dfRight = dataFrameOf(
+        "name" to listOf("Alice", "Bob", "Alice", "Charlie"),
+        "isBusy" to listOf(true, false, true, true),
+        "city" to listOf("London", "Tokyo", null, "Moscow"),
+    ).cast<DfRight>()
+
+    /** Background color for a row identified by [name] alone; unknown names get white. */
+    private fun nameToColor(name: String): RgbColor {
+        if (name == "Alice") return RgbColor(189, 206, 233)
+        if (name == "Bob") return RgbColor(198, 224, 198)
+        if (name == "Charlie") return RgbColor(219, 198, 230)
+        return RgbColor(255, 255, 255)
+    }
+
+    /** Background color for a row identified by the ([name], [city]) pair; unknown pairs get white. */
+    private fun nameAndCityToColor(name: String, city: String?): RgbColor =
+        when {
+            name == "Alice" && city == "London" -> RgbColor(242, 210, 189)
+            name == "Bob" && city == "Dubai" -> RgbColor(245, 226, 191)
+            name == "Charlie" && city == "Moscow" -> RgbColor(210, 229, 199)
+            name == "Charlie" && city == "Tokyo" -> RgbColor(191, 223, 232)
+            name == "Bob" && city == "Tokyo" -> RgbColor(200, 200, 232)
+            name == "Alice" && city == null -> RgbColor(233, 199, 220)
+            else -> RgbColor(255, 255, 255)
+        }
+
+    /**
+     * Colors every cell of a row by the row's ("name", "city") pair, with black text.
+     * The columns are read by name, so the receiver must contain both of them.
+     */
+    private fun <T> DataFrame<T>.colorized() =
+        format().perRowCol { row, _ ->
+            val rowName = row["name"] as String
+            val rowCity = row["city"] as String?
+            background(nameAndCityToColor(rowName, rowCity)) and textColor(black)
+        }
+
+    /** Renders [dfAges], colored by first name. */
+    @Test
+    fun notebook_test_join_3() {
+        // SampleStart
+        dfAges
+            // SampleEnd
+            .format().perRowCol { row, _ -> background(nameToColor(row.firstName)) and textColor(black) }
+            .defaultHeaderFormatting { firstName }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfCities], colored by name. */
+    @Test
+    fun notebook_test_join_5() {
+        // SampleStart
+        dfCities
+            // SampleEnd
+            .format().perRowCol { row, _ -> background(nameToColor(row.name)) and textColor(black) }
+            .defaultHeaderFormatting { name }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: join on keys that are named differently in the two frames. */
+    @Test
+    fun notebook_test_join_6() {
+        // SampleStart
+        // INNER JOIN on differently named keys:
+        // Merge a row when dfAges.firstName == dfCities.name.
+        // With the given data all 3 names match → all rows merge.
+        dfAges.join(dfCities) { firstName match right.name }
+            // SampleEnd
+            .format().perRowCol { row, _ -> background(nameToColor(row.firstName)) and textColor(black) }
+            .defaultHeaderFormatting { firstName }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfLeft] with the `name` header highlighted (input of the join-on-name sample). */
+    @Test
+    fun notebook_test_join_8() {
+        // SampleStart
+        dfLeft
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfRight] with the `name` header highlighted (input of the join-on-name sample). */
+    @Test
+    fun notebook_test_join_10() {
+        // SampleStart
+        dfRight
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: join on the `name` column only. */
+    @Test
+    fun notebook_test_join_11() {
+        // SampleStart
+        // INNER JOIN on "name" only:
+        // Merge when left.name == right.name.
+        // Duplicate keys produce multiple merged rows (one per pairing).
+        dfLeft.join(dfRight) { name }
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfLeft] with both key headers highlighted; same content as [notebook_test_join_13]. */
+    @Test
+    fun dfLeftImplicit() {
+        // SampleStart
+        dfLeft
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfRight] with both key headers highlighted; same content as [notebook_test_join_14]. */
+    @Test
+    fun dfRightImplicit() {
+        // SampleStart
+        dfRight
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: join without a column selector, i.e. on all same-named columns. */
+    @Test
+    fun notebook_test_join_12() {
+        // SampleStart
+        // INNER JOIN on all same-named columns ("name" and "city"):
+        // Merge when BOTH name AND city are equal; otherwise the row is dropped.
+        dfLeft.join(dfRight)
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { "name" and "city" }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfLeft] with both key headers highlighted (input of the join-type samples). */
+    @Test
+    fun notebook_test_join_13() {
+        // SampleStart
+        dfLeft
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Renders [dfRight] with both key headers highlighted (input of the join-type samples). */
+    @Test
+    fun notebook_test_join_14() {
+        // SampleStart
+        dfRight
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `innerJoin` on (name, city). */
+    @Test
+    fun notebook_test_join_15() {
+        // SampleStart
+        // INNER JOIN:
+        // Combines columns from the left and right dataframes
+        // and keeps only rows where (name, city) matches on both sides.
+        dfLeft.innerJoin(dfRight) { name and city }
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `filterJoin` on (name, city). */
+    @Test
+    fun notebook_test_join_16() {
+        // SampleStart
+        // FILTER JOIN:
+        // Keep ONLY left rows that have ANY match on (name, city).
+        // No right-side columns are added.
+        dfLeft.filterJoin(dfRight) { name and city }
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `leftJoin` on (name, city); `null` cells outside the key columns are rendered bold. */
+    @Test
+    fun notebook_test_join_17() {
+        // SampleStart
+        // LEFT JOIN:
+        // Keep ALL left rows and add columns from the right dataframe.
+        // If (name, city) matches, attach right columns values from
+        // the corresponding row in the right dataframe;
+        // if not (e.g. ("Bob", "Dubai") row), fill them with `null`.
+        dfLeft.leftJoin(dfRight) { name and city }
+            // SampleEnd
+            .colorized()
+            .format { all() except (name and city) }.with { if (it == null) bold else null }
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `rightJoin` on (name, city); `null` cells outside the key columns are rendered bold. */
+    @Test
+    fun notebook_test_join_18() {
+        // SampleStart
+        // RIGHT JOIN:
+        // Keep ALL right rows and add columns from the left dataframe.
+        // If (name, city) matches, attach left columns values from
+        // the corresponding row in the left dataframe;
+        // if not (e.g. ("Bob", "Tokyo") row), fill them with `null`.
+        dfLeft.rightJoin(dfRight) { name and city }
+            // SampleEnd
+            .colorized()
+            .format { all() except (name and city) }.with { if (it == null) bold else null }
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `fullJoin` on (name, city); `null` cells outside the key columns are rendered bold. */
+    @Test
+    fun notebook_test_join_19() {
+        // SampleStart
+        // FULL JOIN:
+        // Keep ALL rows from both sides. Where there's no match on (name, city),
+        // the other side is filled with nulls.
+        dfLeft.fullJoin(dfRight) { name and city }
+            // SampleEnd
+            .colorized()
+            .format { all() except (name and city) }.with { if (it == null) bold else null }
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `excludeJoin` on (name, city). */
+    @Test
+    fun notebook_test_join_20() {
+        // SampleStart
+        // EXCLUDE JOIN:
+        // Keep ONLY left rows that have NO match on (name, city).
+        // Useful to find "unpaired" left rows.
+        dfLeft.excludeJoin(dfRight) { name and city }
+            // SampleEnd
+            .colorized()
+            .defaultHeaderFormatting { name and city }
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,45 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.render
+
+import org.jetbrains.kotlinx.dataframe.DataRow
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.colsOf
+import org.jetbrains.kotlinx.dataframe.api.formatHeader
+import org.jetbrains.kotlinx.dataframe.api.with
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Documentation sample for `formatHeader`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the formatted dataframe as an HTML sample when the test runs.
+ */
+class FormatHeaderSamples : DataFrameSampleHelper("format", "api") {
+    // Re-typed to the Person schema declared in this class.
+    val df = peopleDf.cast<Person>()
+
+    /** Schema of the `name` column group. */
+    @DataSchema
+    interface Name {
+        val firstName: String
+        val lastName: String
+    }
+
+    /** Row schema used by the sample. */
+    @DataSchema
+    interface Person {
+        val age: Int
+        val city: String?
+        val name: DataRow<Name>
+        val weight: Int?
+        val isHappy: Boolean
+    }
+
+    /** Sample: stack several `formatHeader` calls, from all columns down to a single nested column. */
+    @Test
+    fun formatHeader() {
+        // SampleStart
+        df
+            // Format all column headers with bold
+            .formatHeader().with { bold }
+            // Format the "name" column (including nested) header with red text
+            .formatHeader { name }.with { textColor(red) }
+            // Override "name"/"lastName" column formatting header with blue text
+            .formatHeader { name.lastName }.with { textColor(blue) }
+            // Format all numeric column headers with underlines
+            .formatHeader { colsOf<Number?>() }.with { underline }
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,37 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.utils
+
+import org.jetbrains.kotlinx.dataframe.api.all
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Documentation samples for `all`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class AllSamples : DataFrameSampleHelper("all", "api") {
+
+    // Two people aged 15 and 20. The schema is the one declared in AnySamples, not a local one.
+    private val df = dataFrameOf(
+        "name" to listOf("Alice", "Bob"),
+        "age" to listOf(15, 20),
+    ).cast<AnySamples.SimplePerson>()
+
+    /** Renders the input dataframe used by the `all` samples. */
+    @Test
+    fun notebook_test_all_3() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `all` with the row condition `age > 21`; the result is not rendered. */
+    @Test
+    fun notebook_test_all_4() {
+        // SampleStart
+        df.all { age > 21 }
+        // SampleEnd
+    }
+
+    /** Sample: `all` with a condition combining two columns; the result is not rendered. */
+    @Test
+    fun notebook_test_all_5() {
+        // SampleStart
+        df.all { name.first().isUpperCase() && age >= 15 }
+        // SampleEnd
+    }
+}
@@ -0,0 +1,44 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.utils
+
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.any
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Documentation samples for `any`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class AnySamples : DataFrameSampleHelper("any", "api") {
+
+    /** Schema of the sample dataframe: a name with an age. Also referenced by `AllSamples`. */
+    @DataSchema
+    interface SimplePerson {
+        val name: String
+        val age: Int
+    }
+
+    // Two people aged 15 and 20.
+    private val df = dataFrameOf(
+        "name" to listOf("Alice", "Bob"),
+        "age" to listOf(15, 20),
+    ).cast<SimplePerson>()
+
+    /** Renders the input dataframe used by the `any` samples. */
+    @Test
+    fun notebook_test_any_3() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `any` with the row condition `age > 21`; the result is not rendered. */
+    @Test
+    fun notebook_test_any_4() {
+        // SampleStart
+        df.any { age > 21 }
+        // SampleEnd
+    }
+
+    /** Sample: `any` with a condition combining two columns; the result is not rendered. */
+    @Test
+    fun notebook_test_any_5() {
+        // SampleStart
+        df.any { age == 15 && name == "Alice" }
+        // SampleEnd
+    }
+}
@@ -0,0 +1,46 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.utils
+
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.chunked
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+
+/**
+ * Documentation samples for `chunked`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class ChunkedSamples : DataFrameSampleHelper("chunked", "api") {
+
+    /** Schema of the sample dataframe: a name with an age. */
+    @DataSchema
+    interface SimplePerson {
+        val name: String
+        val age: Int
+    }
+
+    // Five rows, so the chunk sizes used below do not divide the frame evenly.
+    private val df = dataFrameOf(
+        "name" to listOf("Alice", "Bob", "Charlie", "Diana", "Eve"),
+        "age" to listOf(15, 20, 25, 30, 35),
+    ).cast<SimplePerson>()
+
+    /** Renders the input dataframe used by the `chunked` samples. */
+    @Test
+    fun notebook_test_chunked_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `chunked` with a fixed chunk size of 2. */
+    @Test
+    fun notebook_test_chunked_2() {
+        // SampleStart
+        df.chunked(size = 2)
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /** Sample: `chunked` with explicit start indices and a custom name for the resulting column. */
+    @Test
+    fun notebook_test_chunked_3() {
+        // SampleStart
+        df.chunked(startIndices = listOf(0, 1, 3), name = "segments")
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,39 @@
+package org.jetbrains.kotlinx.dataframe.samples.api.utils
+
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.shuffle
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.junit.Test
+import kotlin.random.Random
+
+/**
+ * Documentation samples for `shuffle`.
+ *
+ * Code between `// SampleStart` and `// SampleEnd` is the published sample text;
+ * `saveDfHtmlSample()` stores the resulting dataframe as an HTML sample when the test runs.
+ */
+class ShuffleSamples : DataFrameSampleHelper("shuffle", "api") {
+
+    /** Schema of the sample dataframe: a name with an age. */
+    @DataSchema
+    interface SimplePerson {
+        val name: String
+        val age: Int
+    }
+
+    // Five rows in ascending age order, so a changed row order is visible in the output.
+    private val df = dataFrameOf(
+        "name" to listOf("Alice", "Bob", "Charlie", "Diana", "Eve"),
+        "age" to listOf(15, 20, 25, 30, 35),
+    ).cast<SimplePerson>()
+
+    /** Renders the input dataframe used by the `shuffle` sample. */
+    @Test
+    fun notebook_test_shuffle_1() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    /**
+     * Sample: `shuffle` with a seeded [Random].
+     * NOTE(review): the fixed seed presumably keeps the saved sample stable between runs — confirm.
+     */
+    @Test
+    fun notebook_test_shuffle_2() {
+        // SampleStart
+        df.shuffle(Random(42))
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+}
@@ -0,0 +1,246 @@
+@file:Suppress("PropertyName", "UNUSED_VARIABLE", "UNUSED_EXPRESSION", "UNCHECKED_CAST")
+
+package org.jetbrains.kotlinx.dataframe.samples.guides
+
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.add
+import org.jetbrains.kotlinx.dataframe.api.aggregate
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.convert
+import org.jetbrains.kotlinx.dataframe.api.count
+import org.jetbrains.kotlinx.dataframe.api.describe
+import org.jetbrains.kotlinx.dataframe.api.filter
+import org.jetbrains.kotlinx.dataframe.api.groupBy
+import org.jetbrains.kotlinx.dataframe.api.into
+import org.jetbrains.kotlinx.dataframe.api.maxOf
+import org.jetbrains.kotlinx.dataframe.api.rename
+import org.jetbrains.kotlinx.dataframe.api.select
+import org.jetbrains.kotlinx.dataframe.api.sortByDesc
+import org.jetbrains.kotlinx.dataframe.api.sumOf
+import org.jetbrains.kotlinx.dataframe.api.take
+import org.jetbrains.kotlinx.dataframe.api.update
+import org.jetbrains.kotlinx.dataframe.api.with
+import org.jetbrains.kotlinx.dataframe.io.readCsv
+import org.jetbrains.kotlinx.dataframe.io.writeExcel
+import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper
+import org.jetbrains.kotlinx.kandy.dsl.plot
+import org.jetbrains.kotlinx.kandy.letsplot.feature.layout
+import org.jetbrains.kotlinx.kandy.letsplot.layers.bars
+import org.junit.Ignore
+import org.junit.Test
+import java.net.URL
+
+class QuickStartGuide : DataFrameSampleHelper("quickstart", "guides") { // Samples for the quickstart guide.
+
+    @DataSchema // Columns of the CSV that the samples below access.
+    interface Repositories {
+        val html_url: URL
+        val watchers: Int
+        val full_name: String
+        val stargazers_count: Int
+        val topics: String
+    }
+
+    private val df = DataFrame.readCsv( // Fetched from the remote URL when the class is instantiated.
+        "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv",
+    ).cast<Repositories>() // The private helpers below replay the guide's steps so a test can start mid-pipeline.
+
+    private fun getDfSelected() = df.select { full_name and stargazers_count and topics } // replays quickstart_5
+
+    private fun getDfFiltered() = // replays notebook_test_quickstart_6
+        getDfSelected()
+            .filter { stargazers_count >= 1000 }
+
+    private fun getDfRenamed() = // replays notebook_test_quickstart_7
+        getDfFiltered()
+            .rename { full_name }.into("name")
+            // And "stargazers_count" into "starsCount"
+            .rename { stargazers_count }.into("starsCount")
+
+    private fun getDfUpdated() = // replays notebook_test_quickstart_8
+        getDfRenamed()
+            // Update "name" values with only its second part (after '/')
+            .update { name }.with { it.split("/")[1] }
+            // Convert "topics" `String` values into `List<String>` by splitting:
+            .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") }
+
+    private fun getDfWithIsIntellij() = // replays notebook_test_quickstart_10
+        getDfUpdated()
+            .add("isIntellij") {
+                name.contains("intellij") || "intellij" in topics
+            }
+
+    private fun getGroupedByIsIntellij() = // replays notebook_test_quickstart_11
+        getDfWithIsIntellij()
+            .groupBy { isIntellij }
+
+    private fun getDfTop10() = // replays notebook_test_quickstart_14
+        getDfWithIsIntellij()
+            // Sort by "starsCount" value descending
+            .sortByDesc { starsCount }.take(10)
+
+    @Test // Reading only: the local `df` shadows the property and nothing is saved.
+    fun notebook_test_quickstart_2() {
+        // SampleStart
+        val df = DataFrame.readCsv(
+            "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv",
+        )
+        // SampleEnd
+    }
+
+    @Test // Saves the loaded frame as an HTML sample.
+    fun notebook_test_quickstart_3() {
+        // SampleStart
+        df
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the output of describe().
+    fun notebook_test_quickstart_4() {
+        // SampleStart
+        df.describe()
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the three-column selection.
+    fun notebook_test_quickstart_5() {
+        // SampleStart
+        // Select "full_name", "stargazers_count" and "topics" columns
+        val dfSelected = df.select { full_name and stargazers_count and topics }
+        dfSelected
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the filtered frame.
+    fun notebook_test_quickstart_6() {
+        val dfSelected = getDfSelected()
+        // SampleStart
+        // Keep only rows where "stargazers_count" value is at least 1000
+        val dfFiltered = dfSelected.filter { stargazers_count >= 1000 }
+        dfFiltered
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the frame with renamed columns.
+    fun notebook_test_quickstart_7() {
+        val dfFiltered = getDfFiltered()
+        // SampleStart
+        // Rename "full_name" column into "name"
+        val dfRenamed = dfFiltered.rename { full_name }.into("name")
+            // And "stargazers_count" into "starsCount"
+            .rename { stargazers_count }.into("starsCount")
+        dfRenamed
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the frame after updating "name" and converting "topics".
+    fun notebook_test_quickstart_8() {
+        val dfRenamed = getDfRenamed()
+        // SampleStart
+        val dfUpdated = dfRenamed
+            // Update "name" values with only its second part (after '/')
+            .update { name }.with { it.split("/")[1] }
+            // Convert "topics" `String` values into `List<String>` by splitting:
+            .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") }
+        dfUpdated
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Evaluates the column type only; the value is discarded and nothing is saved.
+    fun notebook_test_quickstart_9() {
+        val dfUpdated = getDfUpdated()
+        // SampleStart
+        dfUpdated.topics.type()
+        // SampleEnd
+    }
+
+    @Test // Saves the frame with the added "isIntellij" column.
+    fun notebook_test_quickstart_10() {
+        val dfUpdated = getDfUpdated()
+        // SampleStart
+        // Add a `Boolean` column indicating whether the `name` contains the "intellij" substring
+        // or the topics include "intellij".
+        val dfWithIsIntellij = dfUpdated.add("isIntellij") {
+            name.contains("intellij") || "intellij" in topics
+        }
+        dfWithIsIntellij
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the grouping by "isIntellij".
+    fun notebook_test_quickstart_11() {
+        val dfWithIsIntellij = getDfWithIsIntellij()
+        // SampleStart
+        val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij }
+        groupedByIsIntellij
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the per-group row counts.
+    fun notebook_test_quickstart_12() {
+        val groupedByIsIntellij = getGroupedByIsIntellij()
+        // SampleStart
+        groupedByIsIntellij.count()
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the per-group sum and max of "starsCount".
+    fun notebook_test_quickstart_13() {
+        val groupedByIsIntellij = getGroupedByIsIntellij()
+        // SampleStart
+        groupedByIsIntellij.aggregate {
+            // Compute sum and max of "starsCount" within each group into "sumStars" and "maxStars" columns
+            sumOf { starsCount } into "sumStars"
+            maxOf { starsCount } into "maxStars"
+        }
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the first ten rows after sorting by "starsCount" descending.
+    fun notebook_test_quickstart_14() {
+        val dfWithIsIntellij = getDfWithIsIntellij()
+        // SampleStart
+        val dfTop10 = dfWithIsIntellij
+            // Sort by "starsCount" value descending
+            .sortByDesc { starsCount }.take(10)
+        dfTop10
+            // SampleEnd
+            .saveDfHtmlSample()
+    }
+
+    @Test // Saves the bar plot of the top-10 frame as an SVG sample.
+    fun notebook_test_quickstart_16() {
+        val dfTop10 = getDfTop10()
+        // SampleStart
+        dfTop10.plot {
+            bars {
+                x(name)
+                y(starsCount)
+            }
+
+            layout.title = "Top 10 JetBrains repositories by stars count"
+        }
+            // SampleEnd
+            .savePlotSVGSample()
+    }
+
+    @Ignore // Not run; presumably to avoid writing jb_repos.xlsx during the test run (confirm).
+    @Test
+    fun notebook_test_quickstart_17() {
+        val dfWithIsIntellij = getDfWithIsIntellij()
+        // SampleStart
+        dfWithIsIntellij.writeExcel("jb_repos.xlsx")
+        // SampleEnd
+    }
+}
@@ -0,0 +1,48 @@
+package org.jetbrains.kotlinx.dataframe.samples.io
+
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
+import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
+import org.jetbrains.kotlinx.dataframe.io.readSqlTable
+import org.junit.Ignore
+import org.junit.Test
+import java.sql.DriverManager
+
+class DuckDb { // DuckDB reading samples; both tests are @Ignore'd and therefore not executed.
+
+    @Ignore // Presumably skipped because no database exists at the sample URL (confirm).
+    @Test
+    fun readSqlTable() {
+        // SampleStart
+        val url = "jdbc:duckdb:/testDatabase"
+        val username = "duckdb"
+        val password = "password"
+
+        val dbConfig = DbConnectionConfig(url, username, password)
+
+        val tableName = "Customer"
+
+        val df = DataFrame.readSqlTable(dbConfig, tableName)
+        // SampleEnd
+    }
+
+    // source: https://duckdb.org/docs/stable/core_extensions/iceberg/overview.html
+    @Ignore // Presumably skipped because it installs an extension and reads data not shipped with the tests (confirm).
+    @Test
+    fun readIcebergExtension() {
+        // SampleStart
+        // Creating an in-memory DuckDB database
+        val connection = DriverManager.getConnection("jdbc:duckdb:")
+        val df = connection.use { connection ->
+            // install and load Iceberg
+            connection.createStatement().execute("INSTALL iceberg; LOAD iceberg;")
+
+            // query a table from Iceberg using a specific SQL query
+            DataFrame.readSqlQuery(
+                connection = connection,
+                sqlQuery = "SELECT * FROM iceberg_scan('data/iceberg/lineitem_iceberg', allow_moved_paths = true);",
+            )
+        }
+        // SampleEnd
+    }
+}
@@ -0,0 +1,78 @@
+package org.jetbrains.kotlinx.dataframe.samples.io
+
+import io.kotest.matchers.shouldBe
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.api.NullabilityOptions
+import org.jetbrains.kotlinx.dataframe.io.readParquet
+import org.jetbrains.kotlinx.dataframe.testParquet
+import org.junit.Test
+import java.io.File
+import java.nio.file.Paths
+
+class Parquet { // Parquet reading samples, each checked against the "sales.parquet" test resource.
+    @Test // Reads through a URL.
+    fun readParquetURL() {
+        val url = testParquet("sales")
+
+        // SampleStart
+        // Read from URLs
+        val df = DataFrame.readParquet(url)
+        // SampleEnd
+        df.rowsCount() shouldBe 300
+        df.columnsCount() shouldBe 20
+    }
+
+    @Test // Reads through a java.nio Path built from the resource URL.
+    fun readParquetFilePath() {
+        val url = testParquet("sales")
+        val path = Paths.get(url.toURI())
+        // SampleStart
+        val df = DataFrame.readParquet(path)
+        // SampleEnd
+        df.rowsCount() shouldBe 300
+        df.columnsCount() shouldBe 20
+    }
+
+    @Test // Reads through a java.io.File built from the resource URL.
+    fun readParquetFile() {
+        val url = testParquet("sales")
+        val file = File(url.toURI())
+
+        // SampleStart
+        // Read from File objects
+        val df = DataFrame.readParquet(file)
+        // SampleEnd
+        df.rowsCount() shouldBe 300
+        df.columnsCount() shouldBe 20
+    }
+
+    @Test // Reads a File with explicit `nullability` and `batchSize` arguments; the expected shape is unchanged.
+    fun readParquetFileWithParameters() {
+        val url = testParquet("sales")
+        val file = File(url.toURI())
+
+        // SampleStart
+        val df = DataFrame.readParquet(
+            file,
+            nullability = NullabilityOptions.Infer,
+            batchSize = 64L * 1024,
+        )
+        // SampleEnd
+        df.rowsCount() shouldBe 300
+        df.columnsCount() shouldBe 20
+    }
+
+    @Test // Passes the same resource three times, hence the expected 3 x 300 = 900 rows.
+    fun readMultipleParquetFiles() {
+        val url = testParquet("sales")
+        val file = File(url.toURI())
+        val file1 = File(url.toURI())
+        val file2 = File(url.toURI())
+
+        // SampleStart
+        val df = DataFrame.readParquet(file, file1, file2)
+        // SampleEnd
+        df.rowsCount() shouldBe 900
+        df.columnsCount() shouldBe 20
+    }
+}
@@ -0,0 +1,13 @@
+package org.jetbrains.kotlinx.dataframe
+
+import java.net.URL
+
+/**
+ * Looks up [resourcePath] through the class loader of this file's classes and returns its URL.
+ *
+ * Fails with a message naming the missing path instead of a bare `!!` NullPointerException,
+ * so a typo in a resource name is immediately visible in the test output.
+ *
+ * @param resourcePath resource path as understood by [ClassLoader.getResource], e.g. `"sales.parquet"`
+ * @return the URL of the resource
+ * @throws IllegalArgumentException if the class loader finds no resource at [resourcePath]
+ */
+fun testResource(resourcePath: String): URL =
+    requireNotNull(object { }::class.java.classLoader.getResource(resourcePath)) {
+        "Test resource not found on the classpath: $resourcePath"
+    }
+
+/** Returns the URL of the test resource named [csvName] with the `.csv` extension appended. */
+fun testCsv(csvName: String): URL {
+    val resourcePath = "$csvName.csv"
+    return testResource(resourcePath)
+}
+
+/** Returns the URL of the test resource named [jsonName] with the `.json` extension appended. */
+fun testJson(jsonName: String): URL {
+    val resourcePath = "$jsonName.json"
+    return testResource(resourcePath)
+}
+
+/** Returns the URL of the test resource named [name] with the `.feather` extension appended. */
+fun testArrowFeather(name: String): URL {
+    val resourcePath = "$name.feather"
+    return testResource(resourcePath)
+}
+
+/** Returns the URL of the test resource named [name] with the `.parquet` extension appended. */
+fun testParquet(name: String): URL {
+    val resourcePath = "$name.parquet"
+    return testResource(resourcePath)
+}
@@ -0,0 +1,60 @@
+package org.jetbrains.kotlinx.dataframe.util
+
+import org.jetbrains.kotlinx.dataframe.ColumnsSelector
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.api.CellAttributes
+import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
+import org.jetbrains.kotlinx.dataframe.api.FormattingDsl
+import org.jetbrains.kotlinx.dataframe.api.and
+import org.jetbrains.kotlinx.dataframe.api.formatHeader
+import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths
+import org.jetbrains.kotlinx.dataframe.api.with
+
+internal val baseColorSet = listOf( // Header colors, picked by column index; the size caps how many headers can be styled.
+    FormattingDsl.rgb(244, 67, 54), // red
+    FormattingDsl.rgb(33, 150, 243), // blue
+    FormattingDsl.rgb(76, 175, 80), // green
+    FormattingDsl.rgb(255, 152, 0), // orange
+    FormattingDsl.rgb(156, 39, 176), // purple
+    FormattingDsl.rgb(0, 150, 136), // teal
+    FormattingDsl.rgb(233, 30, 99), // pink/magenta
+)
+
+/** Cell attribute that sets `font-family` to a monospace font stack. */
+internal val FormattingDsl.monospace: CellAttributes
+    get() {
+        val fontStack =
+            "ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, 'Liberation Mono', 'Courier New', monospace"
+        return attr("font-family", fontStack)
+    }
+
+/**
+ * Styles the header of every column selected by [headers]: the first selected column gets the
+ * first color of [baseColorSet], the second gets the second, and so on; each is also set in [monospace].
+ *
+ * The chain starts from `formatHeader().with { null }` (presumably a header format with no attributes).
+ *
+ * @param headers selects the columns whose headers are styled
+ * @return the resulting formatted frame
+ * @throws IllegalArgumentException if [headers] selects more columns than [baseColorSet] has colors
+ */
+internal fun <T> DataFrame<T>.defaultHeaderFormatting(headers: ColumnsSelector<T, *>): FormattedFrame<T> {
+    val selected = getColumnsWithPaths(headers)
+    val maxHeaders = baseColorSet.size
+    require(selected.size <= maxHeaders) {
+        "Too many headers: ${selected.size}. Max supported is $maxHeaders."
+    }
+
+    val unstyled = formatHeader().with { null }
+
+    // The size check above guarantees that zip pairs every selected column with a color.
+    return selected.zip(baseColorSet).fold(unstyled) { formatted, (header, color) ->
+        formatted
+            .formatHeader { header }
+            .with { textColor(color) and monospace }
+    }
+}
+
+/**
+ * Styles the header of every column selected by [headers] on an already formatted frame:
+ * the i-th selected column gets the i-th color of [baseColorSet] and is set in [monospace].
+ *
+ * Columns are resolved against the wrapped `df`; the suppression is presumably needed because
+ * that property is not visible from this module.
+ *
+ * @param headers selects the columns whose headers are styled
+ * @return the resulting formatted frame
+ * @throws IllegalArgumentException if [headers] selects more columns than [baseColorSet] has colors
+ */
+@Suppress("INVISIBLE_REFERENCE")
+internal fun <T> FormattedFrame<T>.defaultHeaderFormatting(headers: ColumnsSelector<T, *>): FormattedFrame<T> {
+    val selected = df.getColumnsWithPaths(headers)
+    val maxHeaders = baseColorSet.size
+    require(selected.size <= maxHeaders) {
+        "Too many headers: ${selected.size}. Max supported is $maxHeaders."
+    }
+
+    val unstyled = formatHeader().with { null }
+
+    // The size check above guarantees that zip pairs every selected column with a color.
+    return selected.zip(baseColorSet).fold(unstyled) { formatted, (header, color) ->
+        formatted
+            .formatHeader { header }
+            .with { textColor(color) and monospace }
+    }
+}
@@ -0,0 +1,3 @@
+date
+13/Jan/23 11:49 AM
+14/Mar/23 5:35 PM
@@ -0,0 +1,3 @@
+colName
+"48,000"
+"47,302"
@@ -0,0 +1,6 @@
+[
+  { "A": "1", "B": 1, "C": 1.0, "D": true },
+  { "A": "2", "B": 2, "C": 1.1, "D": null },
+  { "A": "3", "B": 3, "C": 1, "D": false },
+  { "A": "4", "B": 4, "C": 1.3, "D": true }
+]
@@ -0,0 +1 @@
+{ "A": "1", "B": 1, "C": 1.0, "D": true }
@@ -0,0 +1,3 @@
+12|tuv|0.12|true
+41|xyz|3.6|not assigned
+89|abc|7.1|false