init research

2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,39 @@
+import org.jetbrains.kotlin.gradle.dsl.JvmTarget
+
+plugins {
+    application
+    kotlin("jvm")
+
+    // uses the 'old' Gradle plugin instead of the compiler plugin for now
+    id("org.jetbrains.kotlinx.dataframe")
+
+    // only mandatory if `kotlin.dataframe.add.ksp=false` in gradle.properties
+    id("com.google.devtools.ksp")
+}
+
+repositories {
+    mavenLocal() // in case of local dataframe development
+    mavenCentral()
+}
+
+dependencies {
+    // implementation("org.jetbrains.kotlinx:dataframe:X.Y.Z")
+    implementation(project(":"))
+
+    // multik support
+    implementation(libs.multik.core)
+    implementation(libs.multik.default)
+}
+
+kotlin {
+    compilerOptions {
+        jvmTarget = JvmTarget.JVM_1_8
+        freeCompilerArgs.add("-Xjdk-release=8")
+    }
+}
+
+tasks.withType<JavaCompile> {
+    sourceCompatibility = JavaVersion.VERSION_1_8.toString()
+    targetCompatibility = JavaVersion.VERSION_1_8.toString()
+    options.release.set(8)
+}
@@ -0,0 +1,374 @@
+@file:OptIn(ExperimentalTypeInference::class)
+
+package org.jetbrains.kotlinx.dataframe.examples.multik
+
+import org.jetbrains.kotlinx.dataframe.AnyFrame
+import org.jetbrains.kotlinx.dataframe.ColumnSelector
+import org.jetbrains.kotlinx.dataframe.ColumnsSelector
+import org.jetbrains.kotlinx.dataframe.DataColumn
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.api.ValueProperty
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.colsOf
+import org.jetbrains.kotlinx.dataframe.api.column
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
+import org.jetbrains.kotlinx.dataframe.api.getColumn
+import org.jetbrains.kotlinx.dataframe.api.getColumns
+import org.jetbrains.kotlinx.dataframe.api.map
+import org.jetbrains.kotlinx.dataframe.api.named
+import org.jetbrains.kotlinx.dataframe.api.toColumn
+import org.jetbrains.kotlinx.dataframe.api.toColumnGroup
+import org.jetbrains.kotlinx.dataframe.api.toDataFrame
+import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
+import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
+import org.jetbrains.kotlinx.multik.api.mk
+import org.jetbrains.kotlinx.multik.api.ndarray
+import org.jetbrains.kotlinx.multik.ndarray.complex.Complex
+import org.jetbrains.kotlinx.multik.ndarray.data.D1Array
+import org.jetbrains.kotlinx.multik.ndarray.data.D2Array
+import org.jetbrains.kotlinx.multik.ndarray.data.D3Array
+import org.jetbrains.kotlinx.multik.ndarray.data.MultiArray
+import org.jetbrains.kotlinx.multik.ndarray.data.NDArray
+import org.jetbrains.kotlinx.multik.ndarray.data.get
+import org.jetbrains.kotlinx.multik.ndarray.operations.toList
+import org.jetbrains.kotlinx.multik.ndarray.operations.toListD2
+import kotlin.experimental.ExperimentalTypeInference
+import kotlin.reflect.KClass
+import kotlin.reflect.KType
+import kotlin.reflect.full.isSubtypeOf
+import kotlin.reflect.typeOf
+
+// region 1D
+
+/** Converts a one-dimensional array ([D1Array]) to a [DataColumn] with optional [name]. */
+inline fun <reified N> D1Array<N>.convertToColumn(name: String = ""): DataColumn<N> {
+    // we can simply convert the 1D array to a typed list and create a typed column from it
+    // by using the reified type parameter, DataFrame needs to do no inference :)
+    val values = this.toList()
+    return column<N>(values) named name
+}
+
+/**
+ * Converts a one-dimensional array ([D1Array]) of type [N] into a DataFrame.
+ * The resulting DataFrame contains a single column named "value", where each element of the array becomes a row in the DataFrame.
+ *
+ * @return a DataFrame where each element of the source array is represented as a row in a column named "value" under the schema [ValueProperty].
+ */
+@JvmName("convert1dArrayToDataFrame")
+inline fun <reified N> D1Array<N>.convertToDataFrame(): DataFrame<ValueProperty<N>> {
+    // do the conversion like above, but name the column "value"...
+    val column = this.convertToColumn(ValueProperty<*>::value.name)
+    // ...so we can cast it to a ValueProperty DataFrame
+    return dataFrameOf(column).cast<ValueProperty<N>>()
+}
+
+/** Converts a [DataColumn] to a one-dimensional array ([D1Array]). */
+@JvmName("convertNumberColumnToMultik")
+inline fun <reified N> DataColumn<N>.convertToMultik(): D1Array<N> where N : Number, N : Comparable<N> {
+    // we can convert our column to a typed list again to convert it to a multik array
+    val values = this.toList()
+    return mk.ndarray(values)
+}
+
+/** Converts a [DataColumn] to a one-dimensional array ([D1Array]). */
+@JvmName("convertComplexColumnToMultik")
+inline fun <reified N : Complex> DataColumn<N>.convertToMultik(): D1Array<N> {
+    // we can convert our column to a typed list again to convert it to a multik array
+    val values = this.toList()
+    return mk.ndarray(values)
+}
+
+/** Converts a [DataColumn] selected by [column] to a one-dimensional array ([D1Array]). */
+@JvmName("convertNumberColumnFromDfToMultik")
+@OverloadResolutionByLambdaReturnType
+inline fun <T, reified N> DataFrame<T>.convertToMultik(
+    crossinline column: ColumnSelector<T, N>,
+): D1Array<N>
+    where N : Number, N : Comparable<N> {
+    // use the selector to get the column from this DataFrame and convert it
+    val col = this.getColumn { column(it) }
+    return col.convertToMultik()
+}
+
+/** Converts a [DataColumn] selected by [column] to a one-dimensional array ([D1Array]). */
+@JvmName("convertComplexColumnFromDfToMultik")
+@OverloadResolutionByLambdaReturnType
+inline fun <T, reified N : Complex> DataFrame<T>.convertToMultik(crossinline column: ColumnSelector<T, N>): D1Array<N> {
+    // use the selector to get the column from this DataFrame and convert it
+    val col = this.getColumn { column(it) }
+    return col.convertToMultik()
+}
+
+// endregion
+
+// region 2D
+
+/**
+ * Converts a two-dimensional array ([D2Array]) to a DataFrame.
+ * It will contain `shape[0]` rows and `shape[1]` columns.
+ *
+ * Column names can be specified using the [columnNameGenerator] lambda.
+ *
+ * The conversion enforces that `multikArray[x][y] == dataframe[x][y]`
+ */
+@JvmName("convert2dArrayToDataFrame")
+inline fun <reified N> D2Array<N>.convertToDataFrame(columnNameGenerator: (Int) -> String = { "col$it" }): AnyFrame {
+    // Turning the 2D array into a list of typed columns first, no inference needed
+    val columns: List<DataColumn<N>> = List(shape[1]) { i ->
+        this[0..<shape[0], i] // get all cells of column i
+            .toList()
+            .toColumn<N>(name = columnNameGenerator(i))
+    }
+    // and make a DataFrame from it
+    return columns.toDataFrame()
+}
+
+/**
+ * Converts a [DataFrame] to a two-dimensional array ([D2Array]).
+ * You'll need to specify which columns to convert using the [columns] selector.
+ *
+ * All columns need to be of the same type. If no columns are supplied, the function
+ * will only succeed if all columns are of the same type.
+ *
+ * @see convertToMultikOf
+ */
+@JvmName("convertNumberColumnsFromDfToMultik")
+@OverloadResolutionByLambdaReturnType
+inline fun <T, reified N> DataFrame<T>.convertToMultik(
+    crossinline columns: ColumnsSelector<T, N>,
+): D2Array<N>
+    where N : Number, N : Comparable<N> {
+    // use the selector to get the columns from this DataFrame and convert them
+    val cols = this.getColumns { columns(it) }
+    return cols.convertToMultik()
+}
+
+/**
+ * Converts a [DataFrame] to a two-dimensional array ([D2Array]).
+ * You'll need to specify which columns to convert using the [columns] selector.
+ *
+ * All columns need to be of the same type. If no columns are supplied, the function
+ * will only succeed if all columns are of the same type.
+ *
+ * @see convertToMultikOf
+ */
+@JvmName("convertComplexColumnsFromDfToMultik")
+@OverloadResolutionByLambdaReturnType
+inline fun <T, reified N : Complex> DataFrame<T>.convertToMultik(
+    crossinline columns: ColumnsSelector<T, N>,
+): D2Array<N> {
+    // use the selector to get the columns from this DataFrame and convert them
+    val cols = this.getColumns { columns(it) }
+    return cols.convertToMultik()
+}
+
+/**
+ * Converts a [DataFrame] to a two-dimensional array ([D2Array]).
+ * You'll need to specify which columns to convert using the `columns` selector.
+ *
+ * All columns need to be of the same type. If no columns are supplied, the function
+ * will only succeed if all columns in [this] are of the same type.
+ *
+ * @see convertToMultikOf
+ */
+@JvmName("convertToMultikGuess")
+fun AnyFrame.convertToMultik(): D2Array<*> {
+    val columnTypes = this.columnTypes().distinct()
+    val type = columnTypes.singleOrNull() ?: error("found multiple column types: $columnTypes")
+    return when {
+        type == typeOf<Complex>() -> convertToMultik { colsOf<Complex>() }
+        type.isSubtypeOf(typeOf<Byte>()) -> convertToMultik { colsOf<Byte>() }
+        type.isSubtypeOf(typeOf<Short>()) -> convertToMultik { colsOf<Short>() }
+        type.isSubtypeOf(typeOf<Int>()) -> convertToMultik { colsOf<Int>() }
+        type.isSubtypeOf(typeOf<Long>()) -> convertToMultik { colsOf<Long>() }
+        type.isSubtypeOf(typeOf<Float>()) -> convertToMultik { colsOf<Float>() }
+        type.isSubtypeOf(typeOf<Double>()) -> convertToMultik { colsOf<Double>() }
+        else -> error("found multiple column types: $columnTypes")
+    }
+}
+
+/**
+ * Converts a [DataFrame] to a two-dimensional array ([D2Array]) by taking all
+ * columns of type [N].
+ *
+ * Allows you to write `df.convertToMultikOf<Complex>()`.
+ *
+ * @see convertToMultik
+ */
+@JvmName("convertToMultikOfComplex")
+@Suppress("LocalVariableName")
+inline fun <reified N : Complex> AnyFrame.convertToMultikOf(
+    // unused param to avoid overload resolution ambiguity
+    _klass: KClass<Complex> = Complex::class,
+): D2Array<N> =
+    convertToMultik { colsOf<N>() }
+
+/**
+ * Converts a [DataFrame] to a two-dimensional array ([D2Array]) by taking all
+ * columns of type [N].
+ *
+ * Allows you to write `df.convertToMultikOf<Int>()`.
+ *
+ * @see convertToMultik
+ */
+@JvmName("convertToMultikOfNumber")
+@Suppress("LocalVariableName")
+inline fun <reified N> AnyFrame.convertToMultikOf(
+    // unused param to avoid overload resolution ambiguity
+    _klass: KClass<Number> = Number::class,
+): D2Array<N> where N : Number, N : Comparable<N> = convertToMultik { colsOf<N>() }
+
+/**
+ * Helper function to convert a list of same-typed [DataColumn]s to a two-dimensional array ([D2Array]).
+ * We cannot enforce all columns have the same type if we require just a [DataFrame].
+ */
+@Suppress("UNCHECKED_CAST")
+@JvmName("convertNumberColumnsToMultik")
+inline fun <reified N> List<DataColumn<N>>.convertToMultik(): D2Array<N> where N : Number, N : Comparable<N> {
+    // to get the list of columns as a list of rows, we need to convert them back to a dataframe first,
+    // then we can get the values of each row
+    val rows = this.toDataFrame().map { row -> row.values() as List<N> }
+    return mk.ndarray(rows)
+}
+
+/**
+ * Helper function to convert a list of same-typed [DataColumn]s to a two-dimensional array ([D2Array]).
+ * We cannot enforce all columns have the same type if we require just a [DataFrame].
+ */
+@Suppress("UNCHECKED_CAST")
+@JvmName("convertComplexColumnsToMultik")
+inline fun <reified N : Complex> List<DataColumn<N>>.convertToMultik(): D2Array<N> {
+    // to get the list of columns as a list of rows, we need to convert them back to a dataframe first,
+    // then we can get the values of each row
+    val rows = this.toDataFrame().map { row -> row.values() as List<N> }
+    return mk.ndarray(rows)
+}
+
+// endregion
+
+// region higher dimensions
+
+/**
+ * Converts a three-dimensional array ([D3Array]) to a DataFrame.
+ * It will contain `shape[0]` rows and `shape[1]` columns containing lists of size `shape[2]`.
+ *
+ * Column names can be specified using the [columnNameGenerator] lambda.
+ *
+ * The conversion enforces that `multikArray[x][y][z] == dataframe[x][y][z]`
+ */
+inline fun <reified N> D3Array<N>.convertToDataFrameWithLists(
+    columnNameGenerator: (Int) -> String = { "col$it" },
+): AnyFrame {
+    val columns: List<DataColumn<List<N>>> = List(shape[1]) { y ->
+        this[0..<shape[0], y, 0..<shape[2]] // get all cells of column y, each is a 2d array of size shape[0] x shape[2]
+            .toListD2() // get a shape[0]-sized list/column filled with lists of size shape[2]
+            .toColumn<List<N>>(name = columnNameGenerator(y))
+    }
+    return columns.toDataFrame()
+}
+
+/**
+ * Converts a three-dimensional array ([D3Array]) to a DataFrame.
+ * It will contain `shape[0]` rows and `shape[1]` column groups containing `shape[2]` columns each.
+ *
+ * Column names can be specified using the [columnNameGenerator] lambda.
+ *
+ * The conversion enforces that `multikArray[x][y][z] == dataframe[x][y][z]`
+ */
+@JvmName("convert3dArrayToDataFrame")
+inline fun <reified N> D3Array<N>.convertToDataFrame(columnNameGenerator: (Int) -> String = { "col$it" }): AnyFrame {
+    val columns: List<ColumnGroup<*>> = List(shape[1]) { y ->
+        this[0..<shape[0], y, 0..<shape[2]] // get all cells of column i, each is a 2d array of size shape[0] x shape[2]
+            .transpose(1, 0) // flip, so we get shape[2] x shape[0]
+            .toListD2() // get a shape[2]-sized list filled with lists of size shape[0]
+            .mapIndexed { z, list ->
+                list.toColumn<N>(name = columnNameGenerator(z))
+            } // we get shape[2] columns inside each column group
+            .toColumnGroup(name = columnNameGenerator(y))
+    }
+    return columns.toDataFrame()
+}
+
+/**
+ * Exploratory recursive function to convert a [MultiArray] of any number of dimensions
+ * to a `List<List<...>>` of the same number of dimensions.
+ */
+fun <T> MultiArray<T, *>.toListDn(): List<*> {
+    // Recursive helper function to handle traversal across dimensions
+    fun toListRecursive(indices: IntArray): List<*> {
+        // If we are at the last dimension (1D case)
+        if (indices.size == shape.lastIndex) {
+            return List(shape[indices.size]) { i ->
+                this[intArrayOf(*indices, i)] // Collect values for this dimension
+            }
+        }
+
+        // For higher dimensions, recursively process smaller dimensions
+        return List(shape[indices.size]) { i ->
+            toListRecursive(indices + i) // Add `i` to the current index array
+        }
+    }
+    return toListRecursive(intArrayOf())
+}
+
+/**
+ * Converts a multidimensional array ([NDArray]) to a DataFrame.
+ * Inspired by [toListDn].
+ *
+ * For a single-dimensional array, it will call [D1Array.convertToDataFrame].
+ *
+ * Column names can be specified using the [columnNameGenerator] lambda.
+ *
+ * The conversion enforces that `multikArray[a][b][c][d]... == dataframe[a][b][c][d]...`
+ */
+@Suppress("UNCHECKED_CAST")
+inline fun <reified N> NDArray<N, *>.convertToDataFrameNestedGroups(
+    noinline columnNameGenerator: (Int) -> String = { "col$it" },
+): AnyFrame {
+    if (shape.size == 1) return (this as D1Array<N>).convertToDataFrame()
+
+    // push the first dimension to the end, because this represents the rows in DataFrame,
+    // and they are accessed by []'s first
+    return transpose(*(1..<dim.d).toList().toIntArray(), 0)
+        .convertToDataFrameNestedGroupsRecursive(
+            indices = intArrayOf(),
+            type = typeOf<N>(), // cannot inline a recursive function, so pass the type explicitly
+            columnNameGenerator = columnNameGenerator,
+        ).let {
+            // we could just cast this to a DataFrame<*>, because a ColumnGroup<*>: DataFrame
+            // however, this can sometimes cause issues where instance checks are done at runtime
+            // this converts it to an actual DataFrame instance
+            dataFrameOf((it as ColumnGroup<*>).columns())
+        }
+}
+
+/**
+ * Recursive helper function to handle traversal across dimensions. Do not call directly,
+ * use [convertToDataFrameNestedGroups] instead.
+ */
+@PublishedApi
+internal fun NDArray<*, *>.convertToDataFrameNestedGroupsRecursive(
+    indices: IntArray,
+    type: KType,
+    columnNameGenerator: (Int) -> String,
+): BaseColumn<*> {
+    // If we are at the last dimension (1D case)
+    if (indices.size == shape.lastIndex) {
+        return List(shape[indices.size]) { i ->
+            this[intArrayOf(*indices, i)] // Collect values for this dimension
+        }.let {
+            DataColumn.createByType(name = "", values = it, type = type)
+        }
+    }
+
+    // For higher dimensions, recursively process smaller dimensions
+    return List(shape[indices.size]) { i ->
+        convertToDataFrameNestedGroupsRecursive(
+            indices = indices + i, // Add `i` to the current index array
+            type = type,
+            columnNameGenerator = columnNameGenerator,
+        ).rename(columnNameGenerator(i))
+    }.toColumnGroup("")
+}
+
+// endregion
@@ -0,0 +1,23 @@
+package org.jetbrains.kotlinx.dataframe.examples.multik
+
+import org.jetbrains.kotlinx.dataframe.api.print
+import org.jetbrains.kotlinx.multik.api.io.readNPY
+import org.jetbrains.kotlinx.multik.api.mk
+import org.jetbrains.kotlinx.multik.ndarray.data.D1
+import java.io.File
+
+/**
+ * Multik can read/write data from NPY/NPZ files.
+ * We can use this from DataFrame too!
+ *
+ * We use compatibilityLayer.kt for the conversions, check it out for the implementation details of the conversion!
+ */
+fun main() {
+    val npyFilename = "a1d.npy"
+    val npyFile = File(object {}.javaClass.classLoader.getResource(npyFilename)!!.toURI())
+
+    val mk1 = mk.readNPY<Long, D1>(npyFile)
+    val df1 = mk1.convertToDataFrame()
+
+    df1.print(borders = true, columnTypes = true)
+}
@@ -0,0 +1,99 @@
+package org.jetbrains.kotlinx.dataframe.examples.multik
+
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.colsOf
+import org.jetbrains.kotlinx.dataframe.api.describe
+import org.jetbrains.kotlinx.dataframe.api.mean
+import org.jetbrains.kotlinx.dataframe.api.meanFor
+import org.jetbrains.kotlinx.dataframe.api.print
+import org.jetbrains.kotlinx.dataframe.api.value
+import org.jetbrains.kotlinx.multik.api.mk
+import org.jetbrains.kotlinx.multik.api.rand
+import org.jetbrains.kotlinx.multik.ndarray.data.get
+
+/**
+ * Let's explore some ways we can combine Multik with Kotlin DataFrame.
+ *
+ * We will use compatibilityLayer.kt for the conversions.
+ * Take a look at that file for the implementation details!
+ */
+fun main() {
+    oneDimension()
+    twoDimensions()
+    higherDimensions()
+}
+
+fun oneDimension() {
+    // we can convert a 1D ndarray to a column of a DataFrame:
+    val mk1 = mk.rand<Double>(50)
+    val col1 by mk1.convertToColumn()
+    println(col1)
+
+    // or straight to a DataFrame. It will become the `value` column.
+    val df1 = mk1.convertToDataFrame()
+    println(df1)
+
+    // this allows us to perform any DF operation:
+    println(df1.mean { value })
+    df1.describe().print(borders = true)
+
+    // we can convert back to Multik:
+    val mk2 = df1.convertToMultik { value }
+    // or
+    df1.value.convertToMultik()
+
+    println(mk2)
+}
+
+fun twoDimensions() {
+    // we can also convert a 2D ndarray to a DataFrame
+    // This conversion will create columns like "col0", "col1", etc.
+    // (careful, when the number of columns is too large, this can cause problems)
+    // but will allow for similar access like in multik
+    // aka: `multikArray[x][y] == dataframe[x][y]`
+    val mk1 = mk.rand<Int>(5, 10)
+    println(mk1)
+    val df = mk1.convertToDataFrame()
+    df.print()
+
+    // this allows us to perform any DF operation:
+    val means = df.meanFor { ("col0".."col9").cast<Int>() }
+    means.print()
+
+    // we can convert back to Multik in multiple ways.
+    // Multik can only store one type of data, so we need to specify the type or select
+    // only the columns we want:
+    val mk2 = df.convertToMultik { colsOf<Int>() }
+    // or
+    df.convertToMultikOf<Int>()
+    // or if all columns are of the same type:
+    df.convertToMultik()
+
+    println(mk2)
+}
+
+fun higherDimensions() {
+    // Multik can store higher dimensions as well
+    // however; to convert this to a DataFrame, we need to specify how to do a particular conversion
+    // for instance, for 3d, we could store a list in each cell of the DF to represent the extra dimension:
+    val mk1 = mk.rand<Int>(5, 4, 3)
+
+    println(mk1)
+
+    val df1 = mk1.convertToDataFrameWithLists()
+    df1.print()
+
+    // Alternatively, this could be solved using column groups.
+    // This subdivides each column into more columns, while ensuring `multikArray[x][y][z] == dataframe[x][y][z]`
+    val df2 = mk1.convertToDataFrame()
+    df2.print()
+
+    // For even higher dimensions, we can keep adding more column groups
+    val mk2 = mk.rand<Int>(5, 4, 3, 2)
+    val df3 = mk2.convertToDataFrameNestedGroups()
+    df3.print()
+
+    // ...or use nested DataFrames (in FrameColumns)
+    // (for instance, a 4D matrix could be stored in a 2D DataFrame where each cell is another DataFrame)
+    // but, we'll leave that as an exercise for the reader :)
+}
@@ -0,0 +1,115 @@
+package org.jetbrains.kotlinx.dataframe.examples.multik
+
+import kotlinx.datetime.LocalDate
+import kotlinx.datetime.Month
+import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
+import org.jetbrains.kotlinx.dataframe.api.append
+import org.jetbrains.kotlinx.dataframe.api.cast
+import org.jetbrains.kotlinx.dataframe.api.mapToFrame
+import org.jetbrains.kotlinx.dataframe.api.print
+import org.jetbrains.kotlinx.dataframe.api.single
+import org.jetbrains.kotlinx.dataframe.api.toDataFrame
+import org.jetbrains.kotlinx.multik.api.mk
+import org.jetbrains.kotlinx.multik.api.rand
+import org.jetbrains.kotlinx.multik.ndarray.data.D3Array
+import org.jetbrains.kotlinx.multik.ndarray.data.D4Array
+
+/**
+ * DataFrames can store anything inside, including Multik ndarrays.
+ * This can be useful for storing matrices for easier access later or to simply organize data read from other files.
+ * For example, MRI data is often stored as 3D arrays and sometimes even 4D arrays.
+ */
+fun main() {
+    // imaginary list of patient data
+    @Suppress("ktlint:standard:argument-list-wrapping")
+    val metadata = listOf(
+        MriMetadata(10012L, 25, "Healthy", LocalDate(2023, 1, 1)),
+        MriMetadata(10013L, 45, "Tuberculosis", LocalDate(2023, 2, 15)),
+        MriMetadata(10014L, 32, "Healthy", LocalDate(2023, 3, 22)),
+        MriMetadata(10015L, 58, "Pneumonia", LocalDate(2023, 4, 8)),
+        MriMetadata(10016L, 29, "Tuberculosis", LocalDate(2023, 5, 30)),
+        MriMetadata(10017L, 42, "Healthy", LocalDate(2023, 6, 15)),
+        MriMetadata(10018L, 37, "Healthy", LocalDate(2023, 7, 1)),
+        MriMetadata(10019L, 55, "Healthy", LocalDate(2023, 8, 15)),
+        MriMetadata(10020L, 28, "Healthy", LocalDate(2023, 9, 1)),
+        MriMetadata(10021L, 44, "Healthy", LocalDate(2023, 10, 15)),
+        MriMetadata(10022L, 31, "Healthy", LocalDate(2023, 11, 1)),
+    ).toDataFrame()
+
+    // "reading" the results from "files"
+    val results = metadata.mapToFrame {
+        +patientId
+        +age
+        +diagnosis
+        +scanDate
+        "t1WeightedMri" from { readT1WeightedMri(patientId) }
+        "fMriBoldSeries" from { readFMRiBoldSeries(patientId) }
+    }.cast<MriResults>(verify = true)
+        .append()
+
+    results.print(borders = true)
+
+    // now when we want to check and visualize the T1-weighted MRI scan
+    // for that one healthy patient in July, we can do:
+    val scan = results
+        .single { scanDate.month == Month.JULY && diagnosis == "Healthy" }
+        .t1WeightedMri
+
+    // easy :)
+    visualize(scan)
+}
+
+@DataSchema
+data class MriMetadata(
+    /** Unique patient ID. */
+    val patientId: Long,
+    /** Patient age. */
+    val age: Int,
+    /** Clinical diagnosis (e.g. "Healthy", "Tuberculosis") */
+    val diagnosis: String,
+    /** Date of the scan */
+    val scanDate: LocalDate,
+)
+
+@DataSchema
+data class MriResults(
+    /** Unique patient ID. */
+    val patientId: Long,
+    /** Patient age. */
+    val age: Int,
+    /** Clinical diagnosis (e.g. "Healthy", "Tuberculosis") */
+    val diagnosis: String,
+    /** Date of the scan */
+    val scanDate: LocalDate,
+    /**
+     * T1-weighted anatomical MRI scan.
+     *
+     * Dimensions: (256 x 256 x 180)
+     * - 256 width x 256 height
+     * - 180 slices
+     */
+    val t1WeightedMri: D3Array<Float>,
+    /**
+     * Blood oxygenation level-dependent (BOLD) time series from an fMRI scan.
+     *
+     * Dimensions: (64 x 64 x 30 x 200)
+     * - 64 width x 64 height
+     * - 30 slices
+     * - 200 timepoints
+     */
+    val fMriBoldSeries: D4Array<Float>,
+)
+
+fun readT1WeightedMri(id: Long): D3Array<Float> {
+    // This should in practice, of course, read the actual data, but for this example we just return a dummy array
+    return mk.rand(256, 256, 180)
+}
+
+fun readFMRiBoldSeries(id: Long): D4Array<Float> {
+    // This should in practice, of course, read the actual data, but for this example we just return a dummy array
+    return mk.rand(64, 64, 30, 200)
+}
+
+fun visualize(scan: D3Array<Float>) {
+    // This would then actually visualize the scan
+}