init research
This commit is contained in:
Vendored
+14
@@ -0,0 +1,14 @@
|
||||
## :dataframe-json
|
||||
|
||||
This module, published as `dataframe-json`, contains all logic and tests for DataFrame to be able to work with
|
||||
JSON data sources: [reading](https://kotlin.github.io/dataframe/read.html#read-from-json)
|
||||
and [writing](https://kotlin.github.io/dataframe/write.html#writing-to-json).
|
||||
It's based on [Kotlinx Serialization](https://github.com/Kotlin/kotlinx.serialization).
|
||||
|
||||
It also contains some logic specific to encoding dataframes as JSON objects with metadata for
|
||||
the [custom table component in Kotlin Notebook](https://kotlin.github.io/dataframe/usage-with-kotlin-notebook-plugin.html).
|
||||
See [serialization_format](../docs/serialization_format.md) for more information about the format.
|
||||
|
||||
This module is optional but is included by default by the `dataframe` module, `dataframe-jupyter`,
|
||||
`dataframe-csv`, and `dataframe-excel`.
|
||||
If you want to use DataFrame without JSON support, you can exclude this module from the dependency.
|
||||
+91
@@ -0,0 +1,91 @@
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions {
|
||||
public static final field ALL_OFF I
|
||||
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions$Companion;
|
||||
public static final field GZIP_ON I
|
||||
public static final field LIMIT_SIZE_ON I
|
||||
public fun <init> ()V
|
||||
public fun <init> (II)V
|
||||
public synthetic fun <init> (IIILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public final fun getImageSizeLimit ()I
|
||||
public final fun isGzipOn ()Z
|
||||
public final fun isLimitSizeOn ()Z
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/Base64ImageEncodingOptions$Companion {
|
||||
}
|
||||
|
||||
public abstract interface class org/jetbrains/kotlinx/dataframe/io/CustomEncoder {
|
||||
public abstract fun canEncode (Ljava/lang/Object;)Z
|
||||
public abstract fun encode (Ljava/lang/Object;)Lkotlinx/serialization/json/JsonElement;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/JSON : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
|
||||
public fun <init> ()V
|
||||
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;Z)V
|
||||
public synthetic fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Ljava/util/List;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public fun acceptsExtension (Ljava/lang/String;)Z
|
||||
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
|
||||
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
|
||||
public fun getTestOrder ()I
|
||||
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic : java/lang/Enum {
|
||||
public static final field ANY_COLUMNS Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;
|
||||
public static final field ARRAY_AND_VALUE_COLUMNS Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;
|
||||
public static fun getEntries ()Lkotlin/enums/EnumEntries;
|
||||
public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;
|
||||
public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/JsonKt {
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static final fun readJson (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/nio/file/Path;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/File;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/io/InputStream;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/net/URL;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static synthetic fun readJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/nio/file/Path;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readJsonStr (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;Z)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readJsonStr$default (Lorg/jetbrains/kotlinx/dataframe/DataRow$Companion;Ljava/lang/String;Ljava/util/List;Ljava/util/List;Lorg/jetbrains/kotlinx/dataframe/io/JSON$TypeClashTactic;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataRow;
|
||||
public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Z)Ljava/lang/String;
|
||||
public static final fun toJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Z)Ljava/lang/String;
|
||||
public static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZILjava/lang/Object;)Ljava/lang/String;
|
||||
public static synthetic fun toJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;ZILjava/lang/Object;)Ljava/lang/String;
|
||||
public static final fun toJsonWithMetadata (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/Integer;ZLjava/util/List;Z)Ljava/lang/String;
|
||||
public static synthetic fun toJsonWithMetadata$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ILjava/lang/Integer;ZLjava/util/List;ZILjava/lang/Object;)Ljava/lang/String;
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/io/File;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/Appendable;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;Z)V
|
||||
public static final fun writeJson (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/nio/file/Path;Z)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/io/File;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/Appendable;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/lang/String;ZILjava/lang/Object;)V
|
||||
public static synthetic fun writeJson$default (Lorg/jetbrains/kotlinx/dataframe/DataRow;Ljava/nio/file/Path;ZILjava/lang/Object;)V
|
||||
}
|
||||
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
|
||||
|
||||
// Plugins come from two version catalogs: `convention` (shared build conventions) and `libs`.
plugins {
    alias(convention.plugins.kotlinJvm8)
    alias(libs.plugins.publisher)
    alias(libs.plugins.serialization)
    alias(libs.plugins.binary.compatibility.validator)
}

group = "org.jetbrains.kotlinx"

dependencies {
    // The core module is part of this module's public API surface.
    api(projects.core)

    implementation(libs.kotlin.stdlib)
    implementation(libs.serialization.core)
    implementation(libs.serialization.json)
    implementation(libs.sl4j)

    // Use Kotlin test integration for JUnit 5 to satisfy variant 'kotlin-test-framework-junit5'
    testImplementation(libs.kotlin.test.junit5)
    testImplementation(libs.junit.jupiter)
    testImplementation(libs.junit.jupiter.engine)
    testImplementation(libs.junit.jupiter.params)
    testImplementation(libs.kotestAssertions) {
        exclude(group = "org.jetbrains.kotlin", module = "kotlin-stdlib-jdk8")
    }
    testImplementation(libs.sl4jsimple)
}

// Register the core project's directory as a friend path for every Kotlin compilation of this module.
tasks.withType<KotlinCompile> {
    friendPaths.from(project(projects.core.path).projectDir)
}

// Javadoc generation is switched off for this module.
tasks.withType<Javadoc> {
    enabled = false
}

tasks.test {
    useJUnitPlatform()
}

// Kotlin sources are additionally registered as Java source directories of the main source set.
sourceSets.main {
    java.srcDirs("src/main/kotlin")
}

kotlinPublications {
    publication {
        publicationName = "dataframeJson"
        artifactId = project.name
        description = "Kotlin DataFrame JSON integration"
        packageName = artifactId
    }
}
|
||||
Vendored
+5
@@ -0,0 +1,5 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import java.util.Base64
|
||||
|
||||
/**
 * Encodes this byte array as a Base64 string using the JDK's basic (non-URL, non-MIME) encoder.
 *
 * @return the Base64 representation of the bytes; an empty string for an empty array.
 */
internal fun ByteArray.toBase64(): String {
    val encoder = Base64.getEncoder()
    return encoder.encodeToString(this)
}
|
||||
Vendored
+11
@@ -0,0 +1,11 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.util.zip.GZIPOutputStream
|
||||
|
||||
/**
 * Compresses this byte array with GZIP.
 *
 * The GZIP stream is closed (via `use`) before the buffer is read, so the returned bytes
 * include the complete GZIP trailer.
 *
 * @return the GZIP-compressed bytes.
 */
internal fun ByteArray.encodeGzip(): ByteArray =
    ByteArrayOutputStream()
        .also { sink ->
            // `this` still refers to the receiver ByteArray inside both lambdas.
            GZIPOutputStream(sink).use { gzip -> gzip.write(this) }
        }
        .toByteArray()
|
||||
Vendored
+795
@@ -0,0 +1,795 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonNull
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import kotlinx.serialization.json.boolean
|
||||
import kotlinx.serialization.json.booleanOrNull
|
||||
import kotlinx.serialization.json.double
|
||||
import kotlinx.serialization.json.doubleOrNull
|
||||
import kotlinx.serialization.json.float
|
||||
import kotlinx.serialization.json.floatOrNull
|
||||
import kotlinx.serialization.json.int
|
||||
import kotlinx.serialization.json.intOrNull
|
||||
import kotlinx.serialization.json.jsonArray
|
||||
import kotlinx.serialization.json.long
|
||||
import kotlinx.serialization.json.longOrNull
|
||||
import org.jetbrains.kotlinx.dataframe.AnyCol
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataColumn
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataRow
|
||||
import org.jetbrains.kotlinx.dataframe.api.JsonPath
|
||||
import org.jetbrains.kotlinx.dataframe.api.NameValueProperty
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.chunked
|
||||
import org.jetbrains.kotlinx.dataframe.api.columnOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.concat
|
||||
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.firstOrNull
|
||||
import org.jetbrains.kotlinx.dataframe.api.getColumn
|
||||
import org.jetbrains.kotlinx.dataframe.api.mapIndexed
|
||||
import org.jetbrains.kotlinx.dataframe.api.named
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.api.splitInto
|
||||
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
|
||||
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
|
||||
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
|
||||
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
|
||||
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
|
||||
import org.jetbrains.kotlinx.dataframe.impl.DataCollector
|
||||
import org.jetbrains.kotlinx.dataframe.impl.asList
|
||||
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
|
||||
import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME
|
||||
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic
|
||||
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.jetbrains.kotlinx.dataframe.type
|
||||
import org.jetbrains.kotlinx.dataframe.typeClass
|
||||
import org.jetbrains.kotlinx.dataframe.values
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.KTypeProjection
|
||||
import kotlin.reflect.full.createType
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * Rebuilds this frame from its columns, replacing every [UnnamedColumn] wrapper with the column it wraps.
 */
private fun DataFrame<Any?>.unwrapUnnamedColumns(): AnyFrame {
    val unwrapped = columns().map { column -> column.unwrapUnnamedColumn() }
    return dataFrameOf(unwrapped)
}
|
||||
|
||||
/**
 * Returns the wrapped column if this column is an [UnnamedColumn], otherwise this column itself.
 */
private fun AnyCol.unwrapUnnamedColumn() =
    when (this) {
        is UnnamedColumn -> col
        else -> this
    }
|
||||
|
||||
/**
 * Classification of a list of JSON records, used by [fromJsonListAnyColumns] to decide
 * which kind of column(s) to build.
 */
private enum class AnyColType {
    /** Anything that is not exclusively arrays or exclusively objects (nulls aside). */
    ANY,

    /** At least one array, and no objects or primitives. */
    ARRAYS,

    /** At least one object, and no arrays or primitives. */
    OBJECTS,
}
|
||||
|
||||
/**
 * A [NameValueProperty] whose value type is fixed to `Any?`.
 *
 * Used in this file as the row type of the empty frame produced for null records
 * on key/value paths.
 */
internal interface AnyNameValueProperty : NameValueProperty<Any?> {
    /** The property's value; any value, including `null`. */
    override val value: Any?
}
|
||||
|
||||
/**
 * Converts an already parsed JSON value into a [DataFrame], resolving type clashes with [typeClashTactic].
 *
 * A [JsonArray] is used directly as the list of records and is read with [header].
 * Any other value is wrapped into a single-element list and read without a header.
 * [UnnamedColumn] wrappers left in the result are unwrapped before returning.
 *
 * @param parsed The parsed JSON value.
 * @param unifyNumbers Forwarded to the converter; whether to [unify the numbers that are read][UnifyingNumbers].
 * @param header Column names forwarded to the converter when [parsed] is a [JsonArray].
 * @param keyValuePaths [JsonPath]s forwarded to the converter.
 * @param typeClashTactic Selects [fromJsonListArrayAndValueColumns] or [fromJsonListAnyColumns].
 * @return The resulting [DataFrame].
 */
internal fun readJsonImpl(
    parsed: Any?,
    unifyNumbers: Boolean,
    header: List<String>,
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
): DataFrame<*> {
    // Only a top-level array is treated as "many records"; everything else is one record.
    val records: List<*> = if (parsed is JsonArray) parsed else listOf(parsed)
    // The header is only forwarded for arrays; a single record is read with the converter's default (empty) header.
    val effectiveHeader: List<String> = if (parsed is JsonArray) header else emptyList()

    val converted: AnyFrame = when (typeClashTactic) {
        ARRAY_AND_VALUE_COLUMNS -> fromJsonListArrayAndValueColumns(
            records = records,
            unifyNumbers = unifyNumbers,
            keyValuePaths = keyValuePaths,
            header = effectiveHeader,
        )

        ANY_COLUMNS -> fromJsonListAnyColumns(
            records = records,
            unifyNumbers = unifyNumbers,
            keyValuePaths = keyValuePaths,
            header = effectiveHeader,
        )
    }
    return converted.unwrapUnnamedColumns()
}
|
||||
|
||||
/**
 * Json to DataFrame converter that creates [Any] columns.
 * A.k.a. [TypeClashTactic.ANY_COLUMNS].
 *
 * The records are first classified as "only arrays", "only objects" or "anything else" (nulls are ignored
 * for the classification), and the columns are built accordingly:
 * - anything else: a single unnamed value column (type guessed when all records are primitives, `Any?` otherwise),
 * - only arrays: a single unnamed column of lists or frames,
 * - only objects on a key/value path: a single unnamed [FrameColumn] of name/value frames,
 * - only objects otherwise: one column per distinct key, merging the objects.
 *
 * @param records List of json elements to be converted to a [DataFrame].
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers].
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *     will be created.
 * @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys.
 * @param jsonPath Path of [records] inside the JSON document; it is matched against [keyValuePaths] and
 *     extended on every recursive call.
 * @return [DataFrame] from the given [records].
 * @throws IllegalStateException if [jsonPath] matches a key/value path but the records are not all objects,
 *     or if a primitive cannot be interpreted.
 */
internal fun fromJsonListAnyColumns(
    records: List<*>,
    unifyNumbers: Boolean,
    keyValuePaths: List<JsonPath> = emptyList(),
    header: List<String> = emptyList(),
    jsonPath: JsonPath = JsonPath(),
): AnyFrame {
    var hasPrimitive = false
    var hasArray = false
    var hasObject = false

    // list element type can be JsonObject, JsonArray or primitive
    val nameGenerator = ColumnNameGenerator()
    records.forEach { record ->
        when (record) {
            is JsonObject -> {
                hasObject = true
                record.entries.forEach { nameGenerator.addIfAbsent(it.key) }
            }

            is JsonArray -> hasArray = true

            is JsonNull, null -> Unit

            is JsonPrimitive -> hasPrimitive = true
        }
    }

    val colType = when {
        hasArray && !hasPrimitive && !hasObject -> AnyColType.ARRAYS
        hasObject && !hasPrimitive && !hasArray -> AnyColType.OBJECTS
        else -> AnyColType.ANY
    }
    val justPrimitives = hasPrimitive && !hasArray && !hasObject
    val isKeyValue = keyValuePaths.any { jsonPath.matches(it) }

    if (isKeyValue && colType != AnyColType.OBJECTS) {
        error("Key value path $jsonPath does not match objects.")
    }

    // Exhaustive over AnyColType: the compiler guarantees every classification is handled,
    // so no unreachable fallback branch is needed.
    val columns: List<AnyCol> = when (colType) {
        // Create one column of type Any? (or guessed primitive type) from all the records
        AnyColType.ANY -> {
            val collector: DataCollector<Any?> =
                if (justPrimitives) {
                    createDataCollector(records.size) // guess the type
                } else {
                    createDataCollector(records.size, typeOf<Any?>()) // use Any?
                }

            // Positions of "NaN" primitives; they are collected as null first and patched in afterwards.
            val nanIndices = mutableListOf<Int>()
            records.forEachIndexed { i, v ->
                when (v) {
                    is JsonObject -> {
                        val parsed =
                            fromJsonListAnyColumns(
                                records = listOf(v),
                                unifyNumbers = unifyNumbers,
                                keyValuePaths = keyValuePaths,
                                jsonPath = jsonPath.replaceLastWildcardWithIndex(i),
                            )
                        collector.add(
                            if (parsed.isSingleUnnamedColumn()) {
                                (parsed.getColumn(0) as UnnamedColumn).col.values.first()
                            } else {
                                parsed.firstOrNull() ?: DataRow.empty
                            },
                        )
                    }

                    is JsonArray -> {
                        val parsed = fromJsonListAnyColumns(
                            records = v,
                            unifyNumbers = unifyNumbers,
                            keyValuePaths = keyValuePaths,
                            jsonPath = jsonPath.replaceLastWildcardWithIndex(i).appendArrayWithWildcard(),
                        )
                        collector.add(
                            if (parsed.isSingleUnnamedColumn()) {
                                (parsed.getColumn(0) as UnnamedColumn).col.values.asList()
                            } else {
                                parsed.unwrapUnnamedColumns()
                            },
                        )
                    }

                    is JsonNull -> collector.add(null)

                    is JsonPrimitive -> {
                        // Order matters: "NaN" is checked before the string check, and narrower
                        // numeric types are tried before wider ones.
                        when {
                            v.content == "NaN" -> {
                                nanIndices.add(i)
                                collector.add(null)
                            }

                            v.isString -> collector.add(v.content)

                            v.booleanOrNull != null -> collector.add(v.boolean)

                            v.intOrNull != null -> collector.add(v.int)

                            v.longOrNull != null -> collector.add(v.long)

                            v.floatOrNull != null -> collector.add(v.float)

                            v.doubleOrNull != null -> collector.add(v.double)

                            else -> error("Malformed JSON element ${v::class}: $v")
                        }
                    }

                    else -> collector.add(v)
                }
            }
            val column = createColumnGuessingType(VALUE_COLUMN_NAME, collector.data, unifyNumbers = unifyNumbers)
            val res = if (nanIndices.isNotEmpty()) {
                // Replaces the values at the recorded NaN positions; relies on nanIndices being ascending.
                fun <C> DataColumn<C>.updateNaNs(nanValue: C): DataColumn<C> {
                    var j = 0
                    var nextNanIndex = nanIndices[j]
                    return mapIndexed(column.type) { i, v ->
                        if (i == nextNanIndex) {
                            j++
                            nextNanIndex = if (j < nanIndices.size) nanIndices[j] else -1
                            nanValue
                        } else {
                            v
                        }
                    }
                }
                // Only Double, Float and String columns get their NaN placeholders restored;
                // for any other column type the placeholders stay null.
                when (column.typeClass) {
                    Double::class -> column.cast<Double?>().updateNaNs(Double.NaN)
                    Float::class -> column.cast<Float?>().updateNaNs(Float.NaN)
                    String::class -> column.cast<String?>().updateNaNs("NaN")
                    else -> column
                }
            } else {
                column
            }
            listOf(UnnamedColumn(res))
        }

        // Create one column of type FrameColumn, or List<> from all the records if they are all arrays
        AnyColType.ARRAYS -> {
            // All array elements are flattened into one list; startIndices remembers where each record begins.
            val values = mutableListOf<Any?>()
            val startIndices = ArrayList<Int>()
            records.forEach {
                startIndices.add(values.size)
                when (it) {
                    is JsonArray -> values.addAll(it)
                    is JsonNull, null -> Unit
                    else -> error("Expected JsonArray, got $it")
                }
            }
            val parsed = fromJsonListAnyColumns(
                records = values,
                unifyNumbers = unifyNumbers,
                keyValuePaths = keyValuePaths,
                jsonPath = jsonPath.appendArrayWithWildcard(),
            )

            val res = when {
                parsed.isSingleUnnamedColumn() -> {
                    val col = (parsed.getColumn(0) as UnnamedColumn).col
                    val elementType = col.type
                    val columnValues = col.values
                        .asList()
                        .splitByIndices(startIndices.asSequence())
                        .toList()
                    DataColumn.createValueColumn(
                        name = ARRAY_COLUMN_NAME,
                        values = columnValues,
                        type = List::class.createType(listOf(KTypeProjection.invariant(elementType))),
                    )
                }

                else ->
                    parsed.unwrapUnnamedColumns()
                        .chunked(
                            startIndices = startIndices,
                            name = ARRAY_COLUMN_NAME, // will be erased
                        )
            }
            listOf(UnnamedColumn(res))
        }

        AnyColType.OBJECTS ->
            if (isKeyValue) {
                // Create one column of type FrameColumn<KeyValueProperty>

                // collect the value types to make sure Value columns with lists and other values aren't all turned into lists
                val valueTypes = mutableSetOf<KType>()
                val dataFrames = records.map { record ->
                    when (record) {
                        is JsonObject -> {
                            val map = record.mapValues { (key, value) ->
                                val parsed = fromJsonListAnyColumns(
                                    records = listOf(value),
                                    unifyNumbers = unifyNumbers,
                                    keyValuePaths = keyValuePaths,
                                    jsonPath = jsonPath.append(key),
                                )
                                if (parsed.isSingleUnnamedColumn()) {
                                    (parsed.getColumn(0) as UnnamedColumn).col.values.first()
                                } else {
                                    parsed.unwrapUnnamedColumns().firstOrNull()
                                }
                            }
                            val valueType = map.values.map {
                                guessValueType(sequenceOf(it), unifyNumbers = unifyNumbers)
                            }.commonType()

                            valueTypes += valueType

                            dataFrameOf(
                                columnOf(*map.keys.toTypedArray()).named(NameValueProperty<*>::name.name),
                                createColumnGuessingType(
                                    values = map.values,
                                    suggestedType = TypeSuggestion.Use(valueType),
                                    unifyNumbers = unifyNumbers,
                                ).named(NameValueProperty<*>::value.name),
                            )
                        }

                        is JsonNull, null -> DataFrame.emptyOf<AnyNameValueProperty>()

                        else -> error("Expected JsonObject, got $record")
                    }
                }

                val valueColumns = dataFrames.map { it[NameValueProperty<*>::value.name] }
                val valueColumnSchema = when {
                    // in these cases we can safely combine the columns to get a single column schema
                    valueColumns.all { it is ColumnGroup<*> } || valueColumns.all { it is FrameColumn<*> } ->
                        valueColumns.concat().toDataFrame().schema().columns.values.single()

                    // to avoid listification, we create the value columns schema ourselves (https://github.com/Kotlin/dataframe/issues/184)
                    else -> ColumnSchema.Value(valueTypes.commonType())
                }

                listOf(
                    UnnamedColumn(
                        DataColumn.createFrameColumn(
                            name = VALUE_COLUMN_NAME, // will be erased unless at top-level
                            groups = dataFrames,
                            schema = lazy {
                                DataFrameSchemaImpl(
                                    columns = mapOf(
                                        NameValueProperty<*>::name.name to ColumnSchema.Value(typeOf<String>()),
                                        NameValueProperty<*>::value.name to valueColumnSchema,
                                    ),
                                )
                            },
                        ),
                    ),
                )
            } else {
                // Create multiple columns from all the records if they are all objects, merging the objects in essence
                nameGenerator.names.map { colName ->
                    val values = ArrayList<Any?>(records.size)

                    records.forEach {
                        when (it) {
                            is JsonObject -> values.add(it[colName])
                            is JsonNull, null -> values.add(null)
                            else -> error("Expected JsonObject, got $it")
                        }
                    }

                    val parsed = fromJsonListAnyColumns(
                        records = values,
                        unifyNumbers = unifyNumbers,
                        keyValuePaths = keyValuePaths,
                        jsonPath = jsonPath.append(colName),
                    )
                    when {
                        // Nothing could be built for this key: fall back to an all-null Any? column.
                        parsed.columnsCount() == 0 ->
                            DataColumn.createValueColumn(
                                name = colName,
                                values = arrayOfNulls<Any?>(values.size).toList(),
                                type = typeOf<Any?>(),
                            )

                        parsed.isSingleUnnamedColumn() ->
                            (parsed.getColumn(0) as UnnamedColumn).col.rename(colName)

                        else ->
                            DataColumn.createColumnGroup(colName, parsed.unwrapUnnamedColumns()) as AnyCol
                    }
                }
            }
    }

    return when {
        columns.isEmpty() -> DataFrame.empty(records.size)

        // A single list-typed column built from arrays is split into the columns named by the header.
        columns.size == 1 && hasArray && header.isNotEmpty() && columns[0].typeClass == List::class ->
            columns[0]
                .cast<List<*>>()
                .splitInto(*header.toTypedArray())

        else -> columns.toDataFrame()
    }
}
|
||||
|
||||
/**
 * Returns `true` when this frame has exactly one column and that column is an [UnnamedColumn].
 */
private fun AnyFrame.isSingleUnnamedColumn(): Boolean {
    if (columnsCount() != 1) return false
    return getColumn(0) is UnnamedColumn
}
|
||||
|
||||
/**
|
||||
* Json to DataFrame converter that creates `value` and `array` accessors
|
||||
* instead of [Any] columns.
|
||||
* A.k.a. [TypeClashTactic.ARRAY_AND_VALUE_COLUMNS].
|
||||
*
|
||||
* @param records List of json elements to be converted to a [DataFrame].
|
||||
* @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers].
|
||||
* @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
|
||||
* will be created.
|
||||
* @param header Optional list of column names. If given, [records] will be read like an object with [header] being the keys.
|
||||
* @return [DataFrame] from the given [records].
|
||||
*/
|
||||
internal fun fromJsonListArrayAndValueColumns(
|
||||
records: List<*>,
|
||||
unifyNumbers: Boolean,
|
||||
keyValuePaths: List<JsonPath> = emptyList(),
|
||||
header: List<String> = emptyList(),
|
||||
jsonPath: JsonPath = JsonPath(),
|
||||
): AnyFrame {
|
||||
var hasPrimitive = false
|
||||
var hasArray = false
|
||||
val isKeyValue = keyValuePaths.any { jsonPath.matches(it) }
|
||||
|
||||
// list element type can be JsonObject, JsonArray or primitive
|
||||
// So first, we gather all properties of objects to merge including "array" and "value" if needed
|
||||
// so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be
|
||||
// { array: List<String>, value: Int?, a: Int?, b: Int? }
|
||||
// and instances will look like
|
||||
// { "array": [], "value": 123, "a": null, "b": null }
|
||||
|
||||
val nameGenerator = ColumnNameGenerator()
|
||||
records.forEach { record ->
|
||||
when (record) {
|
||||
is JsonObject -> record.entries.forEach {
|
||||
nameGenerator.addIfAbsent(it.key)
|
||||
}
|
||||
|
||||
is JsonArray -> hasArray = true
|
||||
|
||||
is JsonNull, null -> Unit
|
||||
|
||||
is JsonPrimitive -> hasPrimitive = true
|
||||
}
|
||||
}
|
||||
if (records.all { it == null || it is JsonNull }) hasPrimitive = true
|
||||
|
||||
// Add a value column to the collected names if needed
|
||||
val valueColumn = if (hasPrimitive || records.isEmpty()) {
|
||||
nameGenerator.addUnique(VALUE_COLUMN_NAME)
|
||||
} else {
|
||||
null
|
||||
}
|
||||
|
||||
// Add an array column to the collected names if needed
|
||||
val arrayColumn = if (hasArray) {
|
||||
nameGenerator.addUnique(ARRAY_COLUMN_NAME)
|
||||
} else {
|
||||
null
|
||||
}
|
||||
|
||||
// only properties that consist of just objects (or are empty) can be merged to key/value FrameColumns
|
||||
if (isKeyValue && (hasPrimitive || hasArray)) {
|
||||
error("Key value path $jsonPath does not match objects.")
|
||||
}
|
||||
|
||||
// Create columns from the collected names
|
||||
val columns: List<AnyCol> = when {
|
||||
// instead of using the names, generate a single key/value frame column
|
||||
isKeyValue -> {
|
||||
val dataFrames = records.map { record ->
|
||||
when (record) {
|
||||
is JsonObject -> {
|
||||
val map = record.mapValues { (key, value) ->
|
||||
val parsed = fromJsonListArrayAndValueColumns(
|
||||
records = listOf(value),
|
||||
unifyNumbers = unifyNumbers,
|
||||
keyValuePaths = keyValuePaths,
|
||||
jsonPath = jsonPath.append(key),
|
||||
)
|
||||
if (parsed.isSingleUnnamedColumn()) {
|
||||
(parsed.getColumn(0) as UnnamedColumn).col.values.first()
|
||||
} else {
|
||||
parsed.unwrapUnnamedColumns().firstOrNull()
|
||||
}
|
||||
}
|
||||
val valueType =
|
||||
map.values
|
||||
.map { guessValueType(sequenceOf(it), unifyNumbers = unifyNumbers) }
|
||||
.commonType()
|
||||
|
||||
dataFrameOf(
|
||||
columnOf(*map.keys.toTypedArray()).named(NameValueProperty<*>::name.name),
|
||||
createColumnGuessingType(
|
||||
values = map.values,
|
||||
suggestedType = TypeSuggestion.Use(valueType),
|
||||
unifyNumbers = unifyNumbers,
|
||||
).named(NameValueProperty<*>::value.name),
|
||||
)
|
||||
}
|
||||
|
||||
is JsonNull, null -> DataFrame.emptyOf<AnyNameValueProperty>()
|
||||
|
||||
else -> error("Expected JsonObject, got $record")
|
||||
}
|
||||
}
|
||||
|
||||
listOf(
|
||||
UnnamedColumn(
|
||||
DataColumn.createFrameColumn(
|
||||
name = VALUE_COLUMN_NAME, // will be erased unless at top-level
|
||||
groups = dataFrames,
|
||||
schema = lazy {
|
||||
dataFrames.mapNotNull { it.takeIf { it.rowsCount() > 0 }?.schema() }.intersectSchemas()
|
||||
},
|
||||
),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// generate columns using the collected names
|
||||
else ->
|
||||
nameGenerator.names.map { colName ->
|
||||
when {
|
||||
// Collect primitive values from records into the `value` column if needed
|
||||
colName == valueColumn && (hasPrimitive || records.isEmpty()) -> {
|
||||
val collector: DataCollector<Any?> = createDataCollector(records.size)
|
||||
val nanIndices = mutableListOf<Int>()
|
||||
records.forEachIndexed { i, v ->
|
||||
when (v) {
|
||||
is JsonObject -> collector.add(null)
|
||||
|
||||
is JsonArray -> collector.add(null)
|
||||
|
||||
is JsonNull -> collector.add(null)
|
||||
|
||||
is JsonPrimitive -> {
|
||||
when {
|
||||
v.content == "NaN" -> {
|
||||
nanIndices.add(i)
|
||||
collector.add(null)
|
||||
}
|
||||
|
||||
v.isString -> collector.add(v.content)
|
||||
|
||||
v.booleanOrNull != null -> collector.add(v.boolean)
|
||||
|
||||
v.intOrNull != null -> collector.add(v.int)
|
||||
|
||||
v.longOrNull != null -> collector.add(v.long)
|
||||
|
||||
v.floatOrNull != null -> collector.add(v.float)
|
||||
|
||||
v.doubleOrNull != null -> collector.add(v.double)
|
||||
|
||||
else -> error("Malformed JSON element ${v::class}: $v")
|
||||
}
|
||||
}
|
||||
|
||||
else -> collector.add(v)
|
||||
}
|
||||
}
|
||||
val column = createColumnGuessingType(colName, collector.data, unifyNumbers = unifyNumbers)
|
||||
val res = if (nanIndices.isNotEmpty()) {
|
||||
fun <C> DataColumn<C>.updateNaNs(nanValue: C): DataColumn<C> {
|
||||
var j = 0
|
||||
var nextNanIndex = nanIndices[j]
|
||||
return mapIndexed(column.type) { i, v ->
|
||||
if (i == nextNanIndex) {
|
||||
j++
|
||||
nextNanIndex = if (j < nanIndices.size) nanIndices[j] else -1
|
||||
nanValue
|
||||
} else {
|
||||
v
|
||||
}
|
||||
}
|
||||
}
|
||||
when (column.typeClass) {
|
||||
Double::class -> column.cast<Double?>().updateNaNs(Double.NaN)
|
||||
Float::class -> column.cast<Float?>().updateNaNs(Float.NaN)
|
||||
String::class -> column.cast<String?>().updateNaNs("NaN")
|
||||
else -> column
|
||||
}
|
||||
} else {
|
||||
column
|
||||
}
|
||||
UnnamedColumn(res)
|
||||
}
|
||||
|
||||
// Collect arrays from records into the `array` column if needed
|
||||
colName == arrayColumn && hasArray -> {
|
||||
val values = mutableListOf<Any?>()
|
||||
val startIndices = ArrayList<Int>()
|
||||
records.forEach {
|
||||
startIndices.add(values.size)
|
||||
if (it is JsonArray) values.addAll(it.jsonArray)
|
||||
}
|
||||
val parsed = fromJsonListArrayAndValueColumns(
|
||||
records = values,
|
||||
unifyNumbers = unifyNumbers,
|
||||
keyValuePaths = keyValuePaths,
|
||||
jsonPath = jsonPath.appendArrayWithWildcard(),
|
||||
)
|
||||
|
||||
val res = when {
|
||||
parsed.isSingleUnnamedColumn() -> {
|
||||
val col = (parsed.getColumn(0) as UnnamedColumn).col
|
||||
val elementType = col.type
|
||||
val columnValues =
|
||||
col.values
|
||||
.asList()
|
||||
.splitByIndices(startIndices.asSequence())
|
||||
.toList()
|
||||
DataColumn.createValueColumn(
|
||||
name = colName,
|
||||
values = columnValues,
|
||||
type = List::class.createType(listOf(KTypeProjection.invariant(elementType))),
|
||||
)
|
||||
}
|
||||
|
||||
else -> parsed.unwrapUnnamedColumns().chunked(startIndices, colName)
|
||||
}
|
||||
UnnamedColumn(res)
|
||||
}
|
||||
|
||||
// Collect the current column name as property from the objects in records
|
||||
else -> {
|
||||
val values = ArrayList<Any?>(records.size)
|
||||
records.forEach {
|
||||
when (it) {
|
||||
is JsonObject -> values.add(it[colName])
|
||||
else -> values.add(null)
|
||||
}
|
||||
}
|
||||
|
||||
val parsed = fromJsonListArrayAndValueColumns(
|
||||
records = values,
|
||||
unifyNumbers = unifyNumbers,
|
||||
keyValuePaths = keyValuePaths,
|
||||
jsonPath = jsonPath.append(colName),
|
||||
)
|
||||
when {
|
||||
parsed.columnsCount() == 0 ->
|
||||
DataColumn.createValueColumn(
|
||||
name = colName,
|
||||
values = arrayOfNulls<Any?>(values.size).toList(),
|
||||
type = typeOf<Any?>(),
|
||||
)
|
||||
|
||||
parsed.isSingleUnnamedColumn() ->
|
||||
(parsed.getColumn(0) as UnnamedColumn).col.rename(colName)
|
||||
|
||||
else ->
|
||||
DataColumn.createColumnGroup(colName, parsed.unwrapUnnamedColumns()) as AnyCol
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return when {
|
||||
columns.isEmpty() ->
|
||||
DataFrame.empty(records.size)
|
||||
|
||||
columns.size == 1 && hasArray && header.isNotEmpty() && columns[0].typeClass == List::class ->
|
||||
columns[0]
|
||||
.cast<List<*>>()
|
||||
.splitInto(*header.toTypedArray())
|
||||
|
||||
else ->
|
||||
columns.toDataFrame()
|
||||
}
|
||||
}
|
||||
|
||||
// Marker wrapper used to check whether the AnyFrame created by a recursive call has a single unnamed column.
|
||||
// "Unnamed" means the column was not created from a field of a record, like [{"value": 1}, {"value": 2}],
|
||||
// but from the filtered primitive values: [1, { ... }, []] -> [1, null, null]
|
||||
// or from the arrays: [1, { ... }, []] -> [null, null, []]
|
||||
private class UnnamedColumn(val col: DataColumn<Any?>) : DataColumn<Any?> by col
|
||||
|
||||
// region friend module error suppression
|
||||
|
||||
// Local forwarder to `org.jetbrains.kotlinx.dataframe.impl.createDataCollector(initCapacity)`.
// NOTE(review): the suppression is presumably needed because the target is not visible from this module — confirm.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun createDataCollector(initCapacity: Int = 0) =
|
||||
org.jetbrains.kotlinx.dataframe.impl.createDataCollector(initCapacity)
|
||||
|
||||
// Typed overload: forwards to `org.jetbrains.kotlinx.dataframe.impl.createDataCollector<T>(initCapacity, type)`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun <T> createDataCollector(initCapacity: Int = 0, type: KType) =
|
||||
org.jetbrains.kotlinx.dataframe.impl.createDataCollector<T>(initCapacity, type)
|
||||
|
||||
// Named-column overload: forwards every argument unchanged to
// `org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType`.
// The defaults declared here are the ones this file relies on when an argument is omitted.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun <T> createColumnGuessingType(
|
||||
name: String,
|
||||
values: Iterable<T>,
|
||||
suggestedType: TypeSuggestion = TypeSuggestion.Infer,
|
||||
defaultValue: T? = null,
|
||||
nullable: Boolean? = null,
|
||||
listifyValues: Boolean = false,
|
||||
allColsMakesColGroup: Boolean = false,
|
||||
unifyNumbers: Boolean = false,
|
||||
) = org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType(
|
||||
name = name,
|
||||
values = values,
|
||||
suggestedType = suggestedType,
|
||||
defaultValue = defaultValue,
|
||||
nullable = nullable,
|
||||
listifyValues = listifyValues,
|
||||
allColsMakesColGroup = allColsMakesColGroup,
|
||||
unifyNumbers = unifyNumbers,
|
||||
)
|
||||
|
||||
// Overload without a column name: forwards every argument unchanged to
// `org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun <T> createColumnGuessingType(
|
||||
values: Iterable<T>,
|
||||
suggestedType: TypeSuggestion = TypeSuggestion.Infer,
|
||||
defaultValue: T? = null,
|
||||
nullable: Boolean? = null,
|
||||
listifyValues: Boolean = false,
|
||||
allColsMakesColGroup: Boolean = false,
|
||||
unifyNumbers: Boolean = false,
|
||||
) = org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType(
|
||||
values = values,
|
||||
suggestedType = suggestedType,
|
||||
defaultValue = defaultValue,
|
||||
nullable = nullable,
|
||||
listifyValues = listifyValues,
|
||||
allColsMakesColGroup = allColsMakesColGroup,
|
||||
unifyNumbers = unifyNumbers,
|
||||
)
|
||||
|
||||
// Forwards every argument unchanged to `org.jetbrains.kotlinx.dataframe.impl.guessValueType`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun guessValueType(
|
||||
values: Sequence<Any?>,
|
||||
upperBound: KType? = null,
|
||||
listifyValues: Boolean = false,
|
||||
allColsMakesRow: Boolean = false,
|
||||
unifyNumbers: Boolean = false,
|
||||
) = org.jetbrains.kotlinx.dataframe.impl.guessValueType(
|
||||
values = values,
|
||||
upperBound = upperBound,
|
||||
listifyValues = listifyValues,
|
||||
allColsMakesRow = allColsMakesRow,
|
||||
unifyNumbers = unifyNumbers,
|
||||
)
|
||||
|
||||
// Extension-style forwarder to `org.jetbrains.kotlinx.dataframe.impl.splitByIndices`, passing the receiver as `list`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun <T> List<T>.splitByIndices(startIndices: Sequence<Int>) =
|
||||
org.jetbrains.kotlinx.dataframe.impl.splitByIndices(list = this, startIndices = startIndices)
|
||||
|
||||
// Extension-style forwarder to `org.jetbrains.kotlinx.dataframe.impl.commonType`, passing the receiver as `types`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun Iterable<KType?>.commonType(useStar: Boolean = true) =
|
||||
org.jetbrains.kotlinx.dataframe.impl.commonType(types = this, useStar)
|
||||
|
||||
// Extension-style forwarder to `org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas`,
// passing the receiver as `schemas`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun Iterable<DataFrameSchema>.intersectSchemas() =
|
||||
org.jetbrains.kotlinx.dataframe.impl.schema.intersectSchemas(schemas = this)
|
||||
|
||||
// endregion
|
||||
Vendored
+460
@@ -0,0 +1,460 @@
|
||||
@file:OptIn(ExperimentalSerializationApi::class)
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import kotlinx.serialization.ExperimentalSerializationApi
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonArray
|
||||
import kotlinx.serialization.json.JsonElement
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.JsonPrimitive
|
||||
import kotlinx.serialization.json.addAll
|
||||
import kotlinx.serialization.json.buildJsonArray
|
||||
import kotlinx.serialization.json.buildJsonObject
|
||||
import kotlinx.serialization.json.encodeToJsonElement
|
||||
import kotlinx.serialization.json.put
|
||||
import kotlinx.serialization.json.putJsonArray
|
||||
import kotlinx.serialization.json.putJsonObject
|
||||
import org.jetbrains.kotlinx.dataframe.AnyCol
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.ColumnsContainer
|
||||
import org.jetbrains.kotlinx.dataframe.DataColumn
|
||||
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.indices
|
||||
import org.jetbrains.kotlinx.dataframe.api.isList
|
||||
import org.jetbrains.kotlinx.dataframe.api.rows
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.api.take
|
||||
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
|
||||
import org.jetbrains.kotlinx.dataframe.columns.ColumnKind
|
||||
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.DATA
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.IS_FORMATTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KIND
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.METADATA
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NCOL
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.NROW
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.VERSION
|
||||
import org.jetbrains.kotlinx.dataframe.io.ARRAY_COLUMN_NAME
|
||||
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions
|
||||
import org.jetbrains.kotlinx.dataframe.io.CustomEncoder
|
||||
import org.jetbrains.kotlinx.dataframe.io.VALUE_COLUMN_NAME
|
||||
import org.jetbrains.kotlinx.dataframe.jupyter.KotlinNotebookPluginUtils
|
||||
import org.jetbrains.kotlinx.dataframe.name
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import org.jetbrains.kotlinx.dataframe.typeClass
|
||||
import java.awt.RenderingHints
|
||||
import java.awt.image.BufferedImage
|
||||
import java.awt.image.ImageObserver
|
||||
import java.io.IOException
|
||||
|
||||
// See docs/serialization_format.md for a description of
|
||||
// serialization versions and format.
|
||||
internal const val SERIALIZATION_VERSION = "2.2.0"
|
||||
|
||||
// JSON object keys written by the metadata-carrying encoders in this file
// (e.g. encodeRowWithMetadata and encodeDataFrameWithMetadata).
internal object SerializationKeys {
|
||||
const val DATA = "data"
|
||||
const val METADATA = "metadata"
|
||||
const val KIND = "kind"
|
||||
const val NCOL = "ncol"
|
||||
const val NROW = "nrow"
|
||||
const val VERSION = "\$version"
|
||||
const val COLUMNS = "columns"
|
||||
const val KOTLIN_DATAFRAME = "kotlin_dataframe"
|
||||
const val TYPE = "type"
|
||||
const val TYPES = "types"
|
||||
const val IS_FORMATTED = "is_formatted"
|
||||
}
|
||||
|
||||
// Column type classes whose cells encodeRow/encodeValue pass to convert();
// cells of other (non-list) column types are written via toString() instead.
private val valueTypes =
|
||||
setOf(Boolean::class, Double::class, Int::class, Float::class, Long::class, Short::class, Byte::class)
|
||||
|
||||
/**
 * Wraps an arbitrary cell [value] into a [JsonElement].
 *
 * Existing [JsonElement]s are returned as is; numbers, booleans and strings become the matching
 * [JsonPrimitive]; `null` becomes the JSON null primitive; anything else (including [Char]) is
 * stored as the string produced by `toString()`.
 */
@OptIn(ExperimentalSerializationApi::class)
private fun convert(value: Any?): JsonElement {
    if (value == null) return JsonPrimitive(null)
    return when (value) {
        is JsonElement -> value
        is Number -> JsonPrimitive(value)
        is Boolean -> JsonPrimitive(value)
        is String -> JsonPrimitive(value)
        // Char and every remaining type are represented by their string form
        else -> JsonPrimitive(value.toString())
    }
}
|
||||
|
||||
/**
 * Encodes the row at [index] of [frame] as a [JsonObject] keyed by column name.
 *
 * Column groups are encoded recursively as nested objects, frame columns as arrays via [encodeFrame],
 * list columns as JSON arrays (or JSON null for a null cell), columns whose type class is in
 * `valueTypes` via `convert`, and every other column by the cell's `toString()`.
 * A container without columns yields an empty object.
 */
internal fun encodeRow(frame: ColumnsContainer<*>, index: Int): JsonObject {
    val fields = LinkedHashMap<String, JsonElement>()
    for (col in frame.columns()) {
        val encoded: JsonElement = when {
            col is ColumnGroup<*> -> encodeRow(col, index)

            col is FrameColumn<*> -> encodeFrame(col[index])

            col.isList() -> when (val cell = col[index]) {
                null -> JsonPrimitive(null)
                else -> JsonArray((cell as List<*>).map { element -> convert(element) })
            }

            col.typeClass in valueTypes -> convert(col[index])

            else -> JsonPrimitive(col[index]?.toString())
        }
        fields[col.name] = encoded
    }
    return JsonObject(fields)
}
|
||||
|
||||
/**
 * Encodes the row at [index] of [frame] as a JSON object in which nested structures carry metadata.
 *
 * - Column groups become `{ data, metadata }` where metadata lists kind, column names and type descriptors.
 * - Frame columns become `{ data, metadata }` too; the nested frame is truncated to [rowLimit] rows when a
 *   limit is given, while `ncol`/`nrow` in the metadata describe the untruncated nested frame.
 * - All other columns are delegated to [encodeValue].
 *
 * @return the encoded object, or `null` when [frame] has no columns.
 */
internal fun encodeRowWithMetadata(
    frame: ColumnsContainer<*>,
    index: Int,
    rowLimit: Int? = null,
    customEncoders: List<CustomEncoder> = emptyList(),
): JsonElement? {
    val fields: Map<String, JsonElement> = frame.columns().associate { col ->
        val encoded: JsonElement = when (col) {
            is ColumnGroup<*> -> {
                val groupSchema = col.schema()
                val nested: JsonElement =
                    encodeRowWithMetadata(col, index, rowLimit, customEncoders) ?: JsonPrimitive(null)
                buildJsonObject {
                    put(DATA, nested)
                    putJsonObject(METADATA) {
                        put(KIND, JsonPrimitive(ColumnKind.Group.toString()))
                        put(COLUMNS, Json.encodeToJsonElement(groupSchema.columns.keys))
                        putJsonArray(TYPES) {
                            addAll(groupSchema.columns.values.map { createJsonTypeDescriptor(it) })
                        }
                    }
                }
            }

            is FrameColumn<*> -> {
                val cellFrame = col[index]
                val visibleRows = if (rowLimit == null) cellFrame else cellFrame.take(rowLimit)
                val data = encodeFrameWithMetadata(visibleRows, rowLimit, customEncoders)
                val frameSchema = col.schema.value
                buildJsonObject {
                    put(DATA, data)
                    putJsonObject(METADATA) {
                        put(KIND, JsonPrimitive(ColumnKind.Frame.toString()))
                        put(COLUMNS, Json.encodeToJsonElement(frameSchema.columns.keys))
                        putJsonArray(TYPES) {
                            addAll(frameSchema.columns.values.map { createJsonTypeDescriptor(it) })
                        }
                        // sizes refer to the full nested frame, not to the truncated one
                        put(NCOL, JsonPrimitive(cellFrame.columnsCount()))
                        put(NROW, JsonPrimitive(cellFrame.rowsCount()))
                    }
                }
            }

            else -> encodeValue(col, index, customEncoders)
        }
        col.name to encoded
    }
    return if (fields.isEmpty()) null else JsonObject(fields)
}
|
||||
|
||||
/**
 * Encodes the cell of [col] at [index].
 *
 * The first of [customEncoders] that reports it can encode the cell wins. Otherwise list columns become
 * JSON arrays (an empty array for a null cell), columns whose type class is in `valueTypes` go through
 * `convert`, and everything else is written as the cell's `toString()` (JSON null for a null cell).
 */
internal fun encodeValue(col: AnyCol, index: Int, customEncoders: List<CustomEncoder> = emptyList()): JsonElement {
    val cell = col[index]

    customEncoders.firstOrNull { it.canEncode(cell) }?.let { encoder ->
        return encoder.encode(cell)
    }

    return when {
        col.isList() ->
            if (cell == null) {
                JsonArray(emptyList())
            } else {
                JsonArray((cell as List<*>).map { element -> convert(element) })
            }

        col.typeClass in valueTypes -> convert(cell)

        else -> JsonPrimitive(cell?.toString())
    }
}
|
||||
|
||||
/**
 * [CustomEncoder] for cells that `isDataframeConvertable` accepts.
 *
 * Such a cell is converted with [KotlinNotebookPluginUtils.convertToDataFrame], encoded with
 * [encodeFrameWithMetadata] (using [rowLimit] and the given [encoders]) and wrapped as
 * `{ data, metadata: { kind, is_formatted } }`. A `null` input is encoded as JSON null.
 */
internal class DataframeConvertableEncoder(
    private val encoders: List<CustomEncoder>,
    private val rowLimit: Int? = null,
) : CustomEncoder {
    override fun canEncode(input: Any?): Boolean = isDataframeConvertable(input)

    override fun encode(input: Any?): JsonElement {
        if (input == null) return JsonPrimitive(null)

        val data = encodeFrameWithMetadata(
            KotlinNotebookPluginUtils.convertToDataFrame(input),
            rowLimit,
            encoders,
        )
        val formatted = input is FormattedFrame<*>
        return buildJsonObject {
            put(DATA, data)
            putJsonObject(METADATA) {
                put(KIND, JsonPrimitive(CellKind.DataFrameConvertable.toString()))
                put(IS_FORMATTED, JsonPrimitive(formatted))
            }
        }
    }
}
|
||||
|
||||
/**
 * [CustomEncoder] that writes [BufferedImage] cells as Base64 strings, following [options]
 * (optional size limiting and optional gzip compression of the image bytes).
 * A `null` input, or an [IOException] while encoding, results in an empty string.
 */
internal class BufferedImageEncoder(private val options: Base64ImageEncodingOptions) : CustomEncoder {
    override fun canEncode(input: Any?): Boolean = input is BufferedImage

    override fun encode(input: Any?): JsonElement {
        val base64 = if (input == null) "" else encodeBufferedImageAsBase64(input as BufferedImage, options)
        return JsonPrimitive(base64)
    }

    private fun encodeBufferedImageAsBase64(
        image: BufferedImage,
        imageEncodingOptions: Base64ImageEncodingOptions = Base64ImageEncodingOptions(),
    ): String =
        try {
            val source = when {
                imageEncodingOptions.isLimitSizeOn ->
                    image.resizeKeepingAspectRatio(imageEncodingOptions.imageSizeLimit)

                else -> image
            }
            val rawBytes = source.toByteArray()
            val payload = if (imageEncodingOptions.isGzipOn) rawBytes.encodeGzip() else rawBytes
            payload.toBase64()
        } catch (_: IOException) {
            // best effort: an image that cannot be written is represented by an empty string
            ""
        }
}
|
||||
|
||||
/**
 * Builds the type descriptor for [columnSchema]: always its `kind`, plus its `type`
 * when the column is a value column.
 */
private fun createJsonTypeDescriptor(columnSchema: ColumnSchema): JsonObject =
    buildJsonObject {
        put(KIND, JsonPrimitive(columnSchema.kind.toString()))
        if (columnSchema.kind == ColumnKind.Value) {
            put(TYPE, JsonPrimitive(columnSchema.type.toString()))
        }
    }
|
||||
|
||||
/**
 * Encodes every row of [frame] into a [JsonArray], with metadata on nested structures.
 *
 * Per row, the first applicable representation is used:
 * 1. the non-null cell of the extracted `value` column,
 * 2. the nested frame of the extracted `array` column (only when that column is a frame column),
 * 3. the row encoded with [encodeRowWithMetadata].
 *
 * [rowLimit] and [customEncoders] are passed on to the nested encoders.
 */
internal fun encodeFrameWithMetadata(
    frame: AnyFrame,
    rowLimit: Int? = null,
    customEncoders: List<CustomEncoder> = emptyList(),
): JsonArray {
    val valueColumn = frame.extractValueColumn()
    val arrayColumn = frame.extractArrayColumn()
    val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame

    fun payloadOf(rowIndex: Int): Any? {
        valueColumn?.get(rowIndex)?.let { return it }

        val arrayCell = arrayColumn?.get(rowIndex)
        if (arrayCell != null && arraysAreFrames) {
            return encodeFrameWithMetadata(arrayCell as AnyFrame, rowLimit, customEncoders)
        }

        return encodeRowWithMetadata(frame, rowIndex, rowLimit, customEncoders)
    }

    return JsonArray(frame.indices().map { rowIndex -> convert(payloadOf(rowIndex)) })
}
|
||||
|
||||
/**
 * Finds the column that holds the "unnamed" primitive values of this frame, if any.
 *
 * Candidates are the columns whose name starts with [VALUE_COLUMN_NAME]; the one with the greatest name is
 * chosen. It is returned only if the frame does not consist of a single column, the candidate is a value
 * column, and in every row where it is non-null all other columns are null.
 */
internal fun AnyFrame.extractValueColumn(): DataColumn<*>? {
    if (!isPossibleToFindUnnamedColumns) return null

    val allColumns = columns()
    val valueCol = allColumns
        .filter { it.name.startsWith(VALUE_COLUMN_NAME) }
        .maxByOrNull { it.name }
        ?: return null
    if (valueCol.kind() != ColumnKind.Value) return null

    // the value may be non-null only in rows where every other column is null
    val isValidValueColumn = rows().all { row ->
        valueCol[row] == null ||
            allColumns.all { col -> col.name == valueCol.name || col[row] == null }
    }
    return valueCol.takeIf { isValidValueColumn }
}
|
||||
|
||||
/**
 * `false` when the frame has exactly one column.
 *
 * With a single column the "non-null only where all other columns are null" check is trivially true,
 * yet something like `dataFrameOf("value")(1, 2, 3)` was created by the user and must not be treated
 * as an unnamed column.
 */
internal val AnyFrame.isPossibleToFindUnnamedColumns: Boolean
    get() {
        val columnCount = columns().size
        return columnCount != 1
    }
|
||||
|
||||
/**
 * Finds the column that holds the "unnamed" arrays of this frame, if any.
 *
 * Candidates are the columns whose name starts with [ARRAY_COLUMN_NAME]; the one with the greatest name is
 * chosen. It is returned only if the frame does not consist of a single column, the candidate is not a
 * column group, and in every row where it is non-null all other columns are null.
 */
internal fun AnyFrame.extractArrayColumn(): DataColumn<*>? {
    if (!isPossibleToFindUnnamedColumns) return null

    val allColumns = columns()
    val arrayCol = allColumns
        .filter { it.name.startsWith(ARRAY_COLUMN_NAME) }
        .maxByOrNull { it.name }
        ?: return null
    if (arrayCol.kind() == ColumnKind.Group) return null

    // the array may be non-null only in rows where every other column is null
    val isValidArrayColumn = rows().all { row ->
        arrayCol[row] == null ||
            allColumns.all { col -> col.name == arrayCol.name || col[row] == null }
    }
    return arrayCol.takeIf { isValidArrayColumn }
}
|
||||
|
||||
/**
 * Encodes every row of [frame] into a [JsonArray] without metadata.
 *
 * Per row, the first applicable representation is used:
 * 1. the non-null cell of the extracted `value` column,
 * 2. the nested frame of the extracted `array` column (only when that column is a frame column),
 * 3. the row encoded as an object with [encodeRow].
 *
 * The fallback is decided per row, exactly as in [encodeFrameWithMetadata]. Choosing a branch merely
 * because a value/array column exists would emit JSON null for rows whose value cell is null, and for
 * every row when the array column holds lists instead of frames, dropping the data of the other columns.
 */
internal fun encodeFrame(frame: AnyFrame): JsonArray {
    val valueColumn = frame.extractValueColumn()
    val arrayColumn = frame.extractArrayColumn()

    val arraysAreFrames = arrayColumn?.kind() == ColumnKind.Frame

    val data = frame.indices().map { rowIndex ->
        valueColumn?.get(rowIndex)
            ?: arrayColumn?.get(rowIndex)?.let {
                if (arraysAreFrames) {
                    encodeFrame(it as AnyFrame)
                } else {
                    // list-typed arrays are written by encodeRow below
                    null
                }
            }
            ?: encodeRow(frame, rowIndex)
    }

    return buildJsonArray { addAll(data.map { convert(it) }) }
}
|
||||
|
||||
/**
 * Encodes [frame] as the top-level JSON object of the metadata-carrying format.
 *
 * The object holds the serialization version, a metadata section (column names, type descriptors,
 * row/column counts of the full frame and the [isFormatted] flag) and the first [rowLimit] rows encoded
 * with [encodeFrameWithMetadata], where [nestedRowLimit] limits nested frames.
 */
internal fun encodeDataFrameWithMetadata(
    frame: AnyFrame,
    rowLimit: Int,
    nestedRowLimit: Int? = null,
    customEncoders: List<CustomEncoder> = emptyList(),
    isFormatted: Boolean = false,
): JsonObject {
    val columnNames = frame.columnNames().map { JsonPrimitive(it) }
    val typeDescriptors = frame.schema().columns.values.map { colSchema -> createJsonTypeDescriptor(colSchema) }
    val rows = encodeFrameWithMetadata(
        frame = frame.take(rowLimit),
        rowLimit = nestedRowLimit,
        customEncoders = customEncoders,
    )

    return buildJsonObject {
        put(VERSION, JsonPrimitive(SERIALIZATION_VERSION))
        putJsonObject(METADATA) {
            put(COLUMNS, JsonArray(columnNames))
            put(TYPES, JsonArray(typeDescriptors))
            // counts describe the whole frame, not only the rows kept by rowLimit
            put(NROW, JsonPrimitive(frame.rowsCount()))
            put(NCOL, JsonPrimitive(frame.columnsCount()))
            put(IS_FORMATTED, JsonPrimitive(isFormatted))
        }
        put(KOTLIN_DATAFRAME, rows)
    }
}
|
||||
|
||||
/**
 * Encodes [df] as a flat JSON object: row/column counts of the full frame, column names,
 * the [isFormatted] flag and the first [limit] rows encoded with [encodeFrame] (no nested metadata).
 */
@OptIn(ExperimentalSerializationApi::class)
internal fun encodeFrameNoDynamicNestedTables(df: AnyFrame, limit: Int, isFormatted: Boolean): JsonObject {
    val columnNames = JsonArray(df.columnNames().map { JsonPrimitive(it) })
    val rows = encodeFrame(df.take(limit))

    return buildJsonObject {
        put(NROW, JsonPrimitive(df.rowsCount()))
        put(NCOL, JsonPrimitive(df.columnsCount()))
        put(COLUMNS, columnNames)
        put(IS_FORMATTED, JsonPrimitive(isFormatted))
        put(KOTLIN_DATAFRAME, rows)
    }
}
|
||||
|
||||
// region friend module error suppression
|
||||
|
||||
// Local alias object exposing `org.jetbrains.kotlinx.dataframe.columns.CellKind.DataFrameConvertable`,
// so the rest of this file can refer to it as `CellKind.DataFrameConvertable`.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private object CellKind {
|
||||
val DataFrameConvertable = org.jetbrains.kotlinx.dataframe.columns.CellKind.DataFrameConvertable
|
||||
}
|
||||
|
||||
// Forwards to `KotlinNotebookPluginUtils.isDataframeConvertable`; used by DataframeConvertableEncoder.canEncode.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun isDataframeConvertable(dataframeLike: Any?) =
|
||||
KotlinNotebookPluginUtils.isDataframeConvertable(dataframeLike = dataframeLike)
|
||||
|
||||
// Extension-style forwarder to `org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio`,
// passing the receiver as `image` and every other argument unchanged.
@Suppress("INVISIBLE_REFERENCE")
|
||||
internal fun BufferedImage.resizeKeepingAspectRatio(
|
||||
maxSize: Int,
|
||||
resultImageType: Int = BufferedImage.TYPE_INT_ARGB,
|
||||
interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR,
|
||||
renderingQuality: Any = RenderingHints.VALUE_RENDER_QUALITY,
|
||||
antialiasing: Any = RenderingHints.VALUE_ANTIALIAS_ON,
|
||||
observer: ImageObserver? = null,
|
||||
) = org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio(
|
||||
image = this,
|
||||
maxSize = maxSize,
|
||||
resultImageType = resultImageType,
|
||||
interpolation = interpolation,
|
||||
renderingQuality = renderingQuality,
|
||||
antialiasing = antialiasing,
|
||||
observer = observer,
|
||||
)
|
||||
|
||||
private const val DEFAULT_IMG_FORMAT: String = "png"
|
||||
|
||||
// Extension-style forwarder to `org.jetbrains.kotlinx.dataframe.impl.io.toByteArray`,
// passing the receiver as `image`; `format` defaults to DEFAULT_IMG_FORMAT.
@Suppress("INVISIBLE_REFERENCE")
|
||||
private fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT) =
|
||||
org.jetbrains.kotlinx.dataframe.impl.io.toByteArray(image = this, format = format)
|
||||
|
||||
// endregion
|
||||
+19
@@ -0,0 +1,19 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io;
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame;
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.RequiredByIntellijPlugin;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
class JsonFacadeForDebugger {
|
||||
/**
|
||||
* Utility for rendering a dataframe as an interactive table in the debugger, which needs a JSON model.
|
||||
* It is a Java class because that is easier to discover from the debugger.
|
||||
* DO NOT BREAK ABI OF THIS METHOD!!
|
||||
* Keep it for backward compatibility; create a new method if the signature must change.
|
||||
*/
|
||||
@RequiredByIntellijPlugin
|
||||
static String convertToJson(DataFrame<?> df, int rowLimit, Integer nestedRowLimit) {
|
||||
return JsonKt.toJsonWithMetadata(df, rowLimit, nestedRowLimit, false, Collections.emptyList(), false);
|
||||
}
|
||||
}
|
||||
+484
@@ -0,0 +1,484 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import kotlinx.serialization.ExperimentalSerializationApi
|
||||
import kotlinx.serialization.json.Json
|
||||
import kotlinx.serialization.json.JsonElement
|
||||
import kotlinx.serialization.json.decodeFromStream
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.AnyRow
|
||||
import org.jetbrains.kotlinx.dataframe.DataColumn
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataRow
|
||||
import org.jetbrains.kotlinx.dataframe.api.FormattedFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.JsonPath
|
||||
import org.jetbrains.kotlinx.dataframe.api.NameValueProperty
|
||||
import org.jetbrains.kotlinx.dataframe.api.single
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
|
||||
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
|
||||
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
|
||||
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.encodeDataFrameWithMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.encodeFrame
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.encodeRow
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readJsonImpl
|
||||
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic
|
||||
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ANY_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.net.URL
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writeText
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * [SupportedDataFrameFormat] implementation that reads JSON sources via [DataFrame.readJson].
 *
 * The constructor arguments are forwarded unchanged to every `readJson` call made by this instance and are
 * also rendered into the default read method produced by [createDefaultReadMethod].
 *
 * @param typeClashTactic How to handle type clashes when reading a JSON file, see [TypeClashTactic].
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 */
public class JSON(
    private val typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    private val keyValuePaths: List<JsonPath> = emptyList(),
    private val unifyNumbers: Boolean = true,
) : SupportedDataFrameFormat {

    override val testOrder: Int = 10_000

    /** Only the exact, lower-case extension `json` is accepted. */
    override fun acceptsExtension(ext: String): Boolean = "json" == ext

    // Extension is enough, so every sample is accepted.
    override fun acceptsSample(sample: SupportedFormatSample): Boolean = true

    /** Reads [stream] as JSON using the settings this format was constructed with. */
    override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame {
        return DataFrame.readJson(
            stream = stream,
            header = header,
            keyValuePaths = keyValuePaths,
            typeClashTactic = typeClashTactic,
            unifyNumbers = unifyNumbers,
        )
    }

    /** Reads the file at [path] as JSON using the settings this format was constructed with. */
    override fun readDataFrame(path: Path, header: List<String>): AnyFrame {
        return DataFrame.readJson(
            path = path,
            header = header,
            keyValuePaths = keyValuePaths,
            typeClashTactic = typeClashTactic,
            unifyNumbers = unifyNumbers,
        )
    }

    /**
     * Builds the [DefaultReadJsonMethod] for [pathRepresentation], passing along this format's settings
     * as source-code snippets with fully qualified names.
     */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod {
        val keyValuePathsCode = keyValuePathsAsCode()
        val typeClashTacticCode = "org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.${typeClashTactic.name}"
        val unifyNumbersCode = "$unifyNumbers"

        val readArguments = MethodArguments()
            .add("keyValuePaths", typeOf<List<JsonPath>>(), keyValuePathsCode)
            .add("typeClashTactic", typeOf<TypeClashTactic>(), typeClashTacticCode)
            .add("unifyNumbers", typeOf<Boolean>(), unifyNumbersCode)

        return DefaultReadJsonMethod(path = pathRepresentation, arguments = readArguments)
    }

    /** Renders [keyValuePaths] as a `listOf(...)` expression of `JsonPath` constructor calls. */
    private fun keyValuePathsAsCode(): String =
        keyValuePaths.joinToString(prefix = "listOf(", postfix = ")") { jsonPath ->
            "org.jetbrains.kotlinx.dataframe.api.JsonPath(\"\"\"${jsonPath.path}\"\"\")"
        }

    /**
     * Allows the choice of how to handle type clashes when reading a JSON file.
     * Such as:
     * ```json
     * [
     *     { "a": "text" },
     *     { "a": { "b": 2 } },
     *     { "a": [6, 7, 8] }
     * ]
     * ```
     *
     * [ARRAY_AND_VALUE_COLUMNS] (default) will create a [DataFrame] looking like (including `null` and `[]` values):
     * ```
     * ⌌----------------------------------------------⌍
     * |  | a:{b:Int?, value:String?, array:List<Int>}|
     * |--|-------------------------------------------|
     * | 0|      { b:null, value:"text", array:[] }   |
     * | 1|         { b:2, value:null, array:[] }     |
     * | 2| { b:null, value:null, array:[6, 7, 8] }   |
     * ⌎----------------------------------------------⌏
     * ```
     * So, for the type clashing argument it will create a [ColumnGroup] with the properties `value`, `array`,
     * and the unwrapped properties of the objects the property can be.
     *
     * [ANY_COLUMNS] will create a [DataFrame] looking like:
     * ```
     * ⌌-------------⌍
     * |  |     a:Any|
     * |--|----------|
     * | 0|    "text"|
     * | 1|   { b:2 }|
     * | 2| [6, 7, 8]|
     * ⌎-------------⌏
     * ```
     */
    public enum class TypeClashTactic {
        ARRAY_AND_VALUE_COLUMNS,
        ANY_COLUMNS,
    }
}
|
||||
|
||||
/**
 * Column name `"array"`.
 * NOTE(review): presumably the `array` property of the [ColumnGroup] described for
 * [ARRAY_AND_VALUE_COLUMNS] — confirm against the reader implementation.
 */
internal const val ARRAY_COLUMN_NAME: String = "array"
|
||||
/**
 * Column name `"value"`.
 * NOTE(review): presumably the `value` property of the [ColumnGroup] described for
 * [ARRAY_AND_VALUE_COLUMNS] — confirm against the reader implementation.
 */
internal const val VALUE_COLUMN_NAME: String = "value"
|
||||
|
||||
/**
 * Reads the JSON stored in [file] into a [DataFrame].
 *
 * The [file] is converted to a [Path] and handed to the [Path]-based `readJson` overload.
 *
 * @param file The file whose JSON content is converted to a [DataFrame].
 * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataFrame] from the given [file].
 */
public fun DataFrame.Companion.readJson(
    file: File,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    val filePath: Path = file.toPath()
    return DataFrame.readJson(
        filePath,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
}
|
||||
|
||||
/**
 * Reads the JSON stored at [path] into a [DataFrame].
 *
 * The [path] is converted to a [URL] and handed to the [URL]-based `readJson` overload.
 *
 * @param path The path whose JSON content is converted to a [DataFrame].
 * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataFrame] from the given [path].
 */
public fun DataFrame.Companion.readJson(
    path: Path,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    val location: URL = path.toUri().toURL()
    return DataFrame.readJson(
        location,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
}
|
||||
|
||||
/**
 * Reads the JSON stored in [file] into a single [DataRow].
 *
 * The content is first read as a [DataFrame]; that frame's `single()` row is returned.
 *
 * @param file The file whose JSON content is converted to a [DataRow].
 * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataRow] from the given [file].
 */
public fun DataRow.Companion.readJson(
    file: File,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJson(
        file.toPath(),
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
    return frame.single()
}
|
||||
|
||||
/**
 * Reads the JSON stored at [path] into a single [DataRow].
 *
 * The content is first read as a [DataFrame]; that frame's `single()` row is returned.
 *
 * @param path The path whose JSON content is converted to a [DataRow].
 * @param header Optional list of column names. If given, the file will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataRow] from the given [path].
 */
public fun DataRow.Companion.readJson(
    path: Path,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJson(
        path,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
    return frame.single()
}
|
||||
|
||||
/**
 * Reads JSON from the location named by [path] into a [DataFrame].
 *
 * The [path] is resolved with `asUrl` and handed to the [URL]-based `readJson` overload.
 *
 * @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataFrame].
 * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataFrame] from the given [path].
 */
public fun DataFrame.Companion.readJson(
    path: String,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    val location = asUrl(path)
    return DataFrame.readJson(
        location,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
}
|
||||
|
||||
/**
 * Reads JSON from the location named by [path] into a single [DataRow].
 *
 * The content is first read as a [DataFrame]; that frame's `single()` row is returned.
 *
 * @param path URL or file path from where to fetch the Json as [InputStream] to be converted to a [DataRow].
 * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataRow] from the given [path].
 */
public fun DataRow.Companion.readJson(
    path: String,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJson(
        path,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
    return frame.single()
}
|
||||
|
||||
/**
 * Reads JSON fetched from [url] into a [DataFrame].
 *
 * The read runs inside `catchHttpResponse`, which supplies the value passed on to the
 * [InputStream]-based `readJson` overload.
 *
 * @param url Where to fetch the Json as [InputStream] to be converted to a [DataFrame].
 * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataFrame] from the given [url].
 */
public fun DataFrame.Companion.readJson(
    url: URL,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    return catchHttpResponse(url) { response ->
        DataFrame.readJson(
            response,
            header = header,
            keyValuePaths = keyValuePaths,
            typeClashTactic = typeClashTactic,
            unifyNumbers = unifyNumbers,
        )
    }
}
|
||||
|
||||
/**
 * Reads JSON fetched from [url] into a single [DataRow].
 *
 * The content is first read as a [DataFrame]; that frame's `single()` row is returned.
 *
 * @param url Where to fetch the Json as [InputStream] to be converted to a [DataRow].
 * @param header Optional list of column names. If given, the stream will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataRow] from the given [url].
 */
public fun DataRow.Companion.readJson(
    url: URL,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJson(
        url,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
    return frame.single()
}
|
||||
|
||||
/**
 * Decodes [stream] as a [JsonElement] and converts it to a [DataFrame].
 *
 * This function does not close [stream] itself.
 *
 * @param stream Json as [InputStream] to be converted to a [DataFrame].
 * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataFrame] from the given [stream].
 */
@OptIn(ExperimentalSerializationApi::class)
public fun DataFrame.Companion.readJson(
    stream: InputStream,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    val parsed = Json.decodeFromStream<JsonElement>(stream)
    return readJsonImpl(parsed, unifyNumbers, header, keyValuePaths, typeClashTactic)
}
|
||||
|
||||
/**
 * Reads the JSON in [stream] into a single [DataRow].
 *
 * The content is first read as a [DataFrame]; that frame's `single()` row is returned.
 *
 * @param stream Json as [InputStream] to be converted to a [DataRow].
 * @param header Optional list of column names. If given, [stream] will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataRow] from the given [stream].
 */
public fun DataRow.Companion.readJson(
    stream: InputStream,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJson(
        stream,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
    return frame.single()
}
|
||||
|
||||
/**
 * Parses [text] as a [JsonElement] and converts it to a [DataFrame].
 *
 * @param text Json as [String] to be converted to a [DataFrame].
 * @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataFrame] from the given [text].
 */
public fun DataFrame.Companion.readJsonStr(
    @Language("json") text: String,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyFrame {
    val parsed: JsonElement = Json.parseToJsonElement(text)
    return readJsonImpl(parsed, unifyNumbers, header, keyValuePaths, typeClashTactic)
}
|
||||
|
||||
/**
 * Parses [text] as JSON and converts it to a single [DataRow].
 *
 * The content is first read as a [DataFrame]; that frame's `single()` row is returned.
 *
 * @param text Json as [String] to be converted to a [DataRow].
 * @param header Optional list of column names. If given, [text] will be read like an object with [header] being the keys.
 * @param keyValuePaths List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[NameValueProperty]>
 *   will be created.
 * @param typeClashTactic How to handle type clashes when reading a JSON file.
 * @param unifyNumbers Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default.
 * @return [DataRow] from the given [text].
 */
public fun DataRow.Companion.readJsonStr(
    @Language("json") text: String,
    header: List<String> = emptyList(),
    keyValuePaths: List<JsonPath> = emptyList(),
    typeClashTactic: TypeClashTactic = ARRAY_AND_VALUE_COLUMNS,
    unifyNumbers: Boolean = true,
): AnyRow {
    val frame = DataFrame.readJsonStr(
        text,
        header = header,
        keyValuePaths = keyValuePaths,
        typeClashTactic = typeClashTactic,
        unifyNumbers = unifyNumbers,
    )
    return frame.single()
}
|
||||
|
||||
/**
 * Serializes this DataFrame to a JSON string.
 *
 * The serializer is configured as lenient and allows special floating point values.
 *
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 * @return The JSON text produced from `encodeFrame` for this DataFrame.
 */
public fun AnyFrame.toJson(prettyPrint: Boolean = false): String {
    val serializer = Json {
        isLenient = true
        allowSpecialFloatingPointValues = true
        this.prettyPrint = prettyPrint
    }
    val frameElement = encodeFrame(this)
    return serializer.encodeToString(JsonElement.serializer(), frameElement)
}
|
||||
|
||||
/**
 * Converts the DataFrame to a JSON string representation with additional metadata about serialized data.
 * It is heavily used to implement some integration features in Kotlin Notebook IntelliJ IDEA plugin.
 *
 * The serializer is configured as lenient and allows special floating point values.
 *
 * @param rowLimit The maximum number of top-level dataframe rows to include in the output JSON.
 * @param nestedRowLimit The maximum number of nested frame rows to include in the output JSON.
 *   If null, all rows are included.
 *   Applied for each frame column recursively.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 * @param customEncoders The options for encoding things like images.
 *   The default is empty list, which indicates that the image is not encoded as Base64.
 * @param isFormatted Specifies whether the DataFrame should be formatted,
 *   a.k.a. it comes from [FormattedFrame.df] or it contains a
 *   [DataColumn][DataColumn]`<`[FormattedFrame][FormattedFrame]`<*>>` at any depth.
 *   This is just a marker; formatting is applied by the renderer. Defaults to `false`.
 *
 * @return The DataFrame converted to a JSON string with metadata.
 */
public fun AnyFrame.toJsonWithMetadata(
    rowLimit: Int,
    nestedRowLimit: Int? = null,
    prettyPrint: Boolean = false,
    customEncoders: List<CustomEncoder> = emptyList(),
    isFormatted: Boolean = false,
): String {
    val serializer = Json {
        isLenient = true
        allowSpecialFloatingPointValues = true
        this.prettyPrint = prettyPrint
    }
    val frameWithMetadata = encodeDataFrameWithMetadata(
        frame = this,
        rowLimit = rowLimit,
        nestedRowLimit = nestedRowLimit,
        customEncoders = customEncoders,
        isFormatted = isFormatted,
    )
    return serializer.encodeToString(JsonElement.serializer(), frameWithMetadata)
}
|
||||
|
||||
/**
 * Interface for defining a custom encoder that is applied to values during dataframe JSON serialization.
 *
 * Instances are supplied through the `customEncoders` argument of [toJsonWithMetadata].
 */
public interface CustomEncoder {

    /**
     * Determines whether this encoder can encode the given input.
     *
     * @param input The input object to be checked for suitability; may be `null`.
     * @return `true` if the input can be encoded, otherwise `false`.
     */
    public fun canEncode(input: Any?): Boolean

    /**
     * Encodes the provided input into a JSON element.
     *
     * @param input The input object to be encoded; may be `null`.
     * @return A [JsonElement] representing the encoded input.
     */
    public fun encode(input: Any?): JsonElement
}
|
||||
|
||||
/** Default value of [Base64ImageEncodingOptions.imageSizeLimit]. */
internal const val DEFAULT_IMG_SIZE = 600

/**
 * Class representing the options for encoding images.
 *
 * @property imageSizeLimit The maximum size to which images should be resized. Defaults to the value of DEFAULT_IMG_SIZE.
 * @property options Bitwise-OR of the [GZIP_ON] and [LIMIT_SIZE_ON] constants. Defaults to [GZIP_ON] or [LIMIT_SIZE_ON].
 */
public class Base64ImageEncodingOptions(
    public val imageSizeLimit: Int = DEFAULT_IMG_SIZE,
    private val options: Int = GZIP_ON or LIMIT_SIZE_ON,
) {
    /** `true` when the [GZIP_ON] bit is set in the options. */
    public val isGzipOn: Boolean
        get() = isEnabled(GZIP_ON)

    /** `true` when the [LIMIT_SIZE_ON] bit is set in the options. */
    public val isLimitSizeOn: Boolean
        get() = isEnabled(LIMIT_SIZE_ON)

    /** Checks that every bit of [flag] is present in [options]. */
    private fun isEnabled(flag: Int): Boolean = (options and flag) == flag

    public companion object {
        /** No option bits set. */
        public const val ALL_OFF: Int = 0

        /** Bit 0 (2^0). */
        public const val GZIP_ON: Int = 1

        /** Bit 1 (2^1). */
        public const val LIMIT_SIZE_ON: Int = 2
    }
}
|
||||
|
||||
/**
 * Serializes this row to a JSON string.
 *
 * The row is encoded through `encodeRow`, using the row's owning frame ([DataRow.df]) and its index.
 * The serializer is configured as lenient and allows special floating point values.
 *
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 * @return The JSON text for this row.
 */
public fun AnyRow.toJson(prettyPrint: Boolean = false): String {
    val serializer = Json {
        isLenient = true
        allowSpecialFloatingPointValues = true
        this.prettyPrint = prettyPrint
    }
    val rowElement = encodeRow(df(), index())
    return serializer.encodeToString(JsonElement.serializer(), rowElement)
}
|
||||
|
||||
/**
 * Writes this DataFrame as JSON to [file], delegating to the [Path]-based overload.
 *
 * @param file Destination file.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyFrame.writeJson(file: File, prettyPrint: Boolean = false) {
    val target: Path = file.toPath()
    writeJson(target, prettyPrint)
}
|
||||
|
||||
/**
 * Writes the result of [toJson] for this DataFrame to [path].
 *
 * @param path Destination path.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyFrame.writeJson(path: Path, prettyPrint: Boolean = false) {
    val jsonText = toJson(prettyPrint)
    path.writeText(jsonText)
}
|
||||
|
||||
/**
 * Writes this DataFrame as JSON to the file named by [path], delegating to the [File]-based overload.
 *
 * @param path Destination file path.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyFrame.writeJson(path: String, prettyPrint: Boolean = false) {
    val target = File(path)
    writeJson(target, prettyPrint)
}
|
||||
|
||||
/**
 * Appends the result of [toJson] for this DataFrame to [writer].
 *
 * @param writer Destination the JSON text is appended to.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyFrame.writeJson(writer: Appendable, prettyPrint: Boolean = false) {
    val jsonText = toJson(prettyPrint)
    writer.append(jsonText)
}
|
||||
|
||||
/**
 * Writes this row as JSON to [file], delegating to the [Path]-based overload.
 *
 * @param file Destination file.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyRow.writeJson(file: File, prettyPrint: Boolean = false) {
    val target: Path = file.toPath()
    writeJson(target, prettyPrint)
}
|
||||
|
||||
/**
 * Writes the result of [toJson] for this row to [path].
 *
 * @param path Destination path.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyRow.writeJson(path: Path, prettyPrint: Boolean = false) {
    val jsonText = toJson(prettyPrint)
    path.writeText(jsonText)
}
|
||||
|
||||
/**
 * Writes this row as JSON to the file named by [path], delegating to the [File]-based overload.
 *
 * @param path Destination file path.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyRow.writeJson(path: String, prettyPrint: Boolean = false) {
    val target = File(path)
    writeJson(target, prettyPrint)
}
|
||||
|
||||
/**
 * Appends the result of [toJson] for this row to [writer].
 *
 * @param writer Destination the JSON text is appended to.
 * @param prettyPrint Specifies whether the output JSON should be formatted with indentation and line breaks.
 */
public fun AnyRow.writeJson(writer: Appendable, prettyPrint: Boolean = false) {
    val jsonText = toJson(prettyPrint)
    writer.append(jsonText)
}
|
||||
|
||||
/** Method name that [DefaultReadJsonMethod] passes to its superclass as `methodName`. */
private const val READ_JSON = "readJson"
|
||||
|
||||
/**
 * [AbstractDefaultReadMethod] whose method name is fixed to [READ_JSON].
 *
 * @param path Path representation forwarded to the superclass; may be `null`.
 * @param arguments Arguments forwarded to the superclass.
 */
internal class DefaultReadJsonMethod(
    path: String?,
    arguments: MethodArguments,
) : AbstractDefaultReadMethod(
        methodName = READ_JSON,
        path = path,
        arguments = arguments,
    )
|
||||
+1
@@ -0,0 +1 @@
|
||||
org.jetbrains.kotlinx.dataframe.io.JSON
|
||||
+177
@@ -0,0 +1,177 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import io.kotest.matchers.string.shouldContain
|
||||
import kotlinx.serialization.json.JsonObject
|
||||
import kotlinx.serialization.json.jsonArray
|
||||
import kotlinx.serialization.json.jsonObject
|
||||
import kotlinx.serialization.json.jsonPrimitive
|
||||
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.BufferedImageEncoder
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.SerializationKeys.KOTLIN_DATAFRAME
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.resizeKeepingAspectRatio
|
||||
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.ALL_OFF
|
||||
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.GZIP_ON
|
||||
import org.jetbrains.kotlinx.dataframe.io.Base64ImageEncodingOptions.Companion.LIMIT_SIZE_ON
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.MethodSource
|
||||
import java.awt.image.BufferedImage
|
||||
import java.io.ByteArrayInputStream
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.io.File
|
||||
import java.util.Base64
|
||||
import java.util.zip.GZIPInputStream
|
||||
import javax.imageio.ImageIO
|
||||
import kotlin.math.abs
|
||||
|
||||
/**
 * Round-trip tests for encoding [BufferedImage] columns through `toJsonWithMetadata`.
 *
 * Each parameterized run encodes the images found in the `imgs` test resource directory, decodes them
 * again from the produced JSON and compares them pixel by pixel with the (optionally resized) originals.
 */
class ImageSerializationTests {
    @ParameterizedTest
    @MethodSource("imageEncodingOptionsToTest")
    fun `serialize images as base64`(encodingOptions: Base64ImageEncodingOptions?) {
        val images = readImagesFromResources()
        val json = encodeImagesAsJson(images, encodingOptions)

        // Without an image encoder the images are expected to show up as their `toString()` text.
        if (encodingOptions == DISABLED) {
            checkImagesEncodedAsToString(json, images.size)
            return
        }

        val decodedImages = decodeImagesFromJson(json, images.size, encodingOptions)

        for ((decodedImage, original) in decodedImages.zip(images)) {
            val expectedImage = resizeIfNeeded(original, encodingOptions)
            isImagesIdentical(decodedImage, expectedImage, 2) shouldBe true
        }
    }

    /**
     * Loads every file of the `imgs` resource directory as an image.
     *
     * Fails loudly instead of returning an empty list, because an empty list would let the test pass
     * without checking anything.
     */
    private fun readImagesFromResources(): List<BufferedImage> {
        // Go through the URI so percent-encoded characters (e.g. spaces) in the resource path are decoded.
        val dir = File(testResource("imgs").toURI())
        val files = requireNotNull(dir.listFiles()) { "Test image directory cannot be listed: $dir" }
        require(files.isNotEmpty()) { "Test image directory is empty: $dir" }

        return files.map { file ->
            val image = try {
                ImageIO.read(file)
            } catch (ex: Exception) {
                // Keep the original exception as the cause so the stack trace is not lost.
                throw IllegalArgumentException("Error reading ${file.name}: ${ex.message}", ex)
            }
            // ImageIO.read returns null when no registered reader understands the file.
            requireNotNull(image) { "Error reading ${file.name}: unsupported image format" }
        }
    }

    /** Puts [images] into a one-column DataFrame and returns its JSON-with-metadata form, parsed. */
    private fun encodeImagesAsJson(
        images: List<BufferedImage>,
        encodingOptions: Base64ImageEncodingOptions?,
    ): JsonObject {
        val df = dataFrameOf("imgs" to images)
        val jsonStr = df.toJsonWithMetadata(
            20,
            nestedRowLimit = 20,
            customEncoders = listOfNotNull(encodingOptions?.let { BufferedImageEncoder(it) }),
        )

        return parseJsonStr(jsonStr)
    }

    /** Asserts that each of the first [numImgs] rows holds text containing `BufferedImage`. */
    private fun checkImagesEncodedAsToString(json: JsonObject, numImgs: Int) {
        for (i in 0..<numImgs) {
            val row = json[KOTLIN_DATAFRAME]!!.jsonArray[i].jsonObject
            val img = row["imgs"]?.jsonPrimitive?.content

            img shouldContain "BufferedImage"
        }
    }

    /** Decodes the `imgs` value of the first [imgsNum] rows back into images. */
    private fun decodeImagesFromJson(
        json: JsonObject,
        imgsNum: Int,
        encodingOptions: Base64ImageEncodingOptions,
    ): List<BufferedImage> =
        List(imgsNum) { i ->
            val row = json[KOTLIN_DATAFRAME]!!.jsonArray[i].jsonObject
            val imgString = row["imgs"]!!.jsonPrimitive.content

            createImageFromBytes(decodeBase64Image(imgString, encodingOptions))
        }

    /** Base64-decodes [imgString] and additionally gunzips it when gzip is enabled in [encodingOptions]. */
    private fun decodeBase64Image(imgString: String, encodingOptions: Base64ImageEncodingOptions): ByteArray {
        val decoded = Base64.getDecoder().decode(imgString)
        return if (encodingOptions.isGzipOn) decompressGzip(decoded) else decoded
    }

    private fun decompressGzip(input: ByteArray): ByteArray =
        ByteArrayOutputStream().use { byteArrayOutputStream ->
            GZIPInputStream(input.inputStream()).use { inputStream ->
                inputStream.copyTo(byteArrayOutputStream)
            }
            byteArrayOutputStream.toByteArray()
        }

    /** Applies the same size limit to the expected image as the encoder is configured with. */
    private fun resizeIfNeeded(image: BufferedImage, encodingOptions: Base64ImageEncodingOptions): BufferedImage =
        when {
            !encodingOptions.isLimitSizeOn -> image
            else -> image.resizeKeepingAspectRatio(encodingOptions.imageSizeLimit)
        }

    private fun createImageFromBytes(bytes: ByteArray): BufferedImage =
        // ImageIO.read returns null when the bytes are not a decodable image.
        checkNotNull(ImageIO.read(ByteArrayInputStream(bytes))) { "Decoded bytes are not a readable image" }

    /**
     * Compares two images pixel by pixel.
     *
     * @param allowedDelta Maximum allowed sum of absolute red, green and blue differences for one pixel.
     *   The alpha channel is not compared.
     * @return `true` if the dimensions match and no pixel differs by more than [allowedDelta].
     */
    private fun isImagesIdentical(img1: BufferedImage, img2: BufferedImage, allowedDelta: Int): Boolean {
        // First check dimensions
        if (img1.width != img2.width || img1.height != img2.height) {
            return false
        }

        // Then check each pixel
        for (y in 0 until img1.height) {
            for (x in 0 until img1.width) {
                val rgb1 = img1.getRGB(x, y)
                val rgb2 = img2.getRGB(x, y)

                val r1 = (rgb1 shr 16) and 0xFF
                val g1 = (rgb1 shr 8) and 0xFF
                val b1 = rgb1 and 0xFF

                val r2 = (rgb2 shr 16) and 0xFF
                val g2 = (rgb2 shr 8) and 0xFF
                val b2 = rgb2 and 0xFF

                val diff = abs(r1 - r2) + abs(g1 - g2) + abs(b1 - b2)

                // If the difference in color components exceed our allowance return false
                if (diff > allowedDelta) {
                    return false
                }
            }
        }

        // If no exceeding difference was found, the images are identical within our allowedDelta
        return true
    }

    companion object {
        private val DEFAULT = Base64ImageEncodingOptions()
        private val GZIP_ON_RESIZE_OFF = Base64ImageEncodingOptions(options = GZIP_ON)
        private val GZIP_OFF_RESIZE_OFF = Base64ImageEncodingOptions(options = ALL_OFF)
        private val GZIP_ON_RESIZE_TO_700 =
            Base64ImageEncodingOptions(imageSizeLimit = 700, options = GZIP_ON or LIMIT_SIZE_ON)

        // No encoder is registered for this case.
        private val DISABLED = null

        @JvmStatic
        fun imageEncodingOptionsToTest(): List<Base64ImageEncodingOptions?> =
            listOf(
                DEFAULT,
                GZIP_ON_RESIZE_OFF,
                GZIP_OFF_RESIZE_OFF,
                GZIP_ON_RESIZE_TO_700,
                DISABLED,
            )
    }
}
|
||||
+1231
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
After Width: | Height: | Size: 67 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 36 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 14 KiB |
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user