init research

2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
@@ -0,0 +1,7 @@
## :dataframe-jdbc
This module, published as `dataframe-jdbc`, contains the logic and tests that enable DataFrame to work with
JDBC data sources.
See [Read from SQL databases](https://kotlin.github.io/dataframe/readsqldatabases.html) for more information
about how to use it.
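
For example, a minimal read might look like the following sketch (the JDBC URL, credentials, and table name are placeholders):

```kotlin
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.readSqlTable

fun main() {
    // Connection parameters are managed by the library; readOnly defaults to true.
    val config = DbConnectionConfig(
        url = "jdbc:postgresql://localhost:5432/mydb", // placeholder URL
        user = "reader", // placeholder credentials
        password = "secret",
    )

    // Read a single table into a DataFrame; the third argument limits the row count.
    val df = DataFrame.readSqlTable(config, "customers", 100)
    println(df)
}
```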
@@ -0,0 +1,255 @@
public final class org/jetbrains/kotlinx/dataframe/io/DbConnectionConfig {
public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)V
public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
public final fun copy (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;
public fun equals (Ljava/lang/Object;)Z
public final fun getPassword ()Ljava/lang/String;
public final fun getReadOnly ()Z
public final fun getUrl ()Ljava/lang/String;
public final fun getUser ()Ljava/lang/String;
public fun hashCode ()I
public fun toString ()Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/Jdbc : org/jetbrains/kotlinx/dataframe/io/SupportedCodeGenerationFormat, org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
public fun <init> ()V
public fun acceptsExtension (Ljava/lang/String;)Z
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
public fun getTestOrder ()I
public fun readCodeForGeneration (Ljava/io/File;Ljava/lang/String;Z)Ljava/lang/String;
public fun readCodeForGeneration (Ljava/io/InputStream;Ljava/lang/String;Z)Ljava/lang/String;
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/JdbcSchemaKt {
public static final fun buildCodeForDB (Ljava/net/URL;Ljava/lang/String;)Ljava/lang/String;
public static final fun getDatabaseCodeGenReader (Lorg/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator$Companion;)Lkotlin/jvm/functions/Function2;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadDataFrameSchemaKt {
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map;
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map;
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map;
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map;
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map;
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map;
public static final fun readDataFrameSchema (Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readDataFrameSchema (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readDataFrameSchema (Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readDataFrameSchema (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readDataFrameSchema$default (Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readDataFrameSchema$default (Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readDataFrameSchema$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadJdbcKt {
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;)Ljava/util/Map;
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;)Ljava/util/Map;
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;)Ljava/util/Map;
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Ljava/util/Map;
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Ljava/util/Map;
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Ljava/util/Map;
public static final fun readDataFrame (Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataFrame (Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataFrame (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataFrame (Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDataFrame (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readResultSet$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readResultSet$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public abstract class org/jetbrains/kotlinx/dataframe/io/db/DbType {
public fun <init> (Ljava/lang/String;)V
public fun buildDataColumn (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public fun buildSelectTableQueryWithLimit (Ljava/lang/String;Ljava/lang/Integer;)Ljava/lang/String;
public fun buildSqlQueryWithLimit (Ljava/lang/String;I)Ljava/lang/String;
public static synthetic fun buildSqlQueryWithLimit$default (Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/String;IILjava/lang/Object;)Ljava/lang/String;
public abstract fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun configureReadStatement (Ljava/sql/PreparedStatement;)V
public abstract fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public abstract fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun createConnection (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;)Ljava/sql/Connection;
public fun extractValueFromResultSet (Ljava/sql/ResultSet;ILorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;Lkotlin/reflect/KType;)Ljava/lang/Object;
public final fun getDbTypeInJdbcUrl ()Ljava/lang/String;
public fun getDefaultFetchSize ()I
public fun getDefaultQueryTimeout ()Ljava/lang/Integer;
public abstract fun getDriverClassName ()Ljava/lang/String;
public fun getTableColumnsMetadata (Ljava/sql/ResultSet;)Ljava/util/List;
public fun getTableTypes ()Ljava/util/List;
public abstract fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
public fun makeCommonSqlToKTypeMapping (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/DuckDb : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/DuckDb;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun createConnection (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;)Ljava/sql/Connection;
public fun getDriverClassName ()Ljava/lang/String;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
}
public class org/jetbrains/kotlinx/dataframe/io/db/H2 : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Companion;
public static final field MODE_MARIADB Ljava/lang/String;
public static final field MODE_MSSQLSERVER Ljava/lang/String;
public static final field MODE_MYSQL Ljava/lang/String;
public static final field MODE_POSTGRESQL Ljava/lang/String;
public fun <init> ()V
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)V
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;)V
public synthetic fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun buildSqlQueryWithLimit (Ljava/lang/String;I)Ljava/lang/String;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun getDriverClassName ()Ljava/lang/String;
public final fun getMode ()Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
}
public final class org/jetbrains/kotlinx/dataframe/io/db/H2$Companion {
}
public final class org/jetbrains/kotlinx/dataframe/io/db/H2$Mode : java/lang/Enum {
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode$Companion;
public static final field MariaDb Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public static final field MsSqlServer Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public static final field MySql Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public static final field PostgreSql Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public static final field Regular Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public static fun getEntries ()Lkotlin/enums/EnumEntries;
public final fun getValue ()Ljava/lang/String;
public final fun toDbType ()Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;
public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/H2$Mode$Companion {
public final fun fromDbType (Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
public final fun fromValue (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/MariaDb : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/MariaDb;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun getDriverClassName ()Ljava/lang/String;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/MsSql : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/MsSql;
public fun buildSqlQueryWithLimit (Ljava/lang/String;I)Ljava/lang/String;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun getDriverClassName ()Ljava/lang/String;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/MySql : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/MySql;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun getDriverClassName ()Ljava/lang/String;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/PostgreSql : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/PostgreSql;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun getDriverClassName ()Ljava/lang/String;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/Sqlite : org/jetbrains/kotlinx/dataframe/io/db/DbType {
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/Sqlite;
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
public fun createConnection (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;)Ljava/sql/Connection;
public fun getDriverClassName ()Ljava/lang/String;
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
}
public final class org/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata {
public fun <init> (Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;Z)V
public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
public final fun copy (Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;
public fun equals (Ljava/lang/Object;)Z
public final fun getJavaClassName ()Ljava/lang/String;
public final fun getJdbcType ()I
public final fun getName ()Ljava/lang/String;
public final fun getSize ()I
public final fun getSqlTypeName ()Ljava/lang/String;
public fun hashCode ()I
public final fun isNullable ()Z
public fun toString ()Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/TableMetadata {
public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
public final fun copy (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
public fun equals (Ljava/lang/Object;)Z
public final fun getCatalogue ()Ljava/lang/String;
public final fun getName ()Ljava/lang/String;
public final fun getSchemaName ()Ljava/lang/String;
public fun hashCode ()I
public fun toString ()Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/db/UtilKt {
public static final fun driverClassNameFromUrl (Ljava/lang/String;)Ljava/lang/String;
public static final fun extractDBTypeFromConnection (Ljava/sql/Connection;)Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;
public static final fun extractDBTypeFromUrl (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;
}
@@ -0,0 +1,42 @@
plugins {
with(convention.plugins) {
alias(kotlinJvm8)
}
with(libs.plugins) {
alias(publisher)
alias(binary.compatibility.validator)
}
}
group = "org.jetbrains.kotlinx"
dependencies {
api(projects.core)
compileOnly(libs.duckdb.jdbc)
compileOnly(libs.sqlite)
implementation(libs.kotlinLogging)
testImplementation(libs.mariadb)
testImplementation(libs.sqlite)
testImplementation(libs.postgresql)
testImplementation(libs.mysql)
testImplementation(libs.h2db)
testImplementation(libs.mssql)
testImplementation(libs.junit)
testImplementation(libs.sl4jsimple)
testImplementation(libs.jts.core)
testImplementation(libs.duckdb.jdbc)
testImplementation(projects.dataframeJson)
testImplementation(libs.kotestAssertions) {
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
}
testImplementation(libs.hikaricp)
}
kotlinPublications {
publication {
publicationName = "dataframeJDBC"
artifactId = project.name
description = "JDBC support for Kotlin DataFrame"
packageName = artifactId
}
}
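
Consumers of the published artifact would then add a dependency together with a JDBC driver for their database. A hedged sketch of a consumer build (the version and driver are placeholders):

```kotlin
dependencies {
    // Coordinates follow the group and artifactId configured above; version is a placeholder.
    implementation("org.jetbrains.kotlinx:dataframe-jdbc:<version>")

    // Drivers are compileOnly/testImplementation in this build, so they are not exposed
    // transitively; consumers supply the driver for their own database at runtime.
    runtimeOnly("org.postgresql:postgresql:42.7.3") // example driver
}
```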
@@ -0,0 +1,93 @@
package org.jetbrains.kotlinx.dataframe.io
/**
* Represents the configuration for an internally managed JDBC database connection.
*
* This class defines connection parameters used by the library to create a `Connection`
* when the user does not provide one explicitly.
* It is designed for safe, read-only access by default.
*
* __NOTE:__ Connections created using this configuration are managed entirely by the library.
* Users do not have access to the underlying `Connection` instance and cannot commit or close it manually.
*
* ### Read-Only Mode Behavior:
*
* When [readOnly] is `true` (default), the connection operates in read-only mode with:
* - `Connection.setReadOnly(true)`
* - `Connection.setAutoCommit(false)`
* - automatic `rollback()` at the end of execution
*
* When [readOnly] is `false`, the connection uses JDBC defaults (usually read-write),
* but the library still rejects any queries that appear to modify data
* (e.g. contain `INSERT`, `UPDATE`, `DELETE`, etc.).
*
* ### Examples:
*
* ```kotlin
* // Safe read-only connection (default)
* val config = DbConnectionConfig("jdbc:sqlite::memory:")
* val df = DataFrame.readSqlQuery(config, "SELECT * FROM books")
*
* // Use default JDBC connection settings (still protected against mutations)
* val config = DbConnectionConfig(
* url = "jdbc:sqlite::memory:",
* readOnly = false
* )
* ```
*
* @property [url] The JDBC URL of the database, e.g., `"jdbc:postgresql://localhost:5432/mydb"`.
* Must follow the standard format: `jdbc:subprotocol:subname`.
*
* @property [user] The username used for authentication.
* Optional, default is an empty string.
*
* @property [password] The password used for authentication.
* Optional, default is an empty string.
*
* @property [readOnly] If `true` (default), enables read-only mode. If `false`, uses JDBC defaults
* but still prevents data-modifying queries. See class documentation for details.
*/
public class DbConnectionConfig(
public val url: String,
public val user: String = "",
public val password: String = "",
public val readOnly: Boolean = true,
) {
override fun equals(other: Any?): Boolean {
if (this === other) return true
if (other !is DbConnectionConfig) return false
if (url != other.url) return false
if (user != other.user) return false
if (password != other.password) return false
if (readOnly != other.readOnly) return false
return true
}
override fun hashCode(): Int {
var result = url.hashCode()
result = 31 * result + user.hashCode()
result = 31 * result + password.hashCode()
result = 31 * result + readOnly.hashCode()
return result
}
override fun toString(): String = "DbConnectionConfig(url='$url', user='$user', password='***', readOnly=$readOnly)"
/**
* Creates a copy of this configuration with the option to override specific properties.
*
* @param url The JDBC URL. If not specified, uses the current value.
* @param user The username. If not specified, uses the current value.
* @param password The password. If not specified, uses the current value.
* @param readOnly The read-only flag. If not specified, uses the current value.
* @return A new [DbConnectionConfig] instance with the specified changes.
*/
public fun copy(
url: String = this.url,
user: String = this.user,
password: String = this.password,
readOnly: Boolean = this.readOnly,
): DbConnectionConfig = DbConnectionConfig(url, user, password, readOnly)
}
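
A short usage sketch of `copy` and the masked `toString` (the URL is a placeholder):

```kotlin
val base = DbConnectionConfig("jdbc:h2:mem:testdb") // placeholder in-memory URL

// Derive a read-write configuration; all other properties are carried over.
val writable = base.copy(readOnly = false)

// toString() masks the password, so configurations are safe to log.
println(writable) // DbConnectionConfig(url='jdbc:h2:mem:testdb', user='', password='***', readOnly=false)
```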
@@ -0,0 +1,56 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
import org.jetbrains.kotlinx.dataframe.codeGen.Code
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
import java.io.File
import java.io.InputStream
import java.nio.file.Path
// TODO: https://github.com/Kotlin/dataframe/issues/450
public class Jdbc :
SupportedCodeGenerationFormat,
SupportedDataFrameFormat {
public override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame = DataFrame.readJDBC(stream)
public override fun readDataFrame(path: Path, header: List<String>): AnyFrame = DataFrame.readJDBC(path)
override fun readCodeForGeneration(
stream: InputStream,
name: String,
generateHelperCompanionObject: Boolean,
): Code {
TODO("Not yet implemented")
}
override fun readCodeForGeneration(file: File, name: String, generateHelperCompanionObject: Boolean): Code {
TODO("Not yet implemented")
}
override fun acceptsExtension(ext: String): Boolean = ext == "jdbc"
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough
override val testOrder: Int = 40000
override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
DefaultReadJdbcMethod(pathRepresentation)
}
private fun DataFrame.Companion.readJDBC(file: File): DataFrame<*> {
TODO("Not yet implemented")
}
private fun DataFrame.Companion.readJDBC(path: Path): DataFrame<*> {
TODO("Not yet implemented")
}
private fun DataFrame.Companion.readJDBC(stream: InputStream): DataFrame<*> {
TODO("Not yet implemented")
}
internal class DefaultReadJdbcMethod(path: String?) : AbstractDefaultReadMethod(path, MethodArguments.EMPTY, READ_JDBC)
private const val READ_JDBC = "readJDBC"
@@ -0,0 +1,561 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.math.BigDecimal
import java.sql.Blob
import java.sql.Clob
import java.sql.Connection
import java.sql.DatabaseMetaData
import java.sql.DriverManager
import java.sql.NClob
import java.sql.PreparedStatement
import java.sql.Ref
import java.sql.ResultSet
import java.sql.ResultSetMetaData
import java.sql.RowId
import java.sql.SQLXML
import java.sql.Time
import java.sql.Timestamp
import java.sql.Types
import java.time.LocalDateTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.util.Date
import java.util.UUID
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.createType
import kotlin.reflect.full.isSupertypeOf
import kotlin.reflect.full.safeCast
import kotlin.reflect.full.starProjectedType
/**
* The `DbType` class represents a database type used for reading a dataframe from a database.
*
* @property [dbTypeInJdbcUrl] The name of the database as specified in the JDBC URL.
*/
public abstract class DbType(public val dbTypeInJdbcUrl: String) {
/**
* Represents the JDBC driver class name for a given database type.
*
* NOTE: This is important for the dataframe-gradle-plugin, which uses it to force class loading.
*
* @return The JDBC driver class name as a [String].
*/
public abstract val driverClassName: String
/**
* The table type(s) (`TABLE_TYPE`) of ordinary tables in the SQL database, used by
* [readAllSqlTables] as a filter when querying the database for all its tables
* via [DatabaseMetaData.getTables].
*
* This is usually "TABLE" or "BASE TABLE", which is what [tableTypes] is set to by default,
* but it can be overridden to any custom list of table types, or `null` to let the JDBC integration
* return all types of tables.
*
* See [DatabaseMetaData.getTableTypes] for all supported table types of your specific database.
*/
public open val tableTypes: List<String>? = listOf("TABLE", "BASE TABLE")
/**
* Specifies the default batch size for fetching rows from the database during query execution.
*
* This property determines how many rows are fetched in a single batch from the database.
* A proper fetch size can improve performance by reducing the number of network round-trips required
* when handling large result sets.
*
* Value is set to 1000 by default, but it can be overridden based on database-specific requirements
* or performance considerations.
*/
public open val defaultFetchSize: Int = 1000
/**
* Specifies the default timeout in seconds for database queries.
*
* If set to `null`, no timeout is applied, allowing queries to run indefinitely.
* This property can be used to set a default query timeout for the database type,
* which can help manage long-running queries.
*/
public open val defaultQueryTimeout: Int? = null // null = no timeout
/**
* Returns a [ColumnSchema] produced from [tableColumnMetadata].
*/
public abstract fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema?
/**
* Checks if the given table name is a system table for the specified database type.
*
* @param [tableMetadata] the table object representing the table from the database.
* @return True if the table is a system table for the specified database type, false otherwise.
*/
public abstract fun isSystemTable(tableMetadata: TableMetadata): Boolean
/**
* Builds the table metadata based on the database type and the ResultSet from the query.
*
* @param [tables] the ResultSet containing the table's meta-information.
* @return the TableMetadata object representing the table metadata.
*/
public abstract fun buildTableMetadata(tables: ResultSet): TableMetadata
/**
* Converts SQL data type to a Kotlin data type.
*
* @param [tableColumnMetadata] The metadata of the table column.
* @return The corresponding Kotlin data type, or null if no mapping is found.
*/
public abstract fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType?
/**
* Builds a SELECT query for reading from a table.
*
* @param [tableName] the name of the table to query.
* @param [limit] the maximum number of rows to retrieve. If `null`, zero, or negative, no limit is applied.
* @return the SQL query string.
*/
public open fun buildSelectTableQueryWithLimit(tableName: String, limit: Int?): String {
require(tableName.isNotBlank()) { "Table name cannot be blank" }
val quotedTableName = quoteIdentifier(tableName)
return if (limit != null && limit > 0) {
buildSqlQueryWithLimit("SELECT * FROM $quotedTableName", limit)
} else {
"SELECT * FROM $quotedTableName"
}
}
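// For example, with the default `quoteIdentifier` and `buildSqlQueryWithLimit`:
// buildSelectTableQueryWithLimit("users", 10) == "SELECT * FROM users LIMIT 10"
// buildSelectTableQueryWithLimit("users", null) == "SELECT * FROM users"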
/**
* Configures the provided `PreparedStatement` for optimized read operations.
*
* This method sets the fetch size for efficient streaming, applies a query timeout if specified,
* and configures the fetch direction to forward-only for better performance in read-only operations.
*
* @param statement the `PreparedStatement` to be configured
*/
public open fun configureReadStatement(statement: PreparedStatement) {
// Set fetch size for better streaming performance
statement.fetchSize = defaultFetchSize
defaultQueryTimeout?.let {
statement.queryTimeout = it
}
// Set the fetch direction (forward-only for read-only operations)
statement.fetchDirection = ResultSet.FETCH_FORWARD
}
/**
* Quotes an identifier (table or column name) according to database-specific rules.
*
* Examples:
* - PostgreSQL: "tableName" or "schema"."table"
* - MySQL: `tableName` or `schema`.`table`
* - MS SQL: `[tableName]` or `[schema].[table]`
* - SQLite/H2: no quotes for simple names
*
* @param [name] the identifier to quote (can contain dots for schema.table).
* @return the quoted identifier.
*/
public open fun quoteIdentifier(name: String): String {
require(name.isNotBlank()) { "Identifier cannot be blank" }
// Default: no quoting (works for SQLite, H2, simple names)
return name
}
/**
* Constructs a SQL query with a limit clause.
*
* @param sqlQuery The original SQL query.
* @param limit The maximum number of rows to retrieve from the query. Default is 1.
* @return A new SQL query with the limit clause added.
*/
public open fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int = 1): String = "$sqlQuery LIMIT $limit"
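// For example, with the default implementation:
// buildSqlQueryWithLimit("SELECT * FROM users", 5) == "SELECT * FROM users LIMIT 5"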
/**
* Creates a database connection using the provided configuration.
* This method is only called when working with [DbConnectionConfig] (internally managed connections).
*
* Some databases (like [Sqlite]) require read-only mode to be set during connection creation
* rather than after the connection is established.
*
* @param [dbConfig] The database configuration containing URL, credentials, and read-only flag.
* @return A configured [Connection] instance.
*/
public open fun createConnection(dbConfig: DbConnectionConfig): Connection {
val connection = DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password)
if (dbConfig.readOnly) {
connection.isReadOnly = true
}
return connection
}
/**
* Extracts a value from the ResultSet for the given column.
* This method can be overridden by custom database types to provide specialized parsing logic.
*
* @param [rs] the ResultSet to read from
* @param [columnIndex] zero-based column index
* @param [columnMetadata] metadata for the column
* @param [kType] the Kotlin type for this column
* @return the extracted value, or null
*/
public open fun extractValueFromResultSet(
rs: ResultSet,
columnIndex: Int,
columnMetadata: TableColumnMetadata,
kType: KType,
): Any? =
try {
rs.getObject(columnIndex + 1)
// TODO: add a special handler for Blob via Streams
} catch (_: Throwable) {
// TODO: expand for all the types like in generateKType function
if (kType.isSupertypeOf(String::class.starProjectedType)) {
rs.getString(columnIndex + 1)
} else {
// Currently the same String fallback as above; kept until per-type handling lands
rs.getString(columnIndex + 1)
}
}
/**
* Builds a single DataColumn with proper type handling.
* Accepts a mutable list to allow efficient post-processing.
*/
public open fun buildDataColumn(
name: String,
values: MutableList<Any?>,
kType: KType,
inferNullability: Boolean,
): DataColumn<*> {
val correctedValues = postProcessColumnValues(values, kType)
return DataColumn.createValueColumn(
name = name,
values = correctedValues,
infer = convertNullabilityInference(inferNullability),
type = kType,
)
}
private fun convertNullabilityInference(inferNullability: Boolean) =
if (inferNullability) Infer.Nulls else Infer.None
/**
* Processes the column values retrieved from the database and performs transformations based on the provided
* Kotlin type and column metadata. The method allows for custom post-processing logic, such as handling
* specific database column types, including arrays.
*
* @param values the list of raw values retrieved from the database for the column.
* @param kType the Kotlin type that the column values should be transformed to.
* @return a list of processed column values, with transformations applied where necessary, or the original list if no transformation is needed.
*/
private fun postProcessColumnValues(values: MutableList<Any?>, kType: KType): List<Any?> =
when {
/* EXAMPLE: columnMetadata.sqlTypeName == "MY_CUSTOM_ARRAY" -> {
values.map { /* custom transformation */ }
} */
kType.classifier == Array::class -> {
handleArrayValues(values)
}
else -> values
}
/**
* Converts SQL Array objects to strongly-typed arrays.
*
* Extracts arrays from SQL Array objects and converts them to a consistent type
* if all elements share the same type. Returns original arrays if types vary.
*
* @param values raw values containing SQL Array objects
* @return list of consistently typed arrays, or original arrays if no common type exists
*/
private fun handleArrayValues(values: MutableList<Any?>): List<Any> {
// Intermediate variable for the first mapping
val sqlArrays = values.mapNotNull {
(it as? java.sql.Array)?.array?.let { array -> array as? Array<*> }
}
// Flatten the arrays to iterate through all elements and filter out null values, then map to component types
val allElementTypes = sqlArrays
.flatMap { array ->
(array.javaClass.componentType?.kotlin?.let { listOf(it) } ?: emptyList())
} // Get the component type of each array and convert it to a Kotlin class, if available
// Find distinct types and ensure there's only one distinct type
val commonElementType = allElementTypes
.distinct() // Get unique element types
.singleOrNull() // Ensure there's only one unique element type, otherwise return null
?: Any::class // Fallback to Any::class if multiple distinct types or no elements found
return if (commonElementType != Any::class) {
sqlArrays.map { castArray(it, commonElementType).toTypedArray() }
} else {
sqlArrays
}
}
/** Utility function to cast arrays based on the type of elements */
private fun <T : Any> castArray(array: Array<*>, elementType: KClass<T>): List<T> =
array.mapNotNull { elementType.safeCast(it) }
/**
* Creates a mapping between common SQL types and their corresponding KTypes.
*
* @param tableColumnMetadata The metadata of the table column.
* @return The KType associated with the SQL type or a default type if no mapping is found.
*/
public open fun makeCommonSqlToKTypeMapping(tableColumnMetadata: TableColumnMetadata): KType {
val jdbcTypeToKTypeMapping = mapOf(
Types.BIT to Boolean::class,
Types.TINYINT to Int::class,
Types.SMALLINT to Int::class,
Types.INTEGER to Int::class,
Types.BIGINT to Long::class,
Types.FLOAT to Float::class,
Types.REAL to Float::class,
Types.DOUBLE to Double::class,
Types.NUMERIC to BigDecimal::class,
Types.DECIMAL to BigDecimal::class,
Types.CHAR to String::class,
Types.VARCHAR to String::class,
Types.LONGVARCHAR to String::class,
Types.DATE to Date::class,
Types.TIME to Time::class,
Types.TIMESTAMP to Timestamp::class,
Types.BINARY to ByteArray::class,
Types.VARBINARY to ByteArray::class,
Types.LONGVARBINARY to ByteArray::class,
Types.NULL to String::class,
Types.JAVA_OBJECT to Any::class,
Types.DISTINCT to Any::class,
Types.STRUCT to Any::class,
Types.ARRAY to Array::class,
Types.BLOB to ByteArray::class,
Types.CLOB to Clob::class,
Types.REF to Ref::class,
Types.DATALINK to Any::class,
Types.BOOLEAN to Boolean::class,
Types.ROWID to RowId::class,
Types.NCHAR to String::class,
Types.NVARCHAR to String::class,
Types.LONGNVARCHAR to String::class,
Types.NCLOB to NClob::class,
Types.SQLXML to SQLXML::class,
Types.REF_CURSOR to Ref::class,
Types.TIME_WITH_TIMEZONE to OffsetTime::class,
Types.TIMESTAMP_WITH_TIMEZONE to OffsetDateTime::class,
)
fun determineKotlinClass(tableColumnMetadata: TableColumnMetadata): KClass<*> =
when {
tableColumnMetadata.jdbcType == Types.OTHER -> when (tableColumnMetadata.javaClassName) {
"[B" -> ByteArray::class
else -> Any::class
}
tableColumnMetadata.javaClassName == "[B" -> ByteArray::class
tableColumnMetadata.javaClassName == "java.sql.Blob" -> Blob::class
tableColumnMetadata.jdbcType == Types.TIMESTAMP &&
tableColumnMetadata.javaClassName == "java.time.LocalDateTime" -> LocalDateTime::class
tableColumnMetadata.jdbcType == Types.BINARY &&
tableColumnMetadata.javaClassName == "java.util.UUID" -> UUID::class
tableColumnMetadata.jdbcType == Types.REAL &&
tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class
tableColumnMetadata.jdbcType == Types.FLOAT &&
tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class
tableColumnMetadata.jdbcType == Types.NUMERIC &&
tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class
// Force BIGINT to always be Long, regardless of javaClassName
// Some JDBC drivers (e.g., MariaDB) may report Integer for small BIGINT values
// TODO: tableColumnMetadata.jdbcType == Types.BIGINT -> Long::class
else -> jdbcTypeToKTypeMapping[tableColumnMetadata.jdbcType] ?: String::class
}
fun createArrayTypeIfNeeded(kClass: KClass<*>, isNullable: Boolean): KType =
if (kClass == Array::class) {
val typeParam = kClass.typeParameters[0].createType()
kClass.createType(
arguments = listOf(kotlin.reflect.KTypeProjection.invariant(typeParam)),
nullable = isNullable,
)
} else {
kClass.createType(nullable = isNullable)
}
val kClass: KClass<*> = determineKotlinClass(tableColumnMetadata)
val kType = createArrayTypeIfNeeded(kClass, tableColumnMetadata.isNullable)
return kType
}
/**
* Retrieves column metadata from a JDBC ResultSet.
*
* This method reads column metadata from [ResultSetMetaData] with graceful fallbacks
* for JDBC drivers that throw [java.sql.SQLFeatureNotSupportedException] for certain methods
* (e.g., Apache Hive).
*
* Fallback behavior for unsupported methods:
* - `getColumnName()` → `getColumnLabel()` → `"columnN"`
* - `getTableName()` → extract from column name if contains '.' → `null`
* - `isNullable()` → [DatabaseMetaData.getColumns] → `true` (assume nullable)
* - `getColumnTypeName()` → `"OTHER"`
* - `getColumnType()` → [java.sql.Types.OTHER]
* - `getColumnDisplaySize()` → `0`
* - `getColumnClassName()` → `"java.lang.Object"`
*
* Override this method in subclasses to provide database-specific behavior
* (for example, to disable fallback for databases like Teradata or Oracle
* where [DatabaseMetaData.getColumns] is known to be slow).
*
* @param resultSet The [ResultSet] containing query results.
* @return A list of [TableColumnMetadata] objects.
*/
public open fun getTableColumnsMetadata(resultSet: ResultSet): List<TableColumnMetadata> {
val rsMetaData = resultSet.metaData
val connection = resultSet.statement.connection
val dbMetaData = connection.metaData
// Some JDBC drivers (e.g., Hive) throw SQLFeatureNotSupportedException
val catalog = try {
connection.catalog.takeUnless { it.isNullOrBlank() }
} catch (_: Exception) {
null
}
val schema = try {
connection.schema.takeUnless { it.isNullOrBlank() }
} catch (_: Exception) {
null
}
val columnCount = rsMetaData.columnCount
val columns = mutableListOf<TableColumnMetadata>()
val nameCounter = mutableMapOf<String, Int>()
for (index in 1..columnCount) {
// Try to getColumnName, fallback to getColumnLabel, then generate name
val columnName = try {
rsMetaData.getColumnName(index)
} catch (_: Exception) {
try {
rsMetaData.getColumnLabel(index)
} catch (_: Exception) {
"column$index"
}
}
// Some JDBC drivers (e.g., Apache Hive) throw SQLFeatureNotSupportedException
val tableName = try {
rsMetaData.getTableName(index).takeUnless { it.isBlank() }
} catch (_: Exception) {
// Fallback: try to extract table name from column name if it contains '.'
val dotIndex = columnName.lastIndexOf('.')
if (dotIndex > 0) columnName.take(dotIndex) else null
}
// Try to detect nullability from ResultSetMetaData
val isNullable = try {
when (rsMetaData.isNullable(index)) {
ResultSetMetaData.columnNoNulls -> false
ResultSetMetaData.columnNullable -> true
// Unknown nullability: assume nullable as the safest default
ResultSetMetaData.columnNullableUnknown -> true
else -> true
}
} catch (_: Exception) {
// Some drivers may throw for unsupported features
// Try fallback to DatabaseMetaData, with additional safety
try {
dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols ->
if (cols.next()) !cols.getString("IS_NULLABLE").equals("NO", ignoreCase = true) else true
}
} catch (_: Exception) {
// Fallback failed, assume nullable as the safest default
true
}
}
// adding fallbacks to avoid SQLException
val columnType = try {
rsMetaData.getColumnTypeName(index)
} catch (_: Exception) {
"OTHER"
}
val jdbcType = try {
rsMetaData.getColumnType(index)
} catch (_: Exception) {
Types.OTHER
}
val displaySize = try {
rsMetaData.getColumnDisplaySize(index)
} catch (_: Exception) {
0
}
val javaClassName = try {
rsMetaData.getColumnClassName(index)
} catch (_: Exception) {
"java.lang.Object"
}
val uniqueName = manageColumnNameDuplication(nameCounter, columnName)
columns += TableColumnMetadata(
uniqueName,
columnType,
jdbcType,
displaySize,
javaClassName,
isNullable,
)
}
return columns
}
/**
* Manages the duplication of column names by appending a unique identifier to the original name if necessary.
*
* @param columnNameCounter a mutable map that keeps track of the count for each column name.
* @param originalName the original name of the column to be managed.
* @return the modified column name that is free from duplication.
*/
internal fun manageColumnNameDuplication(columnNameCounter: MutableMap<String, Int>, originalName: String): String {
var name = originalName
val count = columnNameCounter[originalName]
if (count != null) {
var incrementedCount = count + 1
while (columnNameCounter.containsKey("${originalName}_$incrementedCount")) {
incrementedCount++
}
columnNameCounter[originalName] = incrementedCount
name = "${originalName}_$incrementedCount"
} else {
columnNameCounter[originalName] = 0
}
return name
}
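// For example, for a result set with three columns all named "id", this yields
// "id", "id_1", "id_2" (the counter map is shared across one getTableColumnsMetadata call).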
}
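
To illustrate how the extension points above combine, here is a hedged sketch of a hypothetical custom database type. `ExampleDb`, its driver class, and the `TableMetadata` argument order (name, schemaName, catalogue) are assumptions for illustration only, not part of this commit:

```kotlin
import org.jetbrains.kotlinx.dataframe.io.db.DbType
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import kotlin.reflect.KType

// Hypothetical database type; "exampledb" would be the subprotocol in the JDBC URL,
// e.g. "jdbc:exampledb://host/db". Not a real driver.
public object ExampleDb : DbType("exampledb") {
    override val driverClassName: String = "com.example.jdbc.ExampleDriver" // assumption

    // Narrow the table-type filter and tune the fetch batch size.
    override val tableTypes: List<String>? = listOf("TABLE")
    override val defaultFetchSize: Int = 500

    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        // Standard DatabaseMetaData.getTables() column labels; argument order assumed.
        TableMetadata(
            tables.getString("TABLE_NAME"),
            tables.getString("TABLE_SCHEM"),
            tables.getString("TABLE_CAT"),
        )

    override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
        tableMetadata.schemaName?.startsWith("sys", ignoreCase = true) == true

    // Returning null defers to the common JDBC-type mapping and default schema handling.
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null

    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null

    // Double-quote identifiers, including dotted schema.table names.
    override fun quoteIdentifier(name: String): String =
        name.split('.').joinToString(".") { "\"$it\"" }
}
```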
@@ -0,0 +1,258 @@
package org.jetbrains.kotlinx.dataframe.io.db
import io.github.oshai.kotlinlogging.KotlinLogging
import org.duckdb.DuckDBColumnType
import org.duckdb.DuckDBColumnType.ARRAY
import org.duckdb.DuckDBColumnType.BIGINT
import org.duckdb.DuckDBColumnType.BIT
import org.duckdb.DuckDBColumnType.BLOB
import org.duckdb.DuckDBColumnType.BOOLEAN
import org.duckdb.DuckDBColumnType.DATE
import org.duckdb.DuckDBColumnType.DECIMAL
import org.duckdb.DuckDBColumnType.DOUBLE
import org.duckdb.DuckDBColumnType.ENUM
import org.duckdb.DuckDBColumnType.FLOAT
import org.duckdb.DuckDBColumnType.HUGEINT
import org.duckdb.DuckDBColumnType.INTEGER
import org.duckdb.DuckDBColumnType.INTERVAL
import org.duckdb.DuckDBColumnType.JSON
import org.duckdb.DuckDBColumnType.LIST
import org.duckdb.DuckDBColumnType.MAP
import org.duckdb.DuckDBColumnType.SMALLINT
import org.duckdb.DuckDBColumnType.STRUCT
import org.duckdb.DuckDBColumnType.TIME
import org.duckdb.DuckDBColumnType.TIMESTAMP
import org.duckdb.DuckDBColumnType.TIMESTAMP_MS
import org.duckdb.DuckDBColumnType.TIMESTAMP_NS
import org.duckdb.DuckDBColumnType.TIMESTAMP_S
import org.duckdb.DuckDBColumnType.TIMESTAMP_WITH_TIME_ZONE
import org.duckdb.DuckDBColumnType.TIME_WITH_TIME_ZONE
import org.duckdb.DuckDBColumnType.TINYINT
import org.duckdb.DuckDBColumnType.UBIGINT
import org.duckdb.DuckDBColumnType.UHUGEINT
import org.duckdb.DuckDBColumnType.UINTEGER
import org.duckdb.DuckDBColumnType.UNION
import org.duckdb.DuckDBColumnType.UNKNOWN
import org.duckdb.DuckDBColumnType.USMALLINT
import org.duckdb.DuckDBColumnType.UTINYINT
import org.duckdb.DuckDBColumnType.UUID
import org.duckdb.DuckDBColumnType.VARCHAR
import org.duckdb.DuckDBResultSetMetaData
import org.duckdb.JsonNode
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.db.DuckDb.convertSqlTypeToKType
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.math.BigDecimal
import java.math.BigInteger
import java.sql.Array
import java.sql.Blob
import java.sql.Connection
import java.sql.DatabaseMetaData
import java.sql.DriverManager
import java.sql.ResultSet
import java.sql.Struct
import java.sql.Timestamp
import java.time.LocalDate
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.util.Properties
import java.util.UUID
import kotlin.reflect.KType
import kotlin.reflect.KTypeProjection
import kotlin.reflect.full.createType
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf
private val logger = KotlinLogging.logger {}
/**
* Represents the [DuckDB](http://duckdb.org/) database type.
*
* This class provides methods to convert data from a [ResultSet] to the appropriate type for DuckDB,
* and to generate the corresponding [column schema][ColumnSchema].
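 *
 * A minimal usage sketch (in-memory database; the query is a placeholder):
 * ```kotlin
 * val df = DataFrame.readSqlQuery(
 *     DbConnectionConfig(url = "jdbc:duckdb:", user = "", password = ""),
 *     sqlQuery = "SELECT 42 AS answer",
 * )
 * ```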
*/
public object DuckDb : DbType("duckdb") {
/** the name of the class of the DuckDB JDBC driver */
override val driverClassName: String = "org.duckdb.DuckDBDriver"
/**
* How a column type from JDBC, [tableColumnMetadata], is read in Java/Kotlin.
* The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.
 * Returning `null` defers to the default implementation (which may not always be correct).
*
* Following [org.duckdb.DuckDBVector.getObject].
*/
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType =
tableColumnMetadata.sqlTypeName.toKType(tableColumnMetadata.isNullable)
/**
* How a column from JDBC should be represented as DataFrame (value) column
* See [convertSqlTypeToKType].
*/
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
val type = convertSqlTypeToKType(tableColumnMetadata)
return ColumnSchema.Value(type)
}
/**
* Follows exactly [org.duckdb.DuckDBVector.getObject].
*
* "// dataframe-jdbc" is added for all types that are covered correctly by
* [org.jetbrains.kotlinx.dataframe.io.db.DbType.makeCommonSqlToKTypeMapping] at the moment, however, to cover
* all nested types, we'll use a full type-map for all [DuckDB types][DuckDBColumnType] exactly.
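 *
 * A sketch of the resulting mapping:
 * ```kotlin
 * "INTEGER".toKType(isNullable = true)    // kotlin.Int?
 * "MAP(VARCHAR, INTEGER)".toKType(false)  // Map<String, Int?>
 * ```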
*/
@Suppress("ktlint:standard:blank-line-between-when-conditions")
internal fun String.toKType(isNullable: Boolean): KType {
val sqlTypeName = this
return when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)) {
BOOLEAN -> typeOf<Boolean>() // dataframe-jdbc
TINYINT -> typeOf<Byte>()
SMALLINT -> typeOf<Short>()
INTEGER -> typeOf<Int>() // dataframe-jdbc
BIGINT -> typeOf<Long>() // dataframe-jdbc
HUGEINT -> typeOf<BigInteger>()
UHUGEINT -> typeOf<BigInteger>()
UTINYINT -> typeOf<Short>()
USMALLINT -> typeOf<Int>()
UINTEGER -> typeOf<Long>()
UBIGINT -> typeOf<BigInteger>()
FLOAT -> typeOf<Float>() // dataframe-jdbc
DOUBLE -> typeOf<Double>() // dataframe-jdbc
DECIMAL -> typeOf<BigDecimal>() // dataframe-jdbc
TIME -> typeOf<LocalTime>()
TIME_WITH_TIME_ZONE -> typeOf<OffsetTime>() // dataframe-jdbc
DATE -> typeOf<LocalDate>()
TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S -> typeOf<Timestamp>() // dataframe-jdbc
TIMESTAMP_WITH_TIME_ZONE -> typeOf<OffsetDateTime>() // dataframe-jdbc
JSON -> typeOf<JsonNode>()
BLOB -> typeOf<Blob>()
UUID -> typeOf<UUID>()
MAP -> {
val (key, value) = parseMapTypes(sqlTypeName)
Map::class.createType(
listOf(
KTypeProjection.invariant(key.toKType(false)),
KTypeProjection.invariant(value.toKType(true)),
),
)
}
LIST, ARRAY -> {
// TODO requires #1266 and #1273 for specific types
// val listType = parseListType(sqlTypeName)
// Array::class.createType(
// listOf(KTypeProjection.invariant(listType.toKType(true))),
// )
typeOf<Array>()
}
STRUCT -> typeOf<Struct>() // TODO requires #1266 for specific types
UNION -> typeOf<Any>() // Cannot handle this in Kotlin
VARCHAR -> typeOf<String>()
UNKNOWN, BIT, INTERVAL, ENUM -> typeOf<String>()
}.withNullability(isNullable)
}
/** Parses "MAP(X, Y)" into "X" and "Y", taking parentheses into account */
internal fun parseMapTypes(typeString: String): Pair<String, String> {
if (!typeString.startsWith("MAP(") || !typeString.endsWith(")")) {
error("invalid MAP type: $typeString")
}
val content = typeString.removeSurrounding("MAP(", ")")
// Find the comma that separates key and value types
var parenCount = 0
var commaIndex = -1
for (i in content.indices) {
when (content[i]) {
'(' -> parenCount++
')' -> parenCount--
',' -> if (parenCount == 0) {
commaIndex = i
break
}
}
}
if (commaIndex == -1) error("invalid MAP type: $typeString")
val keyType = content.take(commaIndex).trim()
val valueType = content.substring(commaIndex + 1).trim()
return Pair(keyType, valueType)
}
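    // A sketch of the expected behavior (hypothetical inputs):
    //   parseMapTypes("MAP(VARCHAR, INTEGER)")              == ("VARCHAR" to "INTEGER")
    //   parseMapTypes("MAP(VARCHAR, MAP(INTEGER, DOUBLE))") == ("VARCHAR" to "MAP(INTEGER, DOUBLE)")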
/** Parses "X[]" and "X[123]" into "X", and "X[][]" into "X[]" */
internal fun parseListType(typeString: String): String {
if (!typeString.endsWith("]")) {
error("invalid LIST/ARRAY type: $typeString")
}
return typeString.take(typeString.indexOfLast { it == '[' })
}
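    // A sketch of the expected behavior (hypothetical inputs):
    //   parseListType("INTEGER[]")   == "INTEGER"
    //   parseListType("INTEGER[3]")  == "INTEGER"
    //   parseListType("INTEGER[][]") == "INTEGER[]"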
/**
* How to filter out system tables from user-created ones when using
* [DataFrame.readAllSqlTables][DataFrame.Companion.readAllSqlTables] and
* [DataFrameSchema.readAllSqlTables][org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema.Companion.readAllSqlTables].
*
* The names of these can sometimes be found in the specific JDBC integration.
*/
override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
tableMetadata.schemaName?.lowercase()?.contains("information_schema") == true ||
tableMetadata.schemaName?.lowercase()?.contains("system") == true ||
tableMetadata.name.lowercase().contains("system_")
/**
* How to retrieve the correct table metadata when using
* [DataFrame.readAllSqlTables][DataFrame.Companion.readAllSqlTables] and
* [DataFrameSchema.readAllSqlTables][org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema.Companion.readAllSqlTables].
* The names of these can be found in the [DatabaseMetaData] implementation of the DuckDB JDBC integration.
*/
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
TableMetadata(
tables.getString("TABLE_NAME"),
tables.getString("TABLE_SCHEM"),
tables.getString("TABLE_CAT"),
)
/**
* Creates a database connection using the provided configuration.
*
* DuckDB does not support changing read-only status after connection creation,
* but supports read-only mode through connection parameters.
*
* @param [dbConfig] The database configuration containing URL, credentials, and read-only flag.
* @return A configured [java.sql.Connection] instance.
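 *
 * A usage sketch (the file path is a hypothetical placeholder):
 * ```kotlin
 * val connection = DuckDb.createConnection(
 *     DbConnectionConfig(url = "jdbc:duckdb:/tmp/analytics.duckdb", user = "", password = "", readOnly = true),
 * )
 * ```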
*/
override fun createConnection(dbConfig: DbConnectionConfig): Connection {
val properties = Properties().apply {
dbConfig.user.takeIf { it.isNotEmpty() }?.let { setProperty("user", it) }
dbConfig.password.takeIf { it.isNotEmpty() }?.let { setProperty("password", it) }
// Handle DuckDB limitation: in-memory databases cannot be opened in read-only mode
if (dbConfig.readOnly && !dbConfig.url.isInMemoryDuckDb()) {
setProperty("access_mode", "read_only")
} else if (dbConfig.readOnly) {
logger.warn {
"Cannot create read-only in-memory DuckDB database (url=${dbConfig.url}). " +
"In-memory databases require write access for initialization. Connection will be created without read-only mode."
}
}
}
return DriverManager.getConnection(dbConfig.url, properties)
}
/**
* Checks if the DuckDB URL represents an in-memory database.
* In-memory DuckDB URLs are either "jdbc:duckdb:" or "jdbc:duckdb:" followed only by whitespace.
*/
private fun String.isInMemoryDuckDb(): Boolean =
this.trim() == "jdbc:duckdb:" || matches("jdbc:duckdb:\\s*$".toRegex())
}
@@ -0,0 +1,154 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
import kotlin.reflect.KType
import org.jetbrains.kotlinx.dataframe.io.db.MariaDb as MariaDbType
import org.jetbrains.kotlinx.dataframe.io.db.MsSql as MsSqlType
import org.jetbrains.kotlinx.dataframe.io.db.MySql as MySqlType
import org.jetbrains.kotlinx.dataframe.io.db.PostgreSql as PostgreSqlType
/**
* Represents the H2 database type.
*
* This class provides methods to convert data from a ResultSet to the appropriate type for H2
* and to generate the corresponding column schema.
*
* NOTE: All date and timestamp-related types are converted to String to avoid java.sql.* types.
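 *
 * For example (a sketch), the mode selects which [DbType] H2 delegates dialect-specific behavior to:
 * ```kotlin
 * val plainH2 = H2()                // native H2 mode
 * val h2MySql = H2(H2.Mode.MySql)   // H2 in MySQL compatibility mode
 * ```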
*/
public open class H2(public val mode: Mode = Mode.Regular) : DbType("h2") {
@Deprecated("Use H2(mode = Mode.XXX) instead", ReplaceWith("H2(H2.Mode.MySql)"))
public constructor(dialect: DbType) : this(
Mode.fromDbType(dialect)
?: throw IllegalArgumentException("H2 database could not be specified with H2 dialect!"),
)
private val delegate: DbType? = mode.toDbType()
/**
* Represents the compatibility modes supported by an H2 database.
*
* @property value The string value used in H2 JDBC URL and settings.
*/
public enum class Mode(public val value: String) {
/** Native H2 mode (no compatibility), our synthetic marker. */
Regular("H2-Regular"),
MySql("MySQL"),
PostgreSql("PostgreSQL"),
MsSqlServer("MSSQLServer"),
MariaDb("MariaDB"), ;
/**
* Converts this Mode to the corresponding DbType delegate.
*
* @return The DbType for this mode, or null for Regular mode.
*/
public fun toDbType(): DbType? =
when (this) {
Regular -> null
MySql -> MySqlType
PostgreSql -> PostgreSqlType
MsSqlServer -> MsSqlType
MariaDb -> MariaDbType
}
public companion object {
/**
* Creates a Mode from the given DbType.
*
* @param dialect The DbType to convert.
* @return The corresponding Mode, or null if the dialect is H2.
*/
public fun fromDbType(dialect: DbType): Mode? =
when (dialect) {
is H2 -> null
MySqlType -> MySql
PostgreSqlType -> PostgreSql
MsSqlType -> MsSqlServer
MariaDbType -> MariaDb
else -> Regular
}
/**
* Finds a Mode by its string value (case-insensitive).
* Handles both URL values (MySQL, PostgreSQL, etc.) and
* INFORMATION_SCHEMA values (Regular).
*
* @param value The string value to search for.
* @return The matching Mode, or null if not found.
*/
public fun fromValue(value: String): Mode? {
// "Regular" from INFORMATION_SCHEMA or "H2-Regular" from URL
if (value.equals("regular", ignoreCase = true) ||
value.equals("h2-regular", ignoreCase = true)
) {
return Regular
}
return entries.find { it.value.equals(value, ignoreCase = true) }
}
}
}
/**
 * Contains constants related to the different database compatibility modes.
*
* The mode value is used in the [extractDBTypeFromConnection] function to determine the corresponding `DbType` for the H2 database connection URL.
* For example, if the URL contains the mode value "MySQL", the H2 instance with the MySQL database type is returned.
* Otherwise, the `DbType` is determined based on the URL without the mode value.
*
* @see [extractDBTypeFromConnection]
* @see [createH2Instance]
*/
public companion object {
@Deprecated("Use Mode.MySql.value instead", ReplaceWith("Mode.MySql.value"))
public const val MODE_MYSQL: String = "MySQL"
@Deprecated("Use Mode.PostgreSql.value instead", ReplaceWith("Mode.PostgreSql.value"))
public const val MODE_POSTGRESQL: String = "PostgreSQL"
@Deprecated("Use Mode.MsSqlServer.value instead", ReplaceWith("Mode.MsSqlServer.value"))
public const val MODE_MSSQLSERVER: String = "MSSQLServer"
@Deprecated("Use Mode.MariaDb.value instead", ReplaceWith("Mode.MariaDb.value"))
public const val MODE_MARIADB: String = "MariaDB"
}
override val driverClassName: String
get() = "org.h2.Driver"
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? =
delegate?.convertSqlTypeToColumnSchemaValue(tableColumnMetadata)
override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
val locale = Locale.getDefault()
fun String?.containsWithLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true
val schemaName = tableMetadata.schemaName
// could be extended for other symptoms of the system tables for H2
val isH2SystemTable = schemaName.containsWithLowercase("information_schema")
return if (delegate == null) {
isH2SystemTable
} else {
isH2SystemTable || delegate.isSystemTable(tableMetadata)
}
}
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
delegate?.buildTableMetadata(tables)
?: TableMetadata(
tables.getString("table_name"),
tables.getString("table_schem"),
tables.getString("table_cat"),
)
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? =
delegate?.convertSqlTypeToKType(tableColumnMetadata)
public override fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int): String =
delegate?.buildSqlQueryWithLimit(sqlQuery, limit) ?: super.buildSqlQueryWithLimit(sqlQuery, limit)
}
@@ -0,0 +1,77 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import kotlin.reflect.KType
import kotlin.reflect.full.createType
/**
* Represents the MariaDb database type.
*
* This class provides methods to convert data from a ResultSet to the appropriate type for MariaDb,
* and to generate the corresponding column schema.
*/
public object MariaDb : DbType("mariadb") {
override val driverClassName: String
get() = "org.mariadb.jdbc.Driver"
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
// Force BIGINT to always be Long, regardless of javaClassName
// MariaDB JDBC driver may report Integer for small BIGINT values
// TODO: investigate the corner case
// if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) {
// val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable)
// return ColumnSchema.Value(kType)
// }
if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" ||
tableColumnMetadata.sqlTypeName == "INT UNSIGNED"
) {
val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable)
return ColumnSchema.Value(kType)
}
if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") {
val kType = Short::class.createType(nullable = tableColumnMetadata.isNullable)
return ColumnSchema.Value(kType)
}
return null
}
override fun isSystemTable(tableMetadata: TableMetadata): Boolean = MySql.isSystemTable(tableMetadata)
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
TableMetadata(
tables.getString("table_name"),
tables.getString("table_schem"),
tables.getString("table_cat"),
)
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
// Force BIGINT to always be Long, regardless of javaClassName
// MariaDB JDBC driver may report Integer for small BIGINT values
// TODO: investigate the corner case
// if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) {
// return Long::class.createType(nullable = tableColumnMetadata.isNullable)
// }
if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" ||
tableColumnMetadata.sqlTypeName == "INT UNSIGNED"
) {
return Long::class.createType(nullable = tableColumnMetadata.isNullable)
}
if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") {
return Short::class.createType(nullable = tableColumnMetadata.isNullable)
}
return null
}
override fun quoteIdentifier(name: String): String {
// schema.table -> `schema`.`table`
return name.split(".").joinToString(".") { "`$it`" }
}
}
@@ -0,0 +1,59 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
import kotlin.reflect.KType
/**
* Represents the MSSQL database type.
*
* This class provides methods to convert data from a ResultSet to the appropriate type for MSSQL,
* and to generate the corresponding column schema.
*/
public object MsSql : DbType("sqlserver") {
override val driverClassName: String
get() = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null
override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
val locale = Locale.getDefault()
fun String?.containsWithLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true
val schemaName = tableMetadata.schemaName
val tableName = tableMetadata.name
val catalogName = tableMetadata.catalogue
return schemaName.containsWithLowercase("sys") ||
schemaName.containsWithLowercase("information_schema") ||
tableName.startsWith("sys") ||
tableName.startsWith("dt") ||
tableName.containsWithLowercase("sys_config") ||
catalogName.containsWithLowercase("system") ||
catalogName.containsWithLowercase("master") ||
catalogName.containsWithLowercase("model") ||
catalogName.containsWithLowercase("msdb") ||
catalogName.containsWithLowercase("tempdb")
}
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
TableMetadata(
tables.getString("table_name"),
tables.getString("table_schem"),
tables.getString("table_cat"),
)
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null
    public override fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int): String =
        // only the first (outermost) SELECT gets TOP; replacing every occurrence would corrupt subqueries
        sqlQuery.replaceFirst("SELECT", "SELECT TOP $limit", ignoreCase = true)
override fun quoteIdentifier(name: String): String {
// schema.table -> [schema].[table]
return name.split(".").joinToString(".") { "[$it]" }
}
}
@@ -0,0 +1,63 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
import kotlin.reflect.KType
import kotlin.reflect.full.createType
/**
* Represents the MySql database type.
*
* This class provides methods to convert data from a ResultSet to the appropriate type for MySql,
* and to generate the corresponding column schema.
*/
public object MySql : DbType("mysql") {
override val driverClassName: String
get() = "com.mysql.jdbc.Driver"
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
if (tableColumnMetadata.sqlTypeName == "INT UNSIGNED") {
val kType = Long::class.createType(nullable = tableColumnMetadata.isNullable)
return ColumnSchema.Value(kType)
}
return null
}
override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
val locale = Locale.getDefault()
fun String?.containsWithLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true
val schemaName = tableMetadata.schemaName
val name = tableMetadata.name
return schemaName.containsWithLowercase("information_schema") ||
tableMetadata.catalogue.containsWithLowercase("performance_schema") ||
tableMetadata.catalogue.containsWithLowercase("mysql") ||
schemaName?.contains("mysql.") == true ||
name.contains("mysql.") ||
name.contains("sys_config")
}
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
TableMetadata(
tables.getString("table_name"),
tables.getString("table_schem"),
tables.getString("table_cat"),
)
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
if (tableColumnMetadata.sqlTypeName == "INT UNSIGNED") {
return Long::class.createType(nullable = tableColumnMetadata.isNullable)
}
return null
}
override fun quoteIdentifier(name: String): String {
// schema.table -> `schema`.`table`
return name.split(".").joinToString(".") { "`$it`" }
}
}
@@ -0,0 +1,55 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
import kotlin.reflect.KType
import kotlin.reflect.full.createType
/**
* Represents the PostgreSql database type.
*
* This class provides methods to convert data from a ResultSet to the appropriate type for PostgreSql,
* and to generate the corresponding column schema.
*/
public object PostgreSql : DbType("postgresql") {
override val driverClassName: String
get() = "org.postgresql.Driver"
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
// TODO: could be a wrapper of convertSqlTypeToKType
// because of https://github.com/pgjdbc/pgjdbc/issues/425
if (tableColumnMetadata.sqlTypeName == "money") {
val kType = String::class.createType(nullable = tableColumnMetadata.isNullable)
return ColumnSchema.Value(kType)
}
return null
}
override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
tableMetadata.name.lowercase(Locale.getDefault()).contains("pg_") ||
tableMetadata.schemaName?.lowercase(Locale.getDefault())?.contains("pg_catalog.") ?: false
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
TableMetadata(
tables.getString("table_name"),
tables.getString("table_schem"),
tables.getString("table_cat"),
)
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
// because of https://github.com/pgjdbc/pgjdbc/issues/425
if (tableColumnMetadata.sqlTypeName == "money") {
return String::class.createType(nullable = tableColumnMetadata.isNullable)
}
return null
}
override fun quoteIdentifier(name: String): String {
// schema.table -> "schema"."table"
return name.split(".").joinToString(".") { "\"$it\"" }
}
}
@@ -0,0 +1,44 @@
package org.jetbrains.kotlinx.dataframe.io.db
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.sqlite.SQLiteConfig
import java.sql.Connection
import java.sql.DriverManager
import java.sql.ResultSet
import kotlin.reflect.KType
/**
* Represents the Sqlite database type.
*
* This class provides methods to convert data from a ResultSet to the appropriate type for Sqlite,
* and to generate the corresponding column schema.
*/
public object Sqlite : DbType("sqlite") {
override val driverClassName: String
get() = "org.sqlite.JDBC"
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null
override fun isSystemTable(tableMetadata: TableMetadata): Boolean = tableMetadata.name.startsWith("sqlite_")
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
TableMetadata(
tables.getString("TABLE_NAME"),
tables.getString("TABLE_SCHEM"),
tables.getString("TABLE_CAT"),
)
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null
override fun createConnection(dbConfig: DbConnectionConfig): Connection =
if (dbConfig.readOnly) {
val config = SQLiteConfig()
config.setReadOnly(true)
config.createConnection(dbConfig.url)
} else {
DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password)
}
}
@@ -0,0 +1,68 @@
package org.jetbrains.kotlinx.dataframe.io.db
/**
* Represents a column in a database table to keep all required meta-information.
*
* @property [name] the name of the column.
* @property [sqlTypeName] the SQL data type of the column.
* @property [jdbcType] the JDBC data type of the column produced from [java.sql.Types].
* @property [size] the size of the column.
* @property [javaClassName] the class name in Java.
* @property [isNullable] true if column could contain nulls.
*/
public class TableColumnMetadata(
public val name: String,
public val sqlTypeName: String,
public val jdbcType: Int,
public val size: Int,
public val javaClassName: String,
public val isNullable: Boolean = false,
) {
override fun equals(other: Any?): Boolean {
if (this === other) return true
if (other !is TableColumnMetadata) return false
if (name != other.name) return false
if (sqlTypeName != other.sqlTypeName) return false
if (jdbcType != other.jdbcType) return false
if (size != other.size) return false
if (javaClassName != other.javaClassName) return false
if (isNullable != other.isNullable) return false
return true
}
override fun hashCode(): Int {
var result = name.hashCode()
result = 31 * result + sqlTypeName.hashCode()
result = 31 * result + jdbcType
result = 31 * result + size
result = 31 * result + javaClassName.hashCode()
result = 31 * result + isNullable.hashCode()
return result
}
override fun toString(): String =
"TableColumnMetadata(name='$name', sqlTypeName='$sqlTypeName', jdbcType=$jdbcType, " +
"size=$size, javaClassName='$javaClassName', isNullable=$isNullable)"
/**
* Creates a copy of the current `TableColumnMetadata` instance with optionally modified attributes.
*
* @param name The name of the table column. Defaults to the current instance's `name`.
* @param sqlTypeName The SQL type name of the column. Defaults to the current instance's `sqlTypeName`.
* @param jdbcType The JDBC type of the column, represented as an integer. Defaults to the current instance's `jdbcType`.
* @param size The size of the column. Defaults to the current instance's `size`.
* @param javaClassName The fully qualified name of the Java class representing the column type. Defaults to the current instance's `javaClassName`.
* @param isNullable Indicates whether the column is nullable. Defaults to the current instance's `isNullable`.
* @return A new `TableColumnMetadata` instance with the specified attribute values.
*/
public fun copy(
name: String = this.name,
sqlTypeName: String = this.sqlTypeName,
jdbcType: Int = this.jdbcType,
size: Int = this.size,
javaClassName: String = this.javaClassName,
isNullable: Boolean = this.isNullable,
): TableColumnMetadata = TableColumnMetadata(name, sqlTypeName, jdbcType, size, javaClassName, isNullable)
}
@@ -0,0 +1,48 @@
package org.jetbrains.kotlinx.dataframe.io.db
/**
* Represents a table metadata to store information about a database table,
* including its name, schema name, and catalogue name.
*
 * NOTE: we need to extract both [schemaName] and [catalogue],
 * because different databases implement their metadata differently.
*
* @property [name] the name of the table.
* @property [schemaName] the name of the schema the table belongs to (optional).
* @property [catalogue] the name of the catalogue the table belongs to (optional).
*/
public class TableMetadata(public val name: String, public val schemaName: String?, public val catalogue: String?) {
override fun equals(other: Any?): Boolean {
if (this === other) return true
if (other !is TableMetadata) return false
if (name != other.name) return false
if (schemaName != other.schemaName) return false
if (catalogue != other.catalogue) return false
return true
}
override fun hashCode(): Int {
var result = name.hashCode()
result = 31 * result + (schemaName?.hashCode() ?: 0)
result = 31 * result + (catalogue?.hashCode() ?: 0)
return result
}
override fun toString(): String = "TableMetadata(name='$name', schemaName=$schemaName, catalogue=$catalogue)"
/**
* Creates a copy of the `TableMetadata` instance with optional modifications.
*
* @param name the name of the table; defaults to the current name of the instance.
* @param schemaName the name of the schema the table belongs to; defaults to the current schema name of the instance.
* @param catalogue the name of the catalogue the table belongs to; defaults to the current catalogue of the instance.
* @return a new `TableMetadata` instance with the specified or default values.
*/
public fun copy(
name: String = this.name,
schemaName: String? = this.schemaName,
catalogue: String? = this.catalogue,
): TableMetadata = TableMetadata(name, schemaName, catalogue)
}
@@ -0,0 +1,138 @@
package org.jetbrains.kotlinx.dataframe.io.db
import io.github.oshai.kotlinlogging.KotlinLogging
import java.sql.Connection
import java.sql.SQLException
private val logger = KotlinLogging.logger {}
private const val UNSUPPORTED_H2_MODE_MESSAGE =
"Unsupported H2 MODE: %s. Supported: MySQL, PostgreSQL, MSSQLServer, MariaDB, REGULAR/H2-Regular (or omit MODE)."
private const val H2_MODE_QUERY = "SELECT SETTING_VALUE FROM INFORMATION_SCHEMA.SETTINGS WHERE SETTING_NAME = 'MODE'"
private val H2_MODE_URL_PATTERN = "MODE=([^;:&]+)".toRegex(RegexOption.IGNORE_CASE)
/**
* Extracts the database type from the given connection.
* For H2, fetches the actual MODE from the active connection settings.
* For other databases, extracts type from URL.
*
* @param [connection] the database connection.
* @return the corresponding [DbType].
* @throws [IllegalStateException] if URL information is missing in connection meta-data.
* @throws [IllegalArgumentException] if the URL specifies an unsupported database type.
* @throws [SQLException] if the URL is null.
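 *
 * A usage sketch (the in-memory H2 URL is a placeholder):
 * ```kotlin
 * DriverManager.getConnection("jdbc:h2:mem:test;MODE=PostgreSQL").use { connection ->
 *     val dbType = extractDBTypeFromConnection(connection) // H2(H2.Mode.PostgreSql)
 * }
 * ```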
*/
public fun extractDBTypeFromConnection(connection: Connection): DbType {
val url = connection.metaData?.url
?: throw IllegalStateException("URL information is missing in connection meta data!")
logger.info { "Processing DB type extraction for connection url: $url" }
// First, determine the base database type from URL
val baseDbType = extractDBTypeFromUrl(url)
// For H2, refine the mode by querying the active connection settings
// This handles cases where MODE is not specified in URL, but H2 returns "Regular" from settings
return if (baseDbType is H2) {
val mode = fetchH2ModeFromConnection(connection)
parseH2ModeOrThrow(mode)
} else {
logger.info { "Identified DB type as $baseDbType from url: $url" }
baseDbType
}
}
/**
* Fetches H2 database mode from an active connection.
* Works only for H2 version 2.
*
* @param [connection] the database connection.
* @return the mode string or null if not set.
*/
private fun fetchH2ModeFromConnection(connection: Connection): String? {
var mode: String? = null
connection.prepareStatement(H2_MODE_QUERY).use { st ->
st.executeQuery().use { rs ->
if (rs.next()) {
mode = rs.getString("SETTING_VALUE")
logger.debug { "Fetched H2 DB mode: $mode" }
}
}
}
return mode?.trim()?.takeIf { it.isNotEmpty() }
}
/**
* Parses H2 mode string and returns the corresponding H2 DbType instance.
*
* @param [mode] the mode string (may be null or empty for Regular mode).
* @return H2 instance with the appropriate mode.
* @throws [IllegalArgumentException] if the mode is not supported.
*/
private fun parseH2ModeOrThrow(mode: String?): H2 {
if (mode.isNullOrEmpty()) {
return H2(H2.Mode.Regular)
}
return H2.Mode.fromValue(mode)?.let { H2(it) }
?: throw IllegalArgumentException(UNSUPPORTED_H2_MODE_MESSAGE.format(mode)).also {
logger.error { it.message }
}
}
/**
* Extracts the database type from the given JDBC URL.
*
* @param [url] the JDBC URL.
* @return the corresponding [DbType].
* @throws [SQLException] if the url is null.
* @throws [IllegalArgumentException] if the URL specifies an unsupported database type.
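 *
 * For example (a sketch):
 * ```kotlin
 * extractDBTypeFromUrl("jdbc:postgresql://localhost:5432/mydb") // PostgreSql
 * extractDBTypeFromUrl("jdbc:duckdb:")                          // DuckDb
 * extractDBTypeFromUrl("jdbc:h2:mem:test;MODE=MariaDB")         // H2(H2.Mode.MariaDb)
 * ```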
*/
public fun extractDBTypeFromUrl(url: String?): DbType {
    url ?: throw SQLException("Database URL cannot be null.")
return when {
H2().dbTypeInJdbcUrl in url -> createH2Instance(url)
MariaDb.dbTypeInJdbcUrl in url -> MariaDb
MySql.dbTypeInJdbcUrl in url -> MySql
Sqlite.dbTypeInJdbcUrl in url -> Sqlite
PostgreSql.dbTypeInJdbcUrl in url -> PostgreSql
MsSql.dbTypeInJdbcUrl in url -> MsSql
DuckDb.dbTypeInJdbcUrl in url -> DuckDb
else -> throw IllegalArgumentException(
"Unsupported database type in the url: $url. " +
"Only H2, MariaDB, MySQL, MSSQL, SQLite, PostgreSQL, and DuckDB are supported!",
)
}
}
/**
* Creates an instance of DbType based on the provided JDBC URL.
*
* @param [url] The JDBC URL representing the database connection.
* @return The corresponding [DbType] instance.
* @throws [IllegalArgumentException] if the provided URL does not contain a valid mode.
*/
private fun createH2Instance(url: String): DbType {
val mode = H2_MODE_URL_PATTERN.find(url)?.groupValues?.getOrNull(1)
return parseH2ModeOrThrow(mode?.takeIf { it.isNotBlank() })
}
/**
* Retrieves the driver class name from the given JDBC URL.
*
* @param [url] The JDBC URL to extract the driver class name from.
* @return The driver class name as a [String].
*/
public fun driverClassNameFromUrl(url: String): String {
val dbType = extractDBTypeFromUrl(url)
return dbType.driverClassName
}
@@ -0,0 +1,28 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.codeGen.Code
import org.jetbrains.kotlinx.dataframe.codeGen.CodeGenerator
import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGenerationReadResult
import java.net.URL
// TODO: helper functions created to support existing hierarchy https://github.com/Kotlin/dataframe/issues/450
public val CodeGenerator.Companion.databaseCodeGenReader: (url: URL, name: String) -> CodeGenerationReadResult
get() = { url, name ->
try {
val code = buildCodeForDB(url, name)
CodeGenerationReadResult.Success(code, Jdbc())
} catch (e: Throwable) {
CodeGenerationReadResult.Error(e)
}
}
public fun buildCodeForDB(url: URL, name: String): Code {
val annotationName = DataSchema::class.simpleName
val visibility = "public "
val propertyVisibility = "public "
val declarations = mutableListOf<String>()
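    // NOTE: nothing populates `declarations` yet, so this currently returns an empty string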
return declarations.joinToString()
}
@@ -0,0 +1,464 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
import org.jetbrains.kotlinx.dataframe.io.db.DbType
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromConnection
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import java.sql.Connection
import java.sql.DriverManager
import java.sql.ResultSet
import javax.sql.DataSource
import kotlin.use
/**
* Retrieves the schema for an SQL table using the provided database configuration.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
* @param [tableName] the name of the SQL table for which to retrieve the schema.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dbConfig].
* @return the [DataFrameSchema] object representing the schema of the SQL table
*/
public fun DataFrameSchema.Companion.readSqlTable(
dbConfig: DbConnectionConfig,
tableName: String,
dbType: DbType? = null,
): DataFrameSchema =
withReadOnlyConnection(dbConfig, dbType) { connection ->
readSqlTable(connection, tableName, dbType)
}
/**
* Retrieves the schema for an SQL table using the provided [DataSource].
*
* ### Example with HikariCP:
* ```kotlin
* import com.zaxxer.hikari.HikariConfig
* import com.zaxxer.hikari.HikariDataSource
*
* val config = HikariConfig().apply {
* jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
* username = "user"
* password = "password"
* }
* val dataSource = HikariDataSource(config)
*
* // Get the schema for a specific table
 * val customersSchema = DataFrameSchema.readSqlTable(dataSource, "customers")
*
* // Inspect the schema
* println(customersSchema.columns)
* ```
*
* @param [dataSource] the [DataSource] to get a database connection from.
* @param [tableName] the name of the SQL table for which to retrieve the schema.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dataSource].
* @return the schema of the SQL table as a [DataFrameSchema] object.
* @see [DataSource.getConnection]
*/
public fun DataFrameSchema.Companion.readSqlTable(
dataSource: DataSource,
tableName: String,
dbType: DbType? = null,
): DataFrameSchema {
dataSource.connection.use { connection ->
return readSqlTable(connection, tableName, dbType)
}
}
/**
* Retrieves the schema for an SQL table using the provided database connection.
*
* @param [connection] the database connection.
* @param [tableName] the name of the SQL table for which to retrieve the schema.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [connection].
* @return the schema of the SQL table as a [DataFrameSchema] object.
*
* @see DriverManager.getConnection
*/
public fun DataFrameSchema.Companion.readSqlTable(
connection: Connection,
tableName: String,
dbType: DbType? = null,
): DataFrameSchema {
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
// Read just 1 row to get the schema
val singleRowDataFrame = DataFrame.readSqlTable(
connection = connection,
tableName = tableName,
limit = 1,
inferNullability = false, // Schema extraction doesn't need nullability inference
dbType = determinedDbType,
strictValidation = true,
)
return singleRowDataFrame.schema()
}
/**
* Retrieves the schema of an SQL query result using the provided database configuration.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
* @param [sqlQuery] the SQL query to execute and retrieve the schema from.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dbConfig].
* @return the schema of the SQL query as a [DataFrameSchema] object.
*/
public fun DataFrameSchema.Companion.readSqlQuery(
dbConfig: DbConnectionConfig,
sqlQuery: String,
dbType: DbType? = null,
): DataFrameSchema =
withReadOnlyConnection(dbConfig, dbType) { connection ->
readSqlQuery(connection, sqlQuery, dbType)
}
/**
* Retrieves the schema of an SQL query result using the provided [DataSource].
*
* ### Example with HikariCP:
* ```kotlin
* import com.zaxxer.hikari.HikariConfig
* import com.zaxxer.hikari.HikariDataSource
*
* val config = HikariConfig().apply {
* jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
* username = "user"
* password = "password"
* }
* val dataSource = HikariDataSource(config)
*
 * // Get the schema for an SQL query
 * val querySchema = DataFrameSchema.readSqlQuery(
 *     dataSource,
 *     "SELECT name, age, city FROM customers WHERE age > 25",
 * )
*
* // Inspect the schema
* println(querySchema.columns)
* ```
*
* @param [dataSource] the [DataSource] to get a database connection from.
* @param [sqlQuery] the SQL query to execute and retrieve the schema from.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dataSource].
* @return the schema of the SQL query as a [DataFrameSchema] object.
*
* @see [DataSource.getConnection]
*/
public fun DataFrameSchema.Companion.readSqlQuery(
dataSource: DataSource,
sqlQuery: String,
dbType: DbType? = null,
): DataFrameSchema {
dataSource.connection.use { connection ->
return readSqlQuery(connection, sqlQuery, dbType)
}
}
/**
* Retrieves the schema of an SQL query result using the provided database connection.
*
* @param [connection] the database connection.
* @param [sqlQuery] the SQL query to execute and retrieve the schema from.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [connection].
* @return the schema of the SQL query as a [DataFrameSchema] object.
*
* @see DriverManager.getConnection
*/
public fun DataFrameSchema.Companion.readSqlQuery(
connection: Connection,
sqlQuery: String,
dbType: DbType? = null,
): DataFrameSchema {
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
// Read just 1 row to get the schema
val singleRowDataFrame = DataFrame.readSqlQuery(
connection = connection,
sqlQuery = sqlQuery,
limit = 1,
inferNullability = false, // Schema extraction doesn't need nullability inference
dbType = determinedDbType,
strictValidation = true,
)
return singleRowDataFrame.schema()
}
/**
* Retrieves the schema of an SQL query result or the SQL table using the provided database configuration.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
 * @param [sqlQueryOrTableName] the SQL query to execute, or the name of an SQL table to retrieve the schema from.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [DbConnectionConfig].
* @return the schema of the SQL query as a [DataFrameSchema] object.
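 *
 * A usage sketch (URL and credentials are placeholders):
 * ```kotlin
 * val dbConfig = DbConnectionConfig(url = "jdbc:postgresql://localhost:5432/mydb", user = "user", password = "password")
 * val schema = dbConfig.readDataFrameSchema("SELECT name, age FROM customers WHERE age > 25")
 * ```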
*/
public fun DbConnectionConfig.readDataFrameSchema(
sqlQueryOrTableName: String,
dbType: DbType? = null,
): DataFrameSchema =
when {
isSqlQuery(sqlQueryOrTableName) -> DataFrameSchema.readSqlQuery(this, sqlQueryOrTableName, dbType)
isSqlTableName(sqlQueryOrTableName) -> DataFrameSchema.readSqlTable(this, sqlQueryOrTableName, dbType)
else -> throw IllegalArgumentException(
"$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
)
}
/**
* Retrieves the schema of an SQL query result or the SQL table using the provided [DataSource].
*
* ### Example with HikariCP:
* ```kotlin
* import com.zaxxer.hikari.HikariConfig
* import com.zaxxer.hikari.HikariDataSource
*
* val config = HikariConfig().apply {
* jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
* username = "user"
* password = "password"
* }
* val dataSource = HikariDataSource(config)
*
* // Get schema for a table
 * val tableSchema = dataSource.readDataFrameSchema("customers")
*
* // Or get schema for a query
 * val querySchema = dataSource.readDataFrameSchema("SELECT name, age FROM customers WHERE age > 25")
*
* // Inspect the schema
* println(tableSchema.columns)
* ```
*
* @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table.
 * It should be the name of an existing SQL table,
 * or an SQL query that starts with SELECT and contains a single data-reading statement without any manipulation.
 * It should not contain the `;` symbol.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [DataSource].
* @return the schema of the SQL query as a [DataFrameSchema] object.
*
* @see [DataSource.getConnection]
*/
public fun DataSource.readDataFrameSchema(sqlQueryOrTableName: String, dbType: DbType? = null): DataFrameSchema {
connection.use { conn ->
return when {
isSqlQuery(sqlQueryOrTableName) -> DataFrameSchema.readSqlQuery(conn, sqlQueryOrTableName, dbType)
isSqlTableName(sqlQueryOrTableName) -> DataFrameSchema.readSqlTable(conn, sqlQueryOrTableName, dbType)
else -> throw IllegalArgumentException(
"$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
)
}
}
}
/**
* Retrieves the schema of an SQL query result or the SQL table using the provided database configuration.
*
 * @param [sqlQueryOrTableName] the SQL query to execute, or the name of an SQL table to retrieve the schema from.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [Connection].
* @return the schema of the SQL query as a [DataFrameSchema] object.
*/
public fun Connection.readDataFrameSchema(sqlQueryOrTableName: String, dbType: DbType? = null): DataFrameSchema =
when {
isSqlQuery(sqlQueryOrTableName) -> DataFrameSchema.readSqlQuery(this, sqlQueryOrTableName, dbType)
isSqlTableName(sqlQueryOrTableName) -> DataFrameSchema.readSqlTable(this, sqlQueryOrTableName, dbType)
else -> throw IllegalArgumentException(
"$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
)
}
/**
* Retrieves the schema from [ResultSet].
*
 * NOTE: This function closes neither the connection nor the result set, and it does not consume data from the result set.
*
* @param [resultSet] the [ResultSet] obtained from executing a database query.
 * @param [dbType] the type of database that the [ResultSet] belongs to; could be a custom object provided by the user.
* @return the schema of the [ResultSet] as a [DataFrameSchema] object.
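 *
 * A usage sketch (assuming a caller-managed statement and result set):
 * ```kotlin
 * statement.executeQuery("SELECT * FROM customers").use { rs ->
 *     val schema = DataFrameSchema.readResultSet(rs, PostgreSql)
 * }
 * ```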
*/
public fun DataFrameSchema.Companion.readResultSet(resultSet: ResultSet, dbType: DbType): DataFrameSchema {
val tableColumns = getTableColumnsMetadata(resultSet, dbType)
return buildSchemaByTableColumns(tableColumns, dbType)
}
/**
* Retrieves the schema from [ResultSet].
*
 * NOTE: This function closes neither the connection nor the result set, and it does not consume data from the result set.
*
 * @param [dbType] the type of database that the [ResultSet] belongs to; could be a custom object provided by the user.
* @return the schema of the [ResultSet] as a [DataFrameSchema] object.
*/
public fun ResultSet.readDataFrameSchema(dbType: DbType): DataFrameSchema = DataFrameSchema.readResultSet(this, dbType)
/**
* Retrieves the schemas of all non-system tables in the database using the provided database configuration.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dbConfig].
* @return a map of [String, DataFrameSchema] objects representing the table name and its schema for each non-system table.
*/
public fun DataFrameSchema.Companion.readAllSqlTables(
dbConfig: DbConnectionConfig,
dbType: DbType? = null,
): Map<String, DataFrameSchema> =
withReadOnlyConnection(dbConfig, dbType) { connection ->
readAllSqlTables(connection, dbType)
}
/**
* Retrieves the schemas of all non-system tables in the database using the provided [DataSource].
*
* ### Example with HikariCP:
* ```kotlin
* import com.zaxxer.hikari.HikariConfig
* import com.zaxxer.hikari.HikariDataSource
*
* val config = HikariConfig().apply {
* jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
* username = "user"
* password = "password"
* }
* val dataSource = HikariDataSource(config)
*
* // Get schemas for all tables
 * val allSchemas = DataFrameSchema.readAllSqlTables(dataSource)
*
* // Access individual table schemas
* val customersSchema = allSchemas["customers"]
* val ordersSchema = allSchemas["orders"]
*
* // Iterate through all schemas
 * allSchemas.forEach { (tableName, schema) ->
 *     println("Table: $tableName, Columns: ${schema.columns.keys}")
 * }
* ```
*
* @param [dataSource] the DataSource to get a database connection from.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dataSource].
* @return a map of [String, DataFrameSchema] objects representing the table name and its schema for each non-system table.
*
* @see [DataSource.getConnection]
*/
public fun DataFrameSchema.Companion.readAllSqlTables(
dataSource: DataSource,
dbType: DbType? = null,
): Map<String, DataFrameSchema> {
dataSource.connection.use { connection ->
return readAllSqlTables(connection, dbType)
}
}
/**
* Retrieves the schemas of all non-system tables in the database using the provided database connection.
*
* @param [connection] the database connection.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [connection].
* @return a map of [String, DataFrameSchema] objects representing the table name and its schema for each non-system table.
*/
public fun DataFrameSchema.Companion.readAllSqlTables(
connection: Connection,
dbType: DbType? = null,
): Map<String, DataFrameSchema> {
val metaData = connection.metaData
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
    // exclude system tables and other tables without data
val tableTypes = determinedDbType.tableTypes?.toTypedArray()
val tables = metaData.getTables(null, null, null, tableTypes)
val dataFrameSchemas = mutableMapOf<String, DataFrameSchema>()
while (tables.next()) {
val jdbcTable = determinedDbType.buildTableMetadata(tables)
if (!determinedDbType.isSystemTable(jdbcTable)) {
            // we filter here a second time because of SQLite-specific logic and possible issues with future databases
val tableName = jdbcTable.name
val dataFrameSchema = readSqlTable(connection, tableName, determinedDbType)
dataFrameSchemas += tableName to dataFrameSchema
}
}
return dataFrameSchemas
}
/**
* Builds a DataFrame schema based on the given table columns.
*
* @param [tableColumns] a mutable map containing the table columns, where the key represents the column name
* and the value represents the metadata of the column
* @param [dbType] the type of database.
* @return a [DataFrameSchema] object representing the schema built from the table columns.
*/
internal fun buildSchemaByTableColumns(
tableColumns: MutableList<TableColumnMetadata>,
dbType: DbType,
): DataFrameSchema {
val schemaColumns = tableColumns.associate {
Pair(it.name, generateColumnSchemaValue(dbType, it))
}
return DataFrameSchemaImpl(
columns = schemaColumns,
)
}
internal fun generateColumnSchemaValue(dbType: DbType, tableColumnMetadata: TableColumnMetadata): ColumnSchema =
dbType.convertSqlTypeToColumnSchemaValue(tableColumnMetadata)
?: ColumnSchema.Value(dbType.makeCommonSqlToKTypeMapping(tableColumnMetadata))
@@ -0,0 +1,956 @@
package org.jetbrains.kotlinx.dataframe.io
import io.github.oshai.kotlinlogging.KotlinLogging
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.io.db.DbType
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromConnection
import java.sql.Connection
import java.sql.DatabaseMetaData
import java.sql.DriverManager
import java.sql.PreparedStatement
import java.sql.ResultSet
import javax.sql.DataSource
import kotlin.reflect.KType
private val logger = KotlinLogging.logger {}
/**
* Reads data from an SQL table and converts it into a DataFrame.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
* @param [dbConfig] the configuration for the database, including URL, user, and password.
* @param [tableName] the name of the table to read data from.
 * @param [limit] the maximum number of rows to retrieve from the table:
 * `null` (default) means no limit (all available rows will be fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dbConfig].
* @param [strictValidation] if `true`, the method validates that the provided table name is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the data from the SQL table.
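 *
 * A usage sketch (URL, credentials, and table name are placeholders):
 * ```kotlin
 * val df = DataFrame.readSqlTable(
 *     dbConfig = DbConnectionConfig(url = "jdbc:postgresql://localhost:5432/mydb", user = "user", password = "password"),
 *     tableName = "customers",
 *     limit = 100,
 *     configureStatement = { it.fetchSize = 1_000 },
 * )
 * ```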
*/
public fun DataFrame.Companion.readSqlTable(
dbConfig: DbConnectionConfig,
tableName: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
return withReadOnlyConnection(dbConfig, dbType) { conn ->
readSqlTable(conn, tableName, limit, inferNullability, dbType, strictValidation, configureStatement)
}
}
/**
* Reads data from an SQL table and converts it into a DataFrame.
*
* @param [dataSource] the [DataSource] to get a database connection from.
* @param [tableName] the name of the table to read data from.
 * @param [limit] the maximum number of rows to retrieve from the table:
 * `null` (default) means no limit (all available rows will be fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [dataSource].
* @param [strictValidation] if `true`, the method validates that the provided table name is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the data from the SQL table.
*
* @see [DataSource.getConnection]
*/
public fun DataFrame.Companion.readSqlTable(
dataSource: DataSource,
tableName: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
dataSource.connection.use { connection ->
return readSqlTable(
connection,
tableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
}
}
/**
* Reads data from an SQL table and converts it into a DataFrame.
*
* @param [connection] the database connection to read tables from.
* @param [tableName] the name of the table to read data from.
 * @param [limit] the maximum number of rows to retrieve from the table:
 * `null` (default) means no limit (all available rows will be fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database; could be a custom object provided by the user. Optional; defaults to `null`,
 * in which case the [dbType] will be recognized from the [connection].
* @param [strictValidation] if `true`, the method validates that the provided table name is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the data from the SQL table.
*
* @see [DriverManager.getConnection]
*/
public fun DataFrame.Companion.readSqlTable(
connection: Connection,
tableName: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
if (strictValidation) {
require(isValidTableName(tableName)) {
"The provided table name '$tableName' is invalid. Please ensure it matches a valid table name in the database schema."
}
} else {
logger.warn { "Strict validation is disabled. Make sure the table name '$tableName' is correct." }
}
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
// Build SQL query using DbType
val sqlQuery = determinedDbType.buildSelectTableQueryWithLimit(tableName, limit)
return executeQueryAndBuildDataFrame(
connection,
sqlQuery,
determinedDbType,
configureStatement,
limit,
inferNullability,
)
}
/**
* Reads a data frame from the specified database using the provided SQL query and configurations.
*
* @param [connection] The database connection to be used for executing the query.
* @param [sqlQuery] The SQL query string to be executed.
* @param [determinedDbType] The type of database being accessed, which determines specific configurations.
* @param [configureStatement] A lambda function to configure the prepared statement before execution.
* @param [limit] the maximum number of rows to retrieve from the table.
* `null` (default) means no limit - all available rows will be fetched.
* @param [inferNullability] A flag to determine whether to infer nullability for result set fields.
* @return The data frame constructed from the database query results.
* @throws [IllegalStateException] If an error occurs while reading from the database or processing the data.
*/
private fun executeQueryAndBuildDataFrame(
connection: Connection,
sqlQuery: String,
determinedDbType: DbType,
configureStatement: (PreparedStatement) -> Unit,
limit: Int?,
inferNullability: Boolean,
): AnyFrame =
try {
connection.prepareStatement(sqlQuery).use { statement ->
logger.debug { "Connection established successfully (${connection.metaData.databaseProductName})" }
determinedDbType.configureReadStatement(statement)
configureStatement(statement)
logger.debug { "Executing query: $sqlQuery" }
statement.executeQuery().use { rs ->
val tableColumns = getTableColumnsMetadata(rs, determinedDbType)
fetchAndConvertDataFromResultSet(tableColumns, rs, determinedDbType, limit, inferNullability)
}
}
} catch (e: java.sql.SQLException) {
// Provide the same type for all SQLExceptions from JDBC and enrich with additional information
logger.error(e) { "Database operation failed: $sqlQuery" }
throw IllegalStateException(
"Failed to read from database. Query: $sqlQuery, Database: ${determinedDbType.dbTypeInJdbcUrl}",
e,
)
} catch (e: Exception) {
// Provide the same type for all unexpected errors from JDBC
logger.error(e) { "Unexpected error: ${e.message}" }
throw IllegalStateException("Unexpected error while reading from database", e)
}
/**
* Converts the result of an SQL query to the DataFrame.
*
 * __NOTE:__ The SQL query should start with SELECT and contain a single statement for reading data without any manipulation.
 * It should not contain the `;` symbol.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
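 * ### Example:
 * A minimal sketch; the URL and credentials are illustrative:
 * ```kotlin
 * val dbConfig = DbConnectionConfig("jdbc:postgresql://localhost:5432/mydb", "user", "password")
 * // The connection is opened, set read-only, and rolled back/closed automatically.
 * val df = DataFrame.readSqlQuery(dbConfig, "SELECT id, name FROM customers WHERE age > 25", limit = 50)
 * ```
 *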
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
* @param [sqlQuery] the SQL query to execute.
 * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [dbConfig].
* @param [strictValidation] if `true`, the method validates that the provided query is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the result of the SQL query.
*/
public fun DataFrame.Companion.readSqlQuery(
dbConfig: DbConnectionConfig,
sqlQuery: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
return withReadOnlyConnection(dbConfig, dbType) { conn ->
readSqlQuery(conn, sqlQuery, limit, inferNullability, dbType, strictValidation, configureStatement)
}
}
/**
* Converts the result of an SQL query to the DataFrame.
*
 * __NOTE:__ The SQL query should start with SELECT and contain a single statement for reading data without any manipulation.
 * It should not contain the `;` symbol.
*
* @param [dataSource] the [DataSource] to obtain a database connection from.
* @param [sqlQuery] the SQL query to execute.
 * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [dataSource].
* @param [strictValidation] if `true`, the method validates that the provided query is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the result of the SQL query.
*
* @see [DataSource.getConnection]
*/
public fun DataFrame.Companion.readSqlQuery(
dataSource: DataSource,
sqlQuery: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
dataSource.connection.use { connection ->
return readSqlQuery(connection, sqlQuery, limit, inferNullability, dbType, strictValidation, configureStatement)
}
}
/**
* Converts the result of an SQL query to the DataFrame.
*
 * __NOTE:__ The SQL query should start with SELECT and contain a single statement for reading data without any manipulation.
 * It should not contain the `;` symbol.
*
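 * ### Example:
 * A minimal sketch; the query and the 30-second timeout are illustrative:
 * ```kotlin
 * val df = DataFrame.readSqlQuery(
 *     connection,
 *     "SELECT id, amount FROM orders WHERE amount > 100",
 *     configureStatement = { it.queryTimeout = 30 }, // cancel the query if it runs longer than 30 seconds
 * )
 * ```
 *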
* @param [connection] the database connection to execute the SQL query.
* @param [sqlQuery] the SQL query to execute.
 * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [connection].
* @param [strictValidation] if `true`, the method validates that the provided query is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the result of the SQL query.
*
* @see [DriverManager.getConnection]
*/
public fun DataFrame.Companion.readSqlQuery(
connection: Connection,
sqlQuery: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
if (strictValidation) {
require(isValidSqlQuery(sqlQuery)) {
"SQL query should start from SELECT and contain one query for reading data without any manipulation. " +
"Also it should not contain any separators like `;`."
}
} else {
logger.warn { "Strict validation is disabled. Ensure the SQL query '$sqlQuery' is correct and safe." }
}
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
val internalSqlQuery = limit?.let {
determinedDbType.buildSqlQueryWithLimit(sqlQuery, it)
} ?: sqlQuery
return executeQueryAndBuildDataFrame(
connection,
internalSqlQuery,
determinedDbType,
configureStatement,
limit,
inferNullability,
)
}
/**
* Converts the result of an SQL query or SQL table (by name) to the DataFrame.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
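 * ### Example:
 * A minimal sketch; the URL and names are illustrative:
 * ```kotlin
 * val dbConfig = DbConnectionConfig("jdbc:h2:mem:demo", "", "")
 * val byName = dbConfig.readDataFrame("customers", limit = 10)     // dispatched to readSqlTable
 * val byQuery = dbConfig.readDataFrame("SELECT * FROM customers")  // dispatched to readSqlQuery
 * ```
 *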
 * @param [sqlQueryOrTableName] the SQL query to execute or the name of an SQL table.
 * It should be the name of an existing SQL table,
 * or an SQL query that starts with SELECT and contains a single statement for reading data without any manipulation.
 * It should not contain the `;` symbol.
 * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [DbConnectionConfig].
* @param [strictValidation] if `true`, the method validates that the provided query or table name is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the result of the SQL query.
*/
public fun DbConnectionConfig.readDataFrame(
sqlQueryOrTableName: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
return when {
isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(
this,
sqlQueryOrTableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(
this,
sqlQueryOrTableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
else -> throw IllegalArgumentException(
"$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
)
}
}
/**
* Converts the result of an SQL query or SQL table (by name) to the DataFrame.
*
 * @param [sqlQueryOrTableName] the SQL query to execute or the name of an SQL table.
 * It should be the name of an existing SQL table,
 * or an SQL query that starts with SELECT and contains a single statement for reading data without any manipulation.
 * It should not contain the `;` symbol.
 * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [Connection].
* @param [strictValidation] if `true`, the method validates that the provided query or table name is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the result of the SQL query.
*/
public fun Connection.readDataFrame(
sqlQueryOrTableName: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
return when {
isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(
this,
sqlQueryOrTableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(
this,
sqlQueryOrTableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
else -> throw IllegalArgumentException(
"$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
)
}
}
/**
* Converts the result of an SQL query or SQL table (by name) to the DataFrame.
*
* ### Example with HikariCP:
* ```kotlin
* import com.zaxxer.hikari.HikariConfig
* import com.zaxxer.hikari.HikariDataSource
*
* val config = HikariConfig().apply {
* jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
* username = "user"
* password = "password"
* }
* val dataSource = HikariDataSource(config)
*
* // Read from a table
* val customersDF = dataSource.readDataFrame("customers", limit = 100)
*
* // Or execute a query
* val queryDF = dataSource.readDataFrame("SELECT * FROM orders WHERE amount > 100")
* ```
*
 * @param [sqlQueryOrTableName] the SQL query to execute or the name of an SQL table.
 * It should be the name of an existing SQL table,
 * or an SQL query that starts with SELECT and contains a single statement for reading data without any manipulation.
 * It should not contain the `;` symbol.
 * @param [limit] the maximum number of rows to retrieve from the result of the SQL query execution:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [DataSource].
* @param [strictValidation] if `true`, the method validates that the provided query or table name is in a valid format.
* Default is `true` for strict validation.
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return the DataFrame containing the result of the SQL query.
*
* @see [DataSource.getConnection]
*/
public fun DataSource.readDataFrame(
sqlQueryOrTableName: String,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
strictValidation: Boolean = true,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
validateLimit(limit)
connection.use { conn ->
return when {
isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery(
conn,
sqlQueryOrTableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable(
conn,
sqlQueryOrTableName,
limit,
inferNullability,
dbType,
strictValidation,
configureStatement,
)
else -> throw IllegalArgumentException(
"$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
)
}
}
}
/**
* Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
*
* A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
* By default, a ResultSet object is not updatable and has a cursor that can only move forward.
* Therefore, you can iterate through it only once, from the first row to the last row.
*
* For more details, refer to the official Java documentation on [ResultSet][java.sql.ResultSet].
*
* NOTE: Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
*
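 * ### Example:
 * A minimal sketch using the built-in [MsSql][org.jetbrains.kotlinx.dataframe.io.db.MsSql] db type
 * (pick the [DbType] matching your database); the query is illustrative:
 * ```kotlin
 * connection.createStatement().use { st ->
 *     st.executeQuery("SELECT * FROM customers").use { rs ->
 *         val df = DataFrame.readResultSet(rs, MsSql, limit = 100)
 *     }
 * }
 * ```
 *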
* @param [resultSet] the [ResultSet][java.sql.ResultSet] containing the data to read.
* Its state may be altered after the read operation.
* @param [dbType] the type of database that the [ResultSet] belongs to.
 * @param [limit] the maximum number of rows to read from the [ResultSet][java.sql.ResultSet]:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
* @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
*
* @see [java.sql.ResultSet]
*/
public fun DataFrame.Companion.readResultSet(
resultSet: ResultSet,
dbType: DbType,
limit: Int? = null,
inferNullability: Boolean = true,
): AnyFrame {
validateLimit(limit)
val tableColumns = getTableColumnsMetadata(resultSet, dbType)
return fetchAndConvertDataFromResultSet(tableColumns, resultSet, dbType, limit, inferNullability)
}
/**
* Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
*
* A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
* By default, a ResultSet object is not updatable and has a cursor that can only move forward.
* Therefore, you can iterate through it only once, from the first row to the last row.
*
* For more details, refer to the official Java documentation on [ResultSet][java.sql.ResultSet].
*
* NOTE: Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
*
* @param [dbType] the type of database that the [ResultSet] belongs to.
 * @param [limit] the maximum number of rows to read from the [ResultSet][java.sql.ResultSet]:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
* @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
*
* @see [java.sql.ResultSet]
*/
public fun ResultSet.readDataFrame(dbType: DbType, limit: Int? = null, inferNullability: Boolean = true): AnyFrame {
validateLimit(limit)
return DataFrame.readResultSet(this, dbType, limit, inferNullability)
}
/**
* Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
*
* A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
* By default, a ResultSet object is not updatable and has a cursor that can only move forward.
* Therefore, you can iterate through it only once, from the first row to the last row.
*
* For more details, refer to the official Java documentation on [ResultSet][java.sql.ResultSet].
*
* __NOTE:__ Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
*
* @param [resultSet] the [ResultSet][java.sql.ResultSet] containing the data to read.
* Its state may be altered after the read operation.
 * @param [connection] the connection to the database that the [ResultSet] belongs to
 * (required to determine the database type).
 * @param [limit] the maximum number of rows to read from the [ResultSet][java.sql.ResultSet]:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [connection].
* @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
*
* @see [java.sql.ResultSet]
*/
public fun DataFrame.Companion.readResultSet(
resultSet: ResultSet,
connection: Connection,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
): AnyFrame {
validateLimit(limit)
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
return readResultSet(resultSet, determinedDbType, limit, inferNullability)
}
/**
* Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
*
* A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
* By default, a ResultSet object is not updatable and has a cursor that can only move forward.
* Therefore, you can iterate through it only once, from the first row to the last row.
*
* For more details, refer to the official Java documentation on [ResultSet][java.sql.ResultSet].
*
* __NOTE:__ Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
*
 * @param [connection] the connection to the database that the [ResultSet] belongs to
 * (required to determine the database type).
 * @param [limit] the maximum number of rows to read from the [ResultSet][java.sql.ResultSet]:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [connection].
* @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
*
* @see [java.sql.ResultSet]
*/
public fun ResultSet.readDataFrame(
connection: Connection,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
): AnyFrame {
validateLimit(limit)
return DataFrame.readResultSet(this, connection, limit, inferNullability, dbType)
}
/**
* Reads all non-system tables from a database and returns them
 * as a map of table names to the corresponding dataframes, using the provided database configuration and limit.
*
* ### Default Behavior:
* If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
* - explicitly set as read-only via `Connection.setReadOnly(true)`
* - used with `autoCommit = false`
* - automatically rolled back after reading, ensuring no changes to the database
*
* Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
* and only permits safe `SELECT` operations internally.
*
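 * ### Example:
 * A minimal sketch; the URL and table name are illustrative:
 * ```kotlin
 * val dbConfig = DbConnectionConfig("jdbc:h2:mem:demo", "", "")
 * val tables: Map<String, AnyFrame> = DataFrame.readAllSqlTables(dbConfig, limit = 100)
 * val customersDF = tables["customers"]
 * ```
 *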
* @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
 * @param [limit] the maximum number of rows to read from each table:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [catalogue] the name of the catalog from which tables will be retrieved; `null` retrieves tables from all catalogs.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [dbConfig].
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database.
*/
public fun DataFrame.Companion.readAllSqlTables(
dbConfig: DbConnectionConfig,
catalogue: String? = null,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
configureStatement: (PreparedStatement) -> Unit = {},
): Map<String, AnyFrame> {
validateLimit(limit)
return withReadOnlyConnection(dbConfig, dbType) { connection ->
readAllSqlTables(connection, catalogue, limit, inferNullability, dbType, configureStatement)
}
}
/**
* Reads all non-system tables from a database and returns them
 * as a map of table names to the corresponding dataframes.
*
* ### Example with HikariCP:
* ```kotlin
* import com.zaxxer.hikari.HikariConfig
* import com.zaxxer.hikari.HikariDataSource
*
* val config = HikariConfig().apply {
* jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
* username = "user"
* password = "password"
* }
* val dataSource = HikariDataSource(config)
*
* // Read all tables from the database
* val allTables = DataFrame.readAllSqlTables(dataSource, limit = 100)
*
* // Access individual tables
* val customersDF = allTables["customers"]
* val ordersDF = allTables["orders"]
* ```
*
* @param [dataSource] the [DataSource] to get a database connection from.
 * @param [catalogue] the name of the catalog from which tables will be retrieved; `null` retrieves tables from all catalogs.
 * @param [limit] the maximum number of rows to read from each table:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [dataSource].
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database.
*
* @see [DataSource.getConnection]
*/
public fun DataFrame.Companion.readAllSqlTables(
dataSource: DataSource,
catalogue: String? = null,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
configureStatement: (PreparedStatement) -> Unit = {},
): Map<String, AnyFrame> {
validateLimit(limit)
dataSource.connection.use { connection ->
return readAllSqlTables(connection, catalogue, limit, inferNullability, dbType, configureStatement)
}
}
/**
* Reads all non-system tables from a database and returns them
 * as a map of table names to the corresponding dataframes.
*
* @param [connection] the database connection to read tables from.
 * @param [limit] the maximum number of rows to read from each table:
 * `null` (default) means no limit (all available rows are fetched);
 * a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [catalogue] the name of the catalog from which tables will be retrieved; `null` retrieves tables from all catalogs.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @param [dbType] the type of database; may be a custom user-provided object. Optional, defaults to `null`,
 * in which case the database type is detected from the [connection].
* @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
* This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
* @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database.
*
* @see [DriverManager.getConnection]
*/
public fun DataFrame.Companion.readAllSqlTables(
connection: Connection,
catalogue: String? = null,
limit: Int? = null,
inferNullability: Boolean = true,
dbType: DbType? = null,
configureStatement: (PreparedStatement) -> Unit = {},
): Map<String, AnyFrame> {
validateLimit(limit)
val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
val metaData = connection.metaData
val tablesResultSet = retrieveTableMetadata(metaData, catalogue, determinedDbType)
return buildMap {
while (tablesResultSet.next()) {
val tableMetadata = determinedDbType.buildTableMetadata(tablesResultSet)
        // Filter a second time here because of SQLite-specific logic and possible issues with future databases
if (determinedDbType.isSystemTable(tableMetadata)) {
continue
}
val fullTableName = buildFullTableName(catalogue, tableMetadata.schemaName, tableMetadata.name)
val dataFrame = readTableAsDataFrame(
connection,
fullTableName,
limit,
inferNullability,
dbType,
configureStatement,
)
put(fullTableName, dataFrame)
}
}
}
private fun retrieveTableMetadata(metaData: DatabaseMetaData, catalogue: String?, dbType: DbType): ResultSet {
    // Exclude system tables and other tables without data (table-type filtering is poorly supported by many databases)
val tableTypes = dbType.tableTypes?.toTypedArray()
return metaData.getTables(catalogue, null, null, tableTypes)
}
private fun buildFullTableName(catalogue: String?, schemaName: String?, tableName: String): String {
    // TODO: handle both cases: schema specified in the URL and schema not specified
// in h2 database name is recognized as a schema name https://www.h2database.com/html/features.html#database_url
// https://stackoverflow.com/questions/20896935/spring-hibernate-h2-database-schema-not-found
// could be Dialect/Database specific
return when {
catalogue != null && schemaName != null -> "$catalogue.$schemaName.$tableName"
catalogue != null -> "$catalogue.$tableName"
else -> tableName
}
}
private fun readTableAsDataFrame(
connection: Connection,
tableName: String,
limit: Int?,
inferNullability: Boolean,
dbType: DbType?,
configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
logger.debug { "Reading table: $tableName" }
val dataFrame = DataFrame.readSqlTable(
connection,
tableName,
limit,
inferNullability,
dbType,
true,
configureStatement,
)
logger.debug { "Finished reading table: $tableName" }
return dataFrame
}
internal fun getTableColumnsMetadata(resultSet: ResultSet, dbType: DbType): MutableList<TableColumnMetadata> =
dbType.getTableColumnsMetadata(resultSet).toMutableList()
/**
 * Fetches data from a ResultSet and converts it into a DataFrame.
*
* @param [tableColumns] a list containing the column metadata for the table.
* @param [rs] the ResultSet object containing the data to be fetched and converted.
* @param [dbType] the type of the database.
* @param [limit] the maximum number of rows to retrieve from the table.
* `null` (default) means no limit - all available rows will be fetched.
 * @param [inferNullability] indicates whether column nullability should be inferred from the data.
 * @return the DataFrame built from the fetched and converted data.
*/
internal fun fetchAndConvertDataFromResultSet(
tableColumns: MutableList<TableColumnMetadata>,
rs: ResultSet,
dbType: DbType,
limit: Int?,
inferNullability: Boolean,
): AnyFrame {
val columnKTypes = buildColumnKTypes(tableColumns, dbType)
val columnData = readAllRowsFromResultSet(rs, tableColumns, columnKTypes, dbType, limit)
val dataFrame = buildDataFrameFromColumnData(columnData, tableColumns, columnKTypes, dbType, inferNullability)
logger.debug {
"DataFrame with ${dataFrame.rowsCount()} rows and ${dataFrame.columnsCount()} columns created as a result of SQL query."
}
return dataFrame
}
/**
* Builds a map of column indices to their Kotlin types.
*/
private fun buildColumnKTypes(tableColumns: List<TableColumnMetadata>, dbType: DbType): Map<Int, KType> =
tableColumns.indices.associateWith { index ->
generateKType(dbType, tableColumns[index])
}
/**
* Reads all rows from ResultSet and returns a column-oriented data structure.
* Returns mutable lists to allow efficient post-processing without copying.
*/
private fun readAllRowsFromResultSet(
rs: ResultSet,
tableColumns: List<TableColumnMetadata>,
columnKTypes: Map<Int, KType>,
dbType: DbType,
limit: Int?,
): List<MutableList<Any?>> {
val columnsCount = tableColumns.size
val columnData = List(columnsCount) { mutableListOf<Any?>() }
var rowsRead = 0
while (rs.next() && (limit == null || rowsRead < limit)) {
repeat(columnsCount) { columnIndex ->
val value = dbType.extractValueFromResultSet(
rs = rs,
columnIndex = columnIndex,
columnMetadata = tableColumns[columnIndex],
kType = columnKTypes.getValue(columnIndex),
)
columnData[columnIndex].add(value)
}
rowsRead++
// if (rowsRead % 1000 == 0) logger.debug { "Loaded $rowsRead rows." } // TODO: https://github.com/Kotlin/dataframe/issues/455
}
return columnData
}
/**
* Builds DataFrame from column-oriented data.
* Accepts mutable lists to enable efficient in-place transformations.
*/
private fun buildDataFrameFromColumnData(
columnData: List<MutableList<Any?>>,
tableColumns: List<TableColumnMetadata>,
columnKTypes: Map<Int, KType>,
dbType: DbType,
inferNullability: Boolean,
): AnyFrame =
columnData.mapIndexed { index, values ->
dbType.buildDataColumn(
name = tableColumns[index].name,
values = values,
kType = columnKTypes.getValue(index),
inferNullability = inferNullability,
)
}.toDataFrame()
/**
* Generates a KType based on the given database type and table column metadata.
*
* @param dbType The database type.
* @param tableColumnMetadata The table column metadata.
*
* @return The generated KType.
*/
internal fun generateKType(dbType: DbType, tableColumnMetadata: TableColumnMetadata): KType =
dbType.convertSqlTypeToKType(tableColumnMetadata)
?: dbType.makeCommonSqlToKTypeMapping(tableColumnMetadata)
@@ -0,0 +1,250 @@
package org.jetbrains.kotlinx.dataframe.io
import io.github.oshai.kotlinlogging.KotlinLogging
import org.jetbrains.kotlinx.dataframe.io.db.DbType
import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromUrl
import java.sql.Connection
import java.sql.SQLException
private val logger = KotlinLogging.logger {}
/**
* Validates the provided limit to ensure it is either null or a positive integer.
* Throws an [IllegalArgumentException] if the limit is negative or zero.
*
* @param limit The maximum allowed number of rows. Use `null` for unlimited rows.
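 *
 * For example:
 * ```kotlin
 * validateLimit(null) // OK: unlimited
 * validateLimit(100)  // OK: positive
 * validateLimit(0)    // throws IllegalArgumentException
 * ```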
*/
internal fun validateLimit(limit: Int?) {
require(limit == null || limit > 0) {
"Parameter 'limit' must be positive, but was: $limit. Use null for unlimited rows."
}
}
/**
* Executes the given block with a managed JDBC connection created from [DbConnectionConfig].
*
* If [DbConnectionConfig.readOnly] is `true` (default), the connection will be:
* - explicitly marked as read-only
* - used with auto-commit disabled
* - rolled back after execution to prevent unintended modifications
*
* This utility guarantees proper closing of the connection and safe rollback in read-only mode.
* It should be used when the user does not manually manage JDBC connections.
*
* @param [dbConfig] The configuration used to create the connection.
* @param [dbType] Optional database type (not used here but can be passed through for logging or future extensions).
* @param [block] A lambda with receiver that runs with an open and managed [java.sql.Connection].
* @return The result of the [block] execution.
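 *
 * For example, mirroring how [readSqlQuery] uses it:
 * ```kotlin
 * val df = withReadOnlyConnection(dbConfig) { conn ->
 *     DataFrame.readSqlQuery(conn, "SELECT * FROM customers")
 * }
 * ```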
*/
internal inline fun <T> withReadOnlyConnection(
dbConfig: DbConnectionConfig,
dbType: DbType? = null,
block: (Connection) -> T,
): T {
val actualDbType = dbType ?: extractDBTypeFromUrl(dbConfig.url)
val connection = actualDbType.createConnection(dbConfig)
return connection.use { conn ->
try {
if (dbConfig.readOnly) {
conn.autoCommit = false
}
block(conn)
} finally {
if (dbConfig.readOnly) {
try {
conn.rollback()
} catch (e: SQLException) {
logger.warn(e) {
"Failed to rollback read-only transaction (url=${dbConfig.url})"
}
}
}
}
}
}
/**
* A regular expression defining the valid pattern for SQL table names.
*
* This pattern enforces that table names must:
* - Contain only Unicode letters, Unicode digits, or underscores.
* - Optionally be segmented by dots to indicate schema and table separation.
*
* It ensures compatibility with most SQL database naming conventions, thus minimizing risks of invalid names
* or injection vulnerabilities.
*
* Example of valid table names:
* - `my_table`
* - `schema1.table2`
*
* Example of invalid table names:
* - `my-table` (contains a dash)
* - `table!name` (contains special characters)
* - `.startWithDot` (cannot start with a dot)
*/
internal const val TABLE_NAME_VALID_PATTERN = "^[\\p{L}\\p{N}_]+(\\.[\\p{L}\\p{N}_]+)*$"
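/**
 * Heuristic: treats the input as an SQL query if it contains the standalone keyword `SELECT`
 * (case-insensitive) anywhere in the string.
 */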
internal fun isSqlQuery(sqlQueryOrTableName: String): Boolean {
val queryPattern = Regex("(?i)\\b(SELECT)\\b")
return queryPattern.containsMatchIn(sqlQueryOrTableName.trim())
}
/**
* SQL table name pattern matching: __catalog.schema.table__
* Allows alphanumeric characters and underscores, must start with letter or underscore
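 *
 * For example, per the pattern below:
 * ```kotlin
 * isSqlTableName("customers")              // true
 * isSqlTableName("mydb.public.customers")  // true: catalog.schema.table
 * isSqlTableName("SELECT * FROM t")        // false: contains spaces
 * ```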
*/
private val SQL_TABLE_NAME_PATTERN = Regex("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*){0,2}$")
internal fun isSqlTableName(sqlQueryOrTableName: String): Boolean {
// Match table names with optional schema and catalog (e.g., catalog.schema.table)
return SQL_TABLE_NAME_PATTERN.matches(sqlQueryOrTableName.trim())
}
internal val FORBIDDEN_PATTERNS_REGEX = listOf(
";", // Separator for SQL statements
"--", // Single-line comments
"/\\*", // Start of multi-line comments
"\\*/", // End of multi-line comments
"\\bDROP\\b", // DROP as a full word
"\\bDELETE\\b", // DELETE as a full word
"\\bINSERT\\b", // INSERT as a full word
"\\bUPDATE\\b", // UPDATE as a full word
"\\bEXEC\\b", // EXEC as a full word
"\\bEXECUTE\\b", // EXECUTE as a full word
"\\bCREATE\\b", // CREATE as a full word
"\\bALTER\\b", // ALTER as a full word
"\\bGRANT\\b", // GRANT as a full word
"\\bREVOKE\\b", // REVOKE as a full word
"\\bMERGE\\b", // MERGE as a full word
).map { Regex(it, RegexOption.IGNORE_CASE) }
/**
* Checks if a given string contains forbidden patterns or keywords.
* Logs a clear and friendly message if any forbidden pattern is found.
*
* ### Forbidden SQL Examples:
* 1. **Single-line comment** (using `--`):
* - `SELECT * FROM Sale WHERE amount = 100.0 -- AND id = 5`
*
* 2. **Multi-line comment** (using `/* */`):
* - `SELECT * FROM Customer /* Possible malicious comment */ WHERE id = 1`
*
* 3. **Multiple statements separated by semicolon (`;`)**:
* - `SELECT * FROM Sale WHERE amount = 500.0; DROP TABLE Customer`
*
* 4. **Potentially malicious SQL with single quotes for injection**:
* - `SELECT * FROM Sale WHERE id = 1 AND amount = 100.0 OR '1'='1`
*
* 5. **Usage of dangerous commands like `DROP`, `DELETE`, `ALTER`, etc.**:
* - `DROP TABLE Customer; SELECT * FROM Sale`
*
* ### Allowed SQL Examples:
* 1. Query with names containing reserved words as parts of identifiers:
* - `SELECT last_update FROM HELLO_ALTER`
*
* 2. Query with fully valid syntax:
* - `SELECT id, name FROM Customers WHERE age > 25`
*
* 3. Query with identifiers resembling commands but not in forbidden contexts:
* - `SELECT id, amount FROM TRANSACTION_DROP`
*
* 4. Query with case-insensitive identifiers:
* - `select Id, Name from Hello_Table`
*
* ### Key Notes:
* - Reserved keywords like `DROP`, `DELETE`, `ALTER`, etc., are forbidden **only when they appear as standalone commands**.
* - Reserved words as parts of table or column names (e.g., `HELLO_ALTER`, `myDropTable`) **are allowed**.
* - Inline or multi-line comments (`--` or `/* */`) are restricted to prevent potential SQL injection attacks.
* - Multiple SQL statements separated by semicolons (`;`) are not allowed to prevent the execution of unintended commands.
*/
internal fun hasForbiddenPatterns(input: String): Boolean {
for (regex in FORBIDDEN_PATTERNS_REGEX) {
if (regex.containsMatchIn(input)) {
logger.error {
"Validation failed: The input contains a forbidden element matching '${regex.pattern}'. Please review the input: '$input'."
}
return true
}
}
return false
}
/**
 * List of SQL operators a query is allowed to start with.
*/
internal val ALLOWED_SQL_OPERATORS = listOf("SELECT", "WITH", "VALUES", "TABLE")
/**
 * Validates that the SQL query is safe and starts with one of the allowed operators (see [ALLOWED_SQL_OPERATORS]).
* Ensures a proper syntax structure, checks for balanced quotes, and disallows dangerous commands or patterns.
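 *
 * For example, per the checks below:
 * ```kotlin
 * isValidSqlQuery("SELECT id FROM users")          // true
 * isValidSqlQuery("SELECT * FROM t; DROP TABLE t") // false: `;` separator and DROP keyword
 * isValidSqlQuery("DELETE FROM users")             // false: not an allowed starting operator
 * ```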
*/
internal fun isValidSqlQuery(sqlQuery: String): Boolean {
val normalizedSqlQuery = sqlQuery.trim().uppercase()
// Log the query being validated
logger.debug { "Validating SQL query: '$sqlQuery'" }
// Ensure the query starts from one of the allowed SQL operators
if (ALLOWED_SQL_OPERATORS.none { normalizedSqlQuery.startsWith(it) }) {
logger.error {
"Validation failed: The SQL query must start with one of: $ALLOWED_SQL_OPERATORS. Given query: '$sqlQuery'."
}
return false
}
// Validate against forbidden patterns
if (hasForbiddenPatterns(normalizedSqlQuery)) {
return false
}
// Check if there are balanced quotes (single and double)
val singleQuotes = sqlQuery.count { it == '\'' }
val doubleQuotes = sqlQuery.count { it == '"' }
if (singleQuotes % 2 != 0) {
logger.error {
"Validation failed: Unbalanced single quotes in the SQL query. " +
"Please correct the query: '$sqlQuery'."
}
return false
}
if (doubleQuotes % 2 != 0) {
logger.error {
"Validation failed: Unbalanced double quotes in the SQL query. " +
"Please correct the query: '$sqlQuery'."
}
return false
}
logger.debug { "SQL query validation succeeded for query: '$sqlQuery'." }
return true
}
/**
* Validates if the given SQL table name is safe and logs any validation violations.
*/
internal fun isValidTableName(tableName: String): Boolean {
val normalizedTableName = tableName.trim().uppercase()
// Log the table name being validated
logger.debug { "Validating SQL table name: '$tableName'" }
// Validate against forbidden patterns
if (hasForbiddenPatterns(normalizedTableName)) {
return false
}
// Validate the table name structure: letters, numbers, underscores, and dots are allowed
val tableNameRegex = Regex(TABLE_NAME_VALID_PATTERN)
if (!tableNameRegex.matches(normalizedTableName)) {
logger.error {
"Validation failed: The table name contains invalid characters. " +
"Only letters, numbers, underscores, and dots are allowed. Provided name: '$tableName'."
}
return false
}
logger.debug { "Table name validation passed for table: '$tableName'." }
return true
}
@@ -0,0 +1 @@
org.jetbrains.kotlinx.dataframe.io.Jdbc
@@ -0,0 +1,160 @@
package org.jetbrains.kotlinx.dataframe.io
import io.kotest.assertions.withClue
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.inferType
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.io.db.MsSql
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import java.sql.Connection
import java.sql.ResultSet
import kotlin.reflect.typeOf
private const val TEST_TABLE_NAME = "testtable123"
internal fun inferNullability(connection: Connection) {
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") }
// prepare tables and data
@Language("SQL")
val createTestTable1Query = """
CREATE TABLE $TEST_TABLE_NAME (
id INT PRIMARY KEY,
name VARCHAR(50),
surname VARCHAR(50),
age INT NOT NULL
)
"""
connection.createStatement().use { st -> st.execute(createTestTable1Query) }
connection.createStatement()
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (1, 'John', 'Crawford', 40)")
connection.createStatement()
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (2, 'Alice', 'Smith', 25)")
connection.createStatement()
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (3, 'Bob', 'Johnson', 47)")
connection.createStatement()
.execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (4, 'Sam', NULL, 15)")
// start testing `readSqlTable` method
// with default inferNullability: Boolean = true
val df = DataFrame.readSqlTable(connection, TEST_TABLE_NAME)
df.schema().columns["id"]!!.type shouldBe typeOf<Int>()
df.schema().columns["name"]!!.type shouldBe typeOf<String>()
df.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
df.schema().columns["age"]!!.type shouldBe typeOf<Int>()
val dataSchema = DataFrameSchema.readSqlTable(connection, TEST_TABLE_NAME)
dataSchema.columns.size shouldBe 4
dataSchema.columns["id"]!!.type shouldBe typeOf<Int>()
dataSchema.columns["name"]!!.type shouldBe typeOf<String?>()
dataSchema.columns["surname"]!!.type shouldBe typeOf<String?>()
dataSchema.columns["age"]!!.type shouldBe typeOf<Int>()
// with inferNullability: Boolean = false
val df1 = DataFrame.readSqlTable(connection, TEST_TABLE_NAME, inferNullability = false)
df1.schema().columns["id"]!!.type shouldBe typeOf<Int>()
    // without nullability inference, this column keeps its nullable type even though it contains no nulls
df1.schema().columns["name"]!!.type shouldBe typeOf<String?>()
df1.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
df1.schema().columns["age"]!!.type shouldBe typeOf<Int>()
// end testing `readSqlTable` method
    // start testing `readSqlQuery` method
    // with default inferNullability: Boolean = true
@Language("SQL")
val sqlQuery =
"""
SELECT name, surname, age FROM $TEST_TABLE_NAME
""".trimIndent()
val df2 = DataFrame.readSqlQuery(connection, sqlQuery)
df2.schema().columns["name"]!!.type shouldBe typeOf<String>()
df2.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
df2.schema().columns["age"]!!.type shouldBe typeOf<Int>()
val dataSchema2 = DataFrameSchema.readSqlQuery(connection, sqlQuery)
dataSchema2.columns.size shouldBe 3
dataSchema2.columns["name"]!!.type shouldBe typeOf<String?>()
dataSchema2.columns["surname"]!!.type shouldBe typeOf<String?>()
dataSchema2.columns["age"]!!.type shouldBe typeOf<Int>()
// with inferNullability: Boolean = false
val df3 = DataFrame.readSqlQuery(connection, sqlQuery, inferNullability = false)
    // without nullability inference, this column keeps its nullable type even though it contains no nulls
df3.schema().columns["name"]!!.type shouldBe typeOf<String?>()
df3.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
df3.schema().columns["age"]!!.type shouldBe typeOf<Int>()
    // end testing `readSqlQuery` method
// start testing `readResultSet` method
connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE).use { st ->
@Language("SQL")
val selectStatement = "SELECT * FROM $TEST_TABLE_NAME"
st.executeQuery(selectStatement).use { rs ->
            // with default inferNullability: Boolean = true
val df4 = DataFrame.readResultSet(rs, MsSql)
df4.schema().columns["id"]!!.type shouldBe typeOf<Int>()
df4.schema().columns["name"]!!.type shouldBe typeOf<String>()
df4.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
df4.schema().columns["age"]!!.type shouldBe typeOf<Int>()
rs.beforeFirst()
val dataSchema3 = DataFrameSchema.readResultSet(rs, MsSql)
dataSchema3.columns.size shouldBe 4
dataSchema3.columns["id"]!!.type shouldBe typeOf<Int>()
dataSchema3.columns["name"]!!.type shouldBe typeOf<String?>()
dataSchema3.columns["surname"]!!.type shouldBe typeOf<String?>()
dataSchema3.columns["age"]!!.type shouldBe typeOf<Int>()
// with inferNullability: Boolean = false
rs.beforeFirst()
val df5 = DataFrame.readResultSet(rs, MsSql, inferNullability = false)
df5.schema().columns["id"]!!.type shouldBe typeOf<Int>()
            // without nullability inference, this column keeps its nullable type even though it contains no nulls
df5.schema().columns["name"]!!.type shouldBe typeOf<String?>()
df5.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
df5.schema().columns["age"]!!.type shouldBe typeOf<Int>()
}
}
// end testing `readResultSet` method
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") }
}
/**
* Helper to check whether the provided schema matches the inferred schema.
*
* It must hold that all types in the provided schema are equal or super to
* the corresponding types in the inferred schema.
*/
@Suppress("INVISIBLE_REFERENCE")
fun AnyFrame.assertInferredTypesMatchSchema() {
withClue({
"""
|Inferred schema must be <: Provided schema
|
|Inferred Schema:
|${inferType().schema().toString().lines().joinToString("\n|")}
|
|Provided Schema:
|${schema().toString().lines().joinToString("\n|")}
""".trimMargin()
}) {
schema().compare(inferType().schema()).isSuperOrMatches() shouldBe true
}
}
File diff suppressed because it is too large
@@ -0,0 +1,425 @@
package org.jetbrains.kotlinx.dataframe.io.h2
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.select
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Test
import java.math.BigDecimal
import java.sql.Blob
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.Date
import kotlin.reflect.typeOf
private const val URL = "jdbc:h2:mem:test1;DB_CLOSE_DELAY=-1;MODE=MariaDB;DATABASE_TO_LOWER=TRUE"
@DataSchema
interface Table1MariaDb {
val id: Int
val bitCol: Boolean
val tinyintcol: Int
val smallintcol: Short?
val mediumintcol: Int
val mediumintunsignedcol: Int
val integercol: Int
val intCol: Int
val integerunsignedcol: Long
val bigintcol: Long
val floatcol: Float
val doublecol: Double
val decimalcol: BigDecimal
val dateCol: String
val datetimeCol: String
val timestampCol: String
val timeCol: String
val yearCol: String
val varcharCol: String
val charCol: String
val binaryCol: ByteArray
val varbinaryCol: ByteArray
val tinyblobCol: ByteArray
val blobCol: ByteArray
val mediumblobCol: ByteArray
val longblobCol: ByteArray
val textCol: String
val mediumtextCol: String
val longtextCol: String
val enumCol: String
val jsonCol: String
}
@DataSchema
interface Table2MariaDb {
val id: Int
val bitCol: Boolean?
val tinyintCol: Int?
val smallintCol: Int?
val mediumintCol: Int?
val mediumintUnsignedCol: Int?
val integercol: Int?
val intCol: Int?
val integerUnsignedCol: Long?
val bigintCol: Long?
val floatCol: Float?
val doubleCol: Double?
val decimalCol: Double?
val dateCol: String?
val datetimeCol: String?
val timestampCol: String?
val timeCol: String?
val yearCol: String?
val varcharCol: String?
val charCol: String?
val binaryCol: ByteArray?
val varbinaryCol: ByteArray?
val tinyblobCol: ByteArray?
val blobCol: ByteArray?
val mediumblobCol: ByteArray?
val longblobCol: ByteArray?
val textCol: String?
val mediumtextCol: String?
val longtextCol: String?
val enumCol: String?
val jsonCol: String?
}
@DataSchema
interface Table3MariaDb {
val id: Int
val enumCol: String
val setCol: Char?
}
private const val JSON_STRING =
"{\"details\": {\"foodType\": \"Pizza\", \"menu\": \"https://www.loumalnatis.com/our-menu\"}, \n" +
" \t\"favorites\": [{\"description\": \"Pepperoni deep dish\", \"price\": 18.75}, \n" +
"{\"description\": \"The Lou\", \"price\": 24.75}]}"
class MariadbH2Test {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL)
@Language("SQL")
val createTableQuery = """
CREATE TABLE IF NOT EXISTS table1 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT NOT NULL,
tinyintCol TINYINT NOT NULL,
smallintCol SMALLINT,
mediumintCol MEDIUMINT NOT NULL,
mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
integerCol INTEGER NOT NULL,
intCol INT NOT NULL,
integerUnsignedCol INTEGER UNSIGNED NOT NULL,
bigintCol BIGINT NOT NULL,
floatCol FLOAT NOT NULL,
doubleCol DOUBLE NOT NULL,
decimalCol DECIMAL NOT NULL,
dateCol DATE NOT NULL,
datetimeCol DATETIME NOT NULL,
timestampCol TIMESTAMP NOT NULL,
timeCol TIME NOT NULL,
yearCol YEAR NOT NULL,
varcharCol VARCHAR(255) NOT NULL,
charCol CHAR(10) NOT NULL,
binaryCol BINARY(64) NOT NULL,
varbinaryCol VARBINARY(128) NOT NULL,
tinyblobCol TINYBLOB NOT NULL,
blobCol BLOB NOT NULL,
mediumblobCol MEDIUMBLOB NOT NULL ,
longblobCol LONGBLOB NOT NULL,
textCol TEXT NOT NULL,
mediumtextCol MEDIUMTEXT NOT NULL,
longtextCol LONGTEXT NOT NULL,
enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
jsonCol JSON NOT NULL
)
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val createTableQuery2 = """
CREATE TABLE IF NOT EXISTS table2 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT,
tinyintCol TINYINT,
smallintCol SMALLINT,
mediumintCol MEDIUMINT,
mediumintUnsignedCol MEDIUMINT UNSIGNED,
integerCol INTEGER,
intCol INT,
integerUnsignedCol INTEGER UNSIGNED,
bigintCol BIGINT,
floatCol FLOAT,
doubleCol DOUBLE,
decimalCol DECIMAL,
dateCol DATE,
datetimeCol DATETIME,
timestampCol TIMESTAMP,
timeCol TIME,
yearCol YEAR,
varcharCol VARCHAR(255),
charCol CHAR(10),
binaryCol BINARY(64),
varbinaryCol VARBINARY(128),
tinyblobCol TINYBLOB,
blobCol BLOB,
mediumblobCol MEDIUMBLOB,
longblobCol LONGBLOB,
textCol TEXT,
mediumtextCol MEDIUMTEXT,
longtextCol LONGTEXT,
enumCol ENUM('Value1', 'Value2', 'Value3')
)
"""
connection.createStatement().execute(createTableQuery2.trimIndent())
@Language("SQL")
val insertData1 =
"""
INSERT INTO table1 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, jsonCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
@Language("SQL")
val insertData2 =
"""
INSERT INTO table2 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
connection.prepareStatement(insertData1).use { st ->
// Insert data into table1
for (i in 1..3) {
st.setBoolean(1, true)
st.setByte(2, i.toByte())
st.setShort(3, (i * 10).toShort())
st.setInt(4, i * 100)
st.setInt(5, i * 100)
st.setInt(6, i * 100)
st.setInt(7, i * 100)
st.setInt(8, i * 100)
st.setInt(9, i * 100)
st.setFloat(10, i * 10.0f)
st.setDouble(11, i * 10.0)
st.setBigDecimal(12, BigDecimal(i * 10))
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, "textValue$i")
st.setString(27, "mediumtextValue$i")
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, JSON_STRING)
st.executeUpdate()
}
}
connection.prepareStatement(insertData2).use { st ->
// Insert data into table2
for (i in 1..3) {
st.setBoolean(1, false)
st.setByte(2, (i * 2).toByte())
st.setShort(3, (i * 20).toShort())
st.setInt(4, i * 200)
st.setInt(5, i * 200)
st.setInt(6, i * 200)
st.setInt(7, i * 200)
st.setInt(8, i * 200)
st.setInt(9, i * 200)
st.setFloat(10, i * 20.0f)
st.setDouble(11, i * 20.0)
st.setBigDecimal(12, BigDecimal(i * 20))
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, null)
st.setString(27, null)
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `basic test for reading sql tables`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
val result = df1.filter { it[Table1MariaDb::id] == 1 }
result[0][26] shouldBe "textValue1"
val byteArray = "tinyblobValue".toByteArray()
(result[0][22] as Blob).getBytes(1, byteArray.size) contentEquals byteArray
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["textcol"]!!.type shouldBe typeOf<String>()
schema.columns["varbinarycol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["binarycol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["longblobcol"]!!.type shouldBe typeOf<Blob>()
schema.columns["tinyblobcol"]!!.type shouldBe typeOf<Blob>()
schema.columns["datecol"]!!.type shouldBe typeOf<Date>()
schema.columns["datetimecol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timestampcol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timecol"]!!.type shouldBe typeOf<java.sql.Time>()
schema.columns["yearcol"]!!.type shouldBe typeOf<Int>()
val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MariaDb>()
val result2 = df2.filter { it[Table2MariaDb::id] == 1 }
result2[0][26] shouldBe null
val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
t1.id,
t1.enumCol
FROM table1 t1
JOIN table2 t2 ON t1.id = t2.id
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MariaDb>()
val result = df.filter { it[Table3MariaDb::id] == 1 }
result[0][1] shouldBe "Value1"
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["enumcol"]!!.type shouldBe typeOf<Any>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection, limit = 1000).values.toList()
val table1Df = dataframes[0].cast<Table1MariaDb>()
table1Df.rowsCount() shouldBe 3
table1Df.filter { it[Table1MariaDb::integercol] > 100 }.rowsCount() shouldBe 2
table1Df[0][11] shouldBe 10.0
table1Df[0][26] shouldBe "textValue1"
val table2Df = dataframes[1].cast<Table2MariaDb>()
table2Df.rowsCount() shouldBe 3
table2Df.filter {
it[Table2MariaDb::integercol] != null && it[Table2MariaDb::integercol]!! > 400
}.rowsCount() shouldBe 1
table2Df[0][11] shouldBe 20.0
table2Df[0][26] shouldBe null
}
@Test
fun `reading numeric types`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
val result = df1.select("tinyintcol")
.add("tinyintcol2") { it[Table1MariaDb::tinyintcol] }
result[0][1] shouldBe 1
val result2 = df1.select("mediumintcol")
.add("mediumintcol2") { it[Table1MariaDb::mediumintcol] }
result2[0][1] shouldBe 100
val result3 = df1.select("mediumintunsignedcol")
.add("mediumintunsignedcol2") { it[Table1MariaDb::mediumintunsignedcol] }
result3[0][1] shouldBe 100
val result5 = df1.select("bigintcol")
.add("bigintcol2") { it[Table1MariaDb::bigintcol] }
result5[0][1] shouldBe 100
val result7 = df1.select("doublecol")
.add("doublecol2") { it[Table1MariaDb::doublecol] }
result7[0][1] shouldBe 10.0
val result8 = df1.select("decimalcol")
.add("decimalcol2") { it[Table1MariaDb::decimalcol] }
result8[0][1] shouldBe BigDecimal("10")
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["tinyintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int?>()
schema.columns["mediumintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["mediumintunsignedcol"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
schema.columns["floatcol"]!!.type shouldBe typeOf<Double>()
schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
schema.columns["decimalcol"]!!.type shouldBe typeOf<BigDecimal>()
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
}
@@ -0,0 +1,250 @@
package org.jetbrains.kotlinx.dataframe.io.h2
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Test
import java.math.BigDecimal
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.Date
import java.util.UUID
import kotlin.reflect.typeOf
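// H2 in-memory database emulating MS SQL Server; DB_CLOSE_DELAY=-1 keeps the database alive until the JVM exits.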
private const val URL =
"jdbc:h2:mem:testmssql;DB_CLOSE_DELAY=-1;MODE=MSSQLServer;DATABASE_TO_UPPER=FALSE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"
@DataSchema
interface Table1MSSSQL {
val id: Int
val bigintColumn: Long
val binaryColumn: ByteArray
val bitColumn: Boolean
val charColumn: Char
val dateColumn: Date
val datetime3Column: java.sql.Timestamp
val datetime2Column: java.sql.Timestamp
val decimalColumn: BigDecimal
val floatColumn: Double
val imageColumn: ByteArray?
val intColumn: Int
val moneyColumn: BigDecimal
val ncharColumn: Char
val ntextColumn: String
val numericColumn: BigDecimal
val nvarcharColumn: String
val nvarcharMaxColumn: String
val realColumn: Float
val smalldatetimeColumn: java.sql.Timestamp
val smallintColumn: Int
val smallmoneyColumn: BigDecimal
val timeColumn: java.sql.Time
val timestampColumn: java.sql.Timestamp
val tinyintColumn: Int
val uniqueidentifierColumn: Char
val varbinaryColumn: ByteArray
val varbinaryMaxColumn: ByteArray
val varcharColumn: String
val varcharMaxColumn: String
}
class MSSQLH2Test {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL)
@Language("SQL")
val createTableQuery = """
CREATE TABLE Table1 (
id INT NOT NULL IDENTITY PRIMARY KEY,
bigintColumn BIGINT,
binaryColumn BINARY(50),
bitColumn BIT,
charColumn CHAR(10),
dateColumn DATE,
datetime3Column DATETIME2(3),
datetime2Column DATETIME2,
decimalColumn DECIMAL(10,2),
floatColumn FLOAT,
imageColumn IMAGE,
intColumn INT,
moneyColumn MONEY,
ncharColumn NCHAR(10),
ntextColumn NTEXT,
numericColumn NUMERIC(10,2),
nvarcharColumn NVARCHAR(50),
nvarcharMaxColumn NVARCHAR(MAX),
realColumn REAL,
smalldatetimeColumn SMALLDATETIME,
smallintColumn SMALLINT,
smallmoneyColumn SMALLMONEY,
textColumn TEXT,
timeColumn TIME,
timestampColumn DATETIME2,
tinyintColumn TINYINT,
uniqueidentifierColumn UNIQUEIDENTIFIER,
varbinaryColumn VARBINARY(50),
varbinaryMaxColumn VARBINARY(MAX),
varcharColumn VARCHAR(50),
varcharMaxColumn VARCHAR(MAX)
);
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val insertData1 =
"""
INSERT INTO Table1 (
bigintColumn, binaryColumn, bitColumn, charColumn, dateColumn, datetime3Column, datetime2Column,
decimalColumn, floatColumn, imageColumn, intColumn, moneyColumn, ncharColumn,
ntextColumn, numericColumn, nvarcharColumn, nvarcharMaxColumn, realColumn, smalldatetimeColumn,
smallintColumn, smallmoneyColumn, textColumn, timeColumn, timestampColumn, tinyintColumn,
uniqueidentifierColumn, varbinaryColumn, varbinaryMaxColumn, varcharColumn, varcharMaxColumn
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
connection.prepareStatement(insertData1).use { st ->
for (i in 1..5) {
st.setLong(1, 123456789012345L) // bigintColumn
st.setBytes(2, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // binaryColumn
st.setBoolean(3, true) // bitColumn
st.setString(4, "Sample") // charColumn
st.setDate(5, java.sql.Date(System.currentTimeMillis())) // dateColumn
st.setTimestamp(6, java.sql.Timestamp(System.currentTimeMillis())) // datetime3Column
st.setTimestamp(7, java.sql.Timestamp(System.currentTimeMillis())) // datetime2Column
st.setBigDecimal(8, BigDecimal("12345.67")) // decimalColumn
st.setFloat(9, 123.45f) // floatColumn
st.setNull(10, java.sql.Types.NULL) // imageColumn (assuming nullable)
st.setInt(11, 123456) // intColumn
st.setBigDecimal(12, BigDecimal("123.45")) // moneyColumn
st.setString(13, "Sample") // ncharColumn
st.setString(14, "Sample$i text") // ntextColumn
st.setBigDecimal(15, BigDecimal("1234.56")) // numericColumn
st.setString(16, "Sample") // nvarcharColumn
st.setString(17, "Sample$i text") // nvarcharMaxColumn
st.setFloat(18, 123.45f) // realColumn
st.setTimestamp(19, java.sql.Timestamp(System.currentTimeMillis())) // smalldatetimeColumn
st.setInt(20, 123) // smallintColumn
st.setBigDecimal(21, BigDecimal("123.45")) // smallmoneyColumn
st.setString(22, "Sample$i text") // textColumn
st.setTime(23, java.sql.Time(System.currentTimeMillis())) // timeColumn
st.setTimestamp(24, java.sql.Timestamp(System.currentTimeMillis())) // timestampColumn
st.setInt(25, 123) // tinyintColumn
st.setObject(26, UUID.randomUUID()) // uniqueidentifierColumn
st.setBytes(27, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryColumn
st.setBytes(28, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryMaxColumn
st.setString(29, "Sample$i") // varcharColumn
st.setString(30, "Sample$i text") // varcharMaxColumn
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `basic test for reading sql tables`() {
val df1 = DataFrame.readSqlTable(connection, "table1", limit = 5).cast<Table1MSSSQL>()
val result = df1.filter { it[Table1MSSSQL::id] == 1 }
result[0][30] shouldBe "Sample1 text"
result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
result[0][Table1MSSSQL::bitColumn] shouldBe true
result[0][Table1MSSSQL::intColumn] shouldBe 123456
result[0][Table1MSSSQL::ntextColumn] shouldBe "Sample1 text"
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
schema.columns["binaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["bitColumn"]!!.type shouldBe typeOf<Boolean?>()
schema.columns["charColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["dateColumn"]!!.type shouldBe typeOf<Date?>()
schema.columns["datetime3Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["datetime2Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["decimalColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["floatColumn"]!!.type shouldBe typeOf<Double?>()
schema.columns["intColumn"]!!.type shouldBe typeOf<Int?>()
schema.columns["moneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["ncharColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["ntextColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["numericColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["nvarcharColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["nvarcharMaxColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["realColumn"]!!.type shouldBe typeOf<Float?>()
schema.columns["smalldatetimeColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["smallintColumn"]!!.type shouldBe typeOf<Int?>()
schema.columns["smallmoneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["timeColumn"]!!.type shouldBe typeOf<java.sql.Time?>()
schema.columns["timestampColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["tinyintColumn"]!!.type shouldBe typeOf<Int?>()
schema.columns["varbinaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["varbinaryMaxColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["varcharColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["varcharMaxColumn"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
Table1.id,
Table1.bigintColumn
FROM Table1
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery, limit = 3).cast<Table1MSSSQL>()
val result = df.filter { it[Table1MSSSQL::id] == 1 }
result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection, limit = 4).values.toList()
val table1Df = dataframes[0].cast<Table1MSSSQL>()
table1Df.rowsCount() shouldBe 4
table1Df.filter { it[Table1MSSSQL::id] > 2 }.rowsCount() shouldBe 2
table1Df[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
}
@@ -0,0 +1,425 @@
package org.jetbrains.kotlinx.dataframe.io.h2
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.select
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Test
import java.math.BigDecimal
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.Date
import kotlin.reflect.typeOf
// NOTE: the names of the test databases should differ to avoid collisions and should not contain the database system names themselves
private const val URL = "jdbc:h2:mem:test2;DB_CLOSE_DELAY=-1;MODE=MySQL;DATABASE_TO_LOWER=TRUE"
@DataSchema
interface Table1MySql {
val id: Int
val bitcol: Boolean
val tinyintcol: Int
val smallintcol: Int
val mediumintcol: Int
val mediumintunsignedcol: Int
val integercol: Int
val intcol: Int
val integerunsignedcol: Long
val bigintcol: Long
val floatcol: Float
val doublecol: Double
val decimalcol: BigDecimal
val datecol: String
val datetimecol: String
val timestampcol: String
val timecol: String
val yearcol: String
val varcharcol: String
val charcol: String
val binarycol: ByteArray
val varbinarycol: ByteArray
val tinyblobcol: ByteArray
val blobcol: ByteArray
val mediumblobcol: ByteArray
val longblobcol: ByteArray
val textcol: String
val mediumtextcol: String
val longtextcol: String
val enumcol: String
}
@DataSchema
interface Table2MySql {
val id: Int
val bitcol: Boolean?
val tinyintcol: Int?
val smallintcol: Int?
val mediumintcol: Int?
val mediumintunsignedcol: Int?
val integercol: Int?
val intcol: Int?
val integerunsignedcol: Long?
val bigintcol: Long?
val floatcol: Float?
val doublecol: Double?
val decimalcol: Double?
val datecol: String?
val datetimecol: String?
val timestampcol: String?
val timecol: String?
val yearcol: String?
val varcharcol: String?
val charcol: String?
val binarycol: ByteArray?
val varbinarycol: ByteArray?
val tinyblobcol: ByteArray?
val blobcol: ByteArray?
val mediumblobcol: ByteArray?
val longblobcol: ByteArray?
val textcol: String?
val mediumtextcol: String?
val longtextcol: String?
val enumcol: String?
}
@DataSchema
interface Table3MySql {
val id: Int
val enumcol: String
}
class MySqlH2Test {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL)
@Language("SQL")
val createTableQuery = """
CREATE TABLE IF NOT EXISTS table1 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT NOT NULL,
tinyintCol TINYINT NOT NULL,
smallintCol SMALLINT NOT NULL,
mediumintCol MEDIUMINT NOT NULL,
mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
integerCol INTEGER NOT NULL,
intCol INT NOT NULL,
integerUnsignedCol INTEGER UNSIGNED NOT NULL,
bigintCol BIGINT NOT NULL,
floatCol FLOAT NOT NULL,
doubleCol DOUBLE NOT NULL,
decimalCol DECIMAL NOT NULL,
dateCol DATE NOT NULL,
datetimeCol DATETIME NOT NULL,
timestampCol TIMESTAMP NOT NULL,
timeCol TIME NOT NULL,
yearCol YEAR NOT NULL,
varcharCol VARCHAR(255) NOT NULL,
charCol CHAR(10) NOT NULL,
binaryCol BINARY(64) NOT NULL,
varbinaryCol VARBINARY(128) NOT NULL,
tinyblobCol TINYBLOB NOT NULL,
blobCol BLOB NOT NULL,
mediumblobCol MEDIUMBLOB NOT NULL,
longblobCol LONGBLOB NOT NULL,
textCol TEXT NOT NULL,
mediumtextCol MEDIUMTEXT NOT NULL,
longtextCol LONGTEXT NOT NULL,
enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
data JSON
)
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val createTableQuery2 = """
CREATE TABLE IF NOT EXISTS table2 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT,
tinyintCol TINYINT,
smallintCol SMALLINT,
mediumintCol MEDIUMINT,
mediumintUnsignedCol MEDIUMINT UNSIGNED,
integerCol INTEGER,
intCol INT,
integerUnsignedCol INTEGER UNSIGNED,
bigintCol BIGINT,
floatCol FLOAT,
doubleCol DOUBLE,
decimalCol DECIMAL,
dateCol DATE,
datetimeCol DATETIME,
timestampCol TIMESTAMP,
timeCol TIME,
yearCol YEAR,
varcharCol VARCHAR(255),
charCol CHAR(10),
binaryCol BINARY(64),
varbinaryCol VARBINARY(128),
tinyblobCol TINYBLOB,
blobCol BLOB,
mediumblobCol MEDIUMBLOB,
longblobCol LONGBLOB,
textCol TEXT,
mediumtextCol MEDIUMTEXT,
longtextCol LONGTEXT,
enumCol ENUM('Value1', 'Value2', 'Value3'),
data JSON
)
"""
connection.createStatement().execute(createTableQuery2.trimIndent())
@Language("SQL")
val insertData1 =
"""
INSERT INTO table1 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, data
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
@Language("SQL")
val insertData2 =
"""
INSERT INTO table2 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, data
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
connection.prepareStatement(insertData1).use { st ->
// Insert data into table1
for (i in 1..3) {
st.setBoolean(1, true)
st.setByte(2, i.toByte())
st.setShort(3, (i * 10).toShort())
st.setInt(4, i * 100)
st.setInt(5, i * 100)
st.setInt(6, i * 100)
st.setInt(7, i * 100)
st.setInt(8, i * 100)
st.setInt(9, i * 100)
st.setFloat(10, i * 10.0f)
st.setDouble(11, i * 10.0)
st.setBigDecimal(12, BigDecimal(i * 10))
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, "textValue$i")
st.setString(27, "mediumtextValue$i")
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, "{\"key\": \"value\"}")
st.executeUpdate()
}
}
connection.prepareStatement(insertData2).use { st ->
// Insert data into table2
for (i in 1..3) {
st.setBoolean(1, false)
st.setByte(2, (i * 2).toByte())
st.setShort(3, (i * 20).toShort())
st.setInt(4, i * 200)
st.setInt(5, i * 200)
st.setInt(6, i * 200)
st.setInt(7, i * 200)
st.setInt(8, i * 200)
st.setInt(9, i * 200)
st.setFloat(10, i * 20.0f)
st.setDouble(11, i * 20.0)
st.setBigDecimal(12, BigDecimal(i * 20))
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, null)
st.setString(27, null)
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, "{\"key\": \"value\"}")
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `basic test for reading sql tables`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
val result = df1.filter { it[Table1MySql::id] == 1 }
result[0][26] shouldBe "textValue1"
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["textcol"]!!.type shouldBe typeOf<String>()
schema.columns["datecol"]!!.type shouldBe typeOf<Date>()
schema.columns["datetimecol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timestampcol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timecol"]!!.type shouldBe typeOf<java.sql.Time>()
schema.columns["yearcol"]!!.type shouldBe typeOf<Int>()
schema.columns["varbinarycol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["binarycol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["longblobcol"]!!.type shouldBe typeOf<java.sql.Blob>()
schema.columns["tinyblobcol"]!!.type shouldBe typeOf<java.sql.Blob>()
val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MySql>()
val result2 = df2.filter { it[Table2MySql::id] == 1 }
result2[0][26] shouldBe null
val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
t1.id,
t1.enumCol
FROM table1 t1
JOIN table2 t2 ON t1.id = t2.id
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MySql>()
val result = df.filter { it[Table3MySql::id] == 1 }
result[0][1] shouldBe "Value1"
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["enumcol"]!!.type shouldBe typeOf<Any>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
val table1Df = dataframes[0].cast<Table1MySql>()
table1Df.rowsCount() shouldBe 3
table1Df.filter { it[Table1MySql::integercol] > 100 }.rowsCount() shouldBe 2
table1Df[0][11] shouldBe 10.0
table1Df[0][26] shouldBe "textValue1"
val table2Df = dataframes[1].cast<Table2MySql>()
table2Df.rowsCount() shouldBe 3
table2Df.filter {
it[Table2MySql::integercol] != null && it[Table2MySql::integercol]!! > 400
}.rowsCount() shouldBe 1
table2Df[0][11] shouldBe 20.0
table2Df[0][26] shouldBe null
}
@Test
fun `reading numeric types`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
val result = df1.select("tinyintcol").add("tinyintcol2") { it[Table1MySql::tinyintcol] }
result[0][1] shouldBe 1.toByte()
val result1 = df1.select("smallintcol")
.add("smallintcol2") { it[Table1MySql::smallintcol] }
result1[0][1] shouldBe 10.toShort()
val result2 = df1.select("mediumintcol")
.add("mediumintcol2") { it[Table1MySql::mediumintcol] }
result2[0][1] shouldBe 100
val result3 = df1.select("mediumintunsignedcol")
.add("mediumintunsignedcol2") { it[Table1MySql::mediumintunsignedcol] }
result3[0][1] shouldBe 100
val result5 = df1.select("bigintcol")
.add("bigintcol2") { it[Table1MySql::bigintcol] }
result5[0][1] shouldBe 100
val result7 = df1.select("doublecol")
.add("doublecol2") { it[Table1MySql::doublecol] }
result7[0][1] shouldBe 10.0
val result8 = df1.select("decimalcol")
.add("decimalcol2") { it[Table1MySql::decimalcol] }
result8[0][1] shouldBe BigDecimal("10")
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["tinyintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["mediumintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["mediumintunsignedcol"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
schema.columns["floatcol"]!!.type shouldBe typeOf<Double>()
schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
schema.columns["decimalcol"]!!.type shouldBe typeOf<BigDecimal>()
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
}
@@ -0,0 +1,388 @@
package org.jetbrains.kotlinx.dataframe.io.h2
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.select
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Test
import java.math.BigDecimal
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.UUID
import kotlin.reflect.typeOf
private const val URL =
"jdbc:h2:mem:test3;DB_CLOSE_DELAY=-1;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;DEFAULT_NULL_ORDERING=HIGH"
@DataSchema
interface Table1 {
val id: Int
val bigintcol: Long
val smallintcol: Int
val bigserialcol: Long
val booleancol: Boolean
val byteacol: ByteArray
val charactercol: String
val characterncol: String
val charcol: String
val datecol: java.sql.Date
val doublecol: Double
val integercol: Int?
}
@DataSchema
interface Table2 {
val id: Int
val moneycol: String
val numericcol: BigDecimal
val realcol: Float
val smallintcol: Int
val serialcol: Int
val textcol: String?
val timecol: String
val timewithzonecol: String
val timestampcol: String
val timestampwithzonecol: String
val uuidcol: String
}
@DataSchema
interface ViewTable {
val id: Int
val bigintcol: Long
val textCol: String?
}
class PostgresH2Test {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL)
@Language("SQL")
val createTableStatement =
"""
CREATE TABLE IF NOT EXISTS table1 (
id serial PRIMARY KEY,
bigintCol bigint not null,
smallintCol smallint not null,
bigserialCol bigserial not null,
booleanCol boolean not null,
byteaCol bytea not null,
characterCol character not null,
characterNCol character(10) not null,
charCol char not null,
dateCol date not null,
doubleCol double precision not null,
integerCol integer,
intArrayCol integer array,
doubleArrayCol double precision array,
dateArrayCol date array,
textArrayCol text array,
booleanArrayCol boolean array
)
""".trimIndent()
connection.createStatement().execute(createTableStatement)
@Language("SQL")
val createTableQuery =
"""
CREATE TABLE IF NOT EXISTS table2 (
id serial PRIMARY KEY,
moneyCol money not null,
numericCol numeric not null,
realCol real not null,
smallintCol smallint not null,
serialCol serial not null,
textCol text,
timeCol time not null,
timeWithZoneCol time with time zone not null,
timestampCol timestamp not null,
timestampWithZoneCol timestamp with time zone not null,
uuidCol uuid not null
)
""".trimIndent()
connection.createStatement().execute(createTableQuery)
@Language("SQL")
val insertData1 =
"""
INSERT INTO table1 (
bigintCol, smallintCol, bigserialCol, booleanCol,
byteaCol, characterCol, characterNCol, charCol,
dateCol, doubleCol,
integerCol, intArrayCol,
doubleArrayCol, dateArrayCol, textArrayCol, booleanArrayCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
@Language("SQL")
val insertData2 =
"""
INSERT INTO table2 (
moneyCol, numericCol,
realCol, smallintCol,
serialCol, textCol, timeCol,
timeWithZoneCol, timestampCol, timestampWithZoneCol,
uuidCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
val intArray = connection.createArrayOf("INTEGER", arrayOf(1, 2, 3))
val doubleArray = connection.createArrayOf("DOUBLE", arrayOf(1.1, 2.2, 3.3))
val dateArray = connection.createArrayOf(
"DATE",
arrayOf(java.sql.Date.valueOf("2023-08-01"), java.sql.Date.valueOf("2023-08-02")),
)
val textArray = connection.createArrayOf("TEXT", arrayOf("Hello", "World"))
val booleanArray = connection.createArrayOf("BOOLEAN", arrayOf(true, false, true))
connection.prepareStatement(insertData1).use { st ->
// Insert data into table1
for (i in 1..3) {
st.setLong(1, i * 1000L)
st.setShort(2, 11.toShort())
st.setLong(3, 1000000000L + i)
st.setBoolean(4, i % 2 == 1)
st.setBytes(5, byteArrayOf(1, 2, 3))
st.setString(6, "A")
st.setString(7, "Hello")
st.setString(8, "A")
st.setDate(9, java.sql.Date.valueOf("2023-08-01"))
st.setDouble(10, 12.34)
st.setInt(11, 12345 * i)
st.setArray(12, intArray)
st.setArray(13, doubleArray)
st.setArray(14, dateArray)
st.setArray(15, textArray)
st.setArray(16, booleanArray)
st.executeUpdate()
}
}
connection.prepareStatement(insertData2).use { st ->
// Insert data into table2
for (i in 1..3) {
st.setBigDecimal(1, BigDecimal("123.45"))
st.setBigDecimal(2, BigDecimal("12.34"))
st.setFloat(3, 12.34f)
st.setInt(4, 1000 + i)
st.setInt(5, 1000000 + i)
st.setString(6, null)
st.setTime(7, java.sql.Time.valueOf("12:34:56"))
st.setTimestamp(8, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(9, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(10, java.sql.Timestamp(System.currentTimeMillis()))
st.setObject(11, UUID.randomUUID(), java.sql.Types.OTHER)
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `read from tables`() {
val tableName1 = "table1"
val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
val result = df1.filter { it[Table1::id] == 1 }
result[0][0] shouldBe 1
result[0][8] shouldBe "A"
result[0][12] shouldBe arrayOf(1, 2, 3)
result[0][13] shouldBe arrayOf(1.1, 2.2, 3.3)
result[0][14] shouldBe arrayOf(java.sql.Date.valueOf("2023-08-01"), java.sql.Date.valueOf("2023-08-02"))
result[0][15] shouldBe arrayOf("Hello", "World")
result[0][16] shouldBe arrayOf(true, false, true)
val schema = DataFrameSchema.readSqlTable(connection, tableName1)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["integercol"]!!.type shouldBe typeOf<Int?>()
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["intarraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["doublearraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["datearraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["textarraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["booleanarraycol"]!!.type.classifier shouldBe kotlin.Array::class
val tableName2 = "table2"
val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
val result2 = df2.filter { it[Table2::id] == 1 }
result2[0][4] shouldBe 1001
val schema2 = DataFrameSchema.readSqlTable(connection, tableName2)
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
t1.id,
t1.bigintCol,
t2.textCol
FROM table1 t1
JOIN table2 t2 ON t1.id = t2.id
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<ViewTable>()
val result = df.filter { it[ViewTable::id] == 1 }
result[0][2] shouldBe null
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
schema.columns["textcol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
val table1Df = dataframes[0].cast<Table1>()
table1Df.rowsCount() shouldBe 3
table1Df.filter { it[Table1::integercol] != null && it[Table1::integercol]!! > 12345 }.rowsCount() shouldBe 2
table1Df[0][1] shouldBe 1000L
table1Df[0][2] shouldBe 11
val table2Df = dataframes[1].cast<Table2>()
table2Df.rowsCount() shouldBe 3
table2Df.filter {
it[Table2::realcol] == 12.34f
}.rowsCount() shouldBe 3
table2Df[0][4] shouldBe 1001
}
@Test
fun `read columns of different types to check type mapping`() {
val tableName1 = "table1"
val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
val result = df1.select("smallintcol")
.add("smallintcol2") { it[Table1::smallintcol] }
result[0][1] shouldBe 11
val result1 = df1.select("bigserialcol")
.add("bigserialcol2") { it[Table1::bigserialcol] }
result1[0][1] shouldBe 1000000001L
val result2 = df1.select("doublecol")
.add("doublecol2") { it[Table1::doublecol] }
result2[0][1] shouldBe 12.34
val tableName2 = "table2"
val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
val result4 = df2.select("numericcol")
.add("numericcol2") { it[Table2::numericcol] }
result4[0][1] shouldBe BigDecimal("12.34")
val result5 = df2.select("realcol")
.add("realcol2") { it[Table2::realcol] }
result5[0][1] shouldBe 12.34f
val result8 = df2.select("serialcol")
.add("serialcol2") { it[Table2::serialcol] }
result8[0][1] shouldBe 1000001
val schema = DataFrameSchema.readSqlTable(connection, tableName1)
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["bigserialcol"]!!.type shouldBe typeOf<Long>()
schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
val schema1 = DataFrameSchema.readSqlTable(connection, tableName2)
schema1.columns["numericcol"]!!.type shouldBe typeOf<BigDecimal>()
schema1.columns["realcol"]!!.type shouldBe typeOf<Float>()
schema1.columns["serialcol"]!!.type shouldBe typeOf<Int>()
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
@Test
fun `readSqlQuery should execute a WITH clause and return results`() {
try {
// Step 1: Create a temporary table
@Language("SQL")
val createTableQuery =
"""
CREATE TABLE employees (
id INT PRIMARY KEY,
name VARCHAR(100),
salary DOUBLE
)
""".trimIndent()
connection.createStatement().execute(createTableQuery)
// Step 2: Insert data into the table
@Language("SQL")
val insertDataQuery =
"""
INSERT INTO employees (id, name, salary) VALUES
(1, 'Alice', 60000.0),
(2, 'Bob', 50000.0),
(3, 'Charlie', 70000.0)
""".trimIndent()
connection.createStatement().execute(insertDataQuery)
// Step 3: Execute the query with a WITH clause
@Language("SQL")
val queryWithClause =
"""
WITH high_earners AS (
SELECT name, salary
FROM employees
WHERE salary > 55000.0
)
SELECT * FROM high_earners
""".trimIndent()
val resultDataFrame = DataFrame.readSqlQuery(connection, queryWithClause)
// Step 4: Validate the results
resultDataFrame.rowsCount() shouldBe 2
resultDataFrame[0][0] shouldBe "Alice"
resultDataFrame[1][0] shouldBe "Charlie"
} finally {
// Step 5: Clean up the temporary table
@Language("SQL")
val dropTableQuery = "DROP TABLE IF EXISTS employees"
connection.createStatement().execute(dropTableQuery)
}
}
}
@@ -0,0 +1,656 @@
@file:Suppress("SqlDialectInspection")
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.matchers.shouldBe
import org.duckdb.DuckDBConnection
import org.duckdb.DuckDBResultSet
import org.duckdb.JsonNode
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.colsOf
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.single
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.api.values
import org.jetbrains.kotlinx.dataframe.api.with
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.assertInferredTypesMatchSchema
import org.jetbrains.kotlinx.dataframe.io.db.DuckDb
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readDataFrame
import org.jetbrains.kotlinx.dataframe.io.readResultSet
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.Test
import java.math.BigDecimal
import java.math.BigInteger
import java.nio.ByteBuffer
import java.nio.file.Files
import java.sql.Blob
import java.sql.DriverManager
import java.sql.Timestamp
import java.time.LocalDate
import java.time.LocalTime
import java.time.OffsetDateTime
import java.util.UUID
import kotlin.io.path.createTempDirectory
private const val URL = "jdbc:duckdb:"
class DuckDbTest {
// region expected schemas
@DataSchema
data class Person(
val id: Int,
val name: String,
val age: Int,
val salary: Double,
@ColumnName("hire_date")
val hireDate: LocalDate,
) {
companion object {
val expected = listOf(
Person(1, "John Doe", 30, 50000.0, LocalDate.of(2020, 1, 15)),
Person(2, "Jane Smith", 28, 55000.0, LocalDate.of(2021, 3, 20)),
Person(3, "Bob Johnson", 35, 65000.0, LocalDate.of(2019, 11, 10)),
Person(4, "Alice Brown", 32, 60000.0, LocalDate.of(2020, 7, 1)),
).toDataFrame()
}
}
@DataSchema
data class GeneralPurposeTypes(
@ColumnName("bigint_col")
val bigintCol: Long,
@ColumnName("binary_col")
val binaryCol: Blob,
@ColumnName("bit_col")
val bitCol: String,
@ColumnName("bitstring_col")
val bitstringCol: String,
@ColumnName("blob_col")
val blobCol: Blob,
@ColumnName("bool_col")
val boolCol: Boolean,
@ColumnName("boolean_col")
val booleanCol: Boolean,
@ColumnName("bpchar_col")
val bpcharCol: String,
@ColumnName("bytea_col")
val byteaCol: Blob,
@ColumnName("char_col")
val charCol: String,
@ColumnName("date_col")
val dateCol: LocalDate,
@ColumnName("datetime_col")
val datetimeCol: Timestamp,
@ColumnName("decimal_col")
val decimalCol: BigDecimal,
@ColumnName("double_col")
val doubleCol: Double,
@ColumnName("enum_col")
val enumCol: String,
@ColumnName("float4_col")
val float4Col: Float,
@ColumnName("float8_col")
val float8Col: Double,
@ColumnName("float_col")
val floatCol: Float,
@ColumnName("hugeint_col")
val hugeintCol: BigInteger,
@ColumnName("int128_col")
val int128Col: BigInteger,
@ColumnName("int16_col")
val int16Col: Short,
@ColumnName("int1_col")
val int1Col: Byte,
@ColumnName("int2_col")
val int2Col: Short,
@ColumnName("int32_col")
val int32Col: Int,
@ColumnName("int4_col")
val int4Col: Int,
@ColumnName("int64_col")
val int64Col: Long,
@ColumnName("int8_col")
val int8Col: Long,
@ColumnName("int_col")
val intCol: Int,
@ColumnName("integer_col")
val integerCol: Int,
@ColumnName("interval_col")
val intervalCol: String,
@ColumnName("json_col")
val jsonCol: JsonNode,
@ColumnName("logical_col")
val logicalCol: Boolean,
@ColumnName("long_col")
val longCol: Long,
@ColumnName("numeric_col")
val numericCol: BigDecimal,
@ColumnName("real_col")
val realCol: Float,
@ColumnName("short_col")
val shortCol: Short,
@ColumnName("signed_col")
val signedCol: Int,
@ColumnName("smallint_col")
val smallintCol: Short,
@ColumnName("string_col")
val stringCol: String,
@ColumnName("text_col")
val textCol: String,
@ColumnName("time_col")
val timeCol: LocalTime,
@ColumnName("timestamp_col")
val timestampCol: Timestamp,
@ColumnName("timestamptz_col")
val timestamptzCol: OffsetDateTime,
@ColumnName("timestampwtz_col")
val timestampwtzCol: OffsetDateTime,
@ColumnName("tinyint_col")
val tinyintCol: Byte,
@ColumnName("ubigint_col")
val ubigintCol: BigInteger,
@ColumnName("uhugeint_col")
val uhugeintCol: BigInteger,
@ColumnName("uint128_col")
val uint128Col: BigInteger,
@ColumnName("uint16_col")
val uint16Col: Int,
@ColumnName("uint32_col")
val uint32Col: Long,
@ColumnName("uint64_col")
val uint64Col: BigInteger,
@ColumnName("uint8_col")
val uint8Col: Short,
@ColumnName("uint_col")
val uintCol: Long,
@ColumnName("usmallint_col")
val usmallintCol: Int,
@ColumnName("utinyint_col")
val utinyintCol: Short,
@ColumnName("uuid_col")
val uuidCol: UUID,
@ColumnName("varbinary_col")
val varbinaryCol: Blob,
@ColumnName("varchar_col")
val varcharCol: String,
) {
companion object {
val expected = listOf(
GeneralPurposeTypes(
bigintCol = 9223372036854775807L,
binaryCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
bitCol = "1010",
bitstringCol = "1010",
blobCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
boolCol = true,
booleanCol = true,
bpcharCol = "test",
byteaCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
charCol = "test",
dateCol = LocalDate.parse("2025-06-19"),
datetimeCol = Timestamp.valueOf("2025-06-19 12:34:56"),
decimalCol = BigDecimal("123.45"),
doubleCol = 3.14159,
enumCol = "female",
float4Col = 3.14f,
float8Col = 3.14159,
floatCol = 3.14f,
hugeintCol = BigInteger("170141183460469231731687303715884105727"),
int128Col = BigInteger("170141183460469231731687303715884105727"),
int16Col = 32767,
int1Col = 127,
int2Col = 32767,
int32Col = 2147483647,
int4Col = 2147483647,
int64Col = 9223372036854775807L,
int8Col = 9223372036854775807L,
intCol = 2147483647,
integerCol = 2147483647,
intervalCol = "1 year",
jsonCol = JsonNode("{\"key\": \"value\"}"),
logicalCol = true,
longCol = 9223372036854775807L,
numericCol = BigDecimal("123.45"),
realCol = 3.14f,
shortCol = 32767,
signedCol = 2147483647,
smallintCol = 32767,
stringCol = "test string",
textCol = "test text",
timeCol = LocalTime.parse("12:34:56"),
timestampCol = Timestamp.valueOf("2025-06-19 12:34:56"),
timestamptzCol = OffsetDateTime.parse("2025-06-19T12:34:56+02:00"),
timestampwtzCol = OffsetDateTime.parse("2025-06-19T12:34:56+02:00"),
tinyintCol = 127,
ubigintCol = BigInteger("18446744073709551615"),
uhugeintCol = BigInteger("340282366920938463463374607431768211455"),
uint128Col = BigInteger("340282366920938463463374607431768211455"),
uint16Col = 65535,
uint32Col = 4294967295L,
uint64Col = BigInteger("18446744073709551615"),
uint8Col = 255,
uintCol = 4294967295L,
usmallintCol = 65535,
utinyintCol = 255,
uuidCol = UUID.fromString("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11"),
varbinaryCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
varcharCol = "test string",
),
).toDataFrame()
}
}
@DataSchema
data class NestedTypes(
@ColumnName("ijstruct_col")
val ijstructCol: java.sql.Struct,
@ColumnName("intarray_col")
val intarrayCol: java.sql.Array,
@ColumnName("intlist_col")
val intlistCol: java.sql.Array,
@ColumnName("intstringmap_col")
val intstringmapCol: Map<Int, String?>,
@ColumnName("intstrinstinggmap_col")
val intstrinstinggmapCol: Map<Int, Map<String, String?>?>,
@ColumnName("stringarray_col")
val stringarrayCol: java.sql.Array,
@ColumnName("stringlist_col")
val stringlistCol: java.sql.Array,
@ColumnName("stringlistlist_col")
val stringlistlistCol: java.sql.Array,
@ColumnName("union_col")
val unionCol: Any,
)
// endregion
@Test
fun `read simple dataframe from DuckDB`() {
val df: AnyFrame
val schema: DataFrameSchema
val subset: AnyFrame
DriverManager.getConnection(URL).use { connection ->
connection.prepareStatement(
"""
CREATE TABLE IF NOT EXISTS test_table (
id INTEGER PRIMARY KEY,
name VARCHAR,
age INTEGER,
salary DOUBLE,
hire_date DATE
)
""".trimIndent(),
).executeUpdate()
connection.prepareStatement(
"""
INSERT INTO test_table (id, name, age, salary, hire_date)
VALUES
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
""".trimIndent(),
).executeUpdate()
df = DataFrame.readSqlTable(connection, "test_table")
schema = DataFrameSchema.readSqlTable(connection, "test_table")
subset = DataFrame.readSqlQuery(connection, """SELECT test_table.name, test_table.age FROM test_table""")
}
schema.compare(Person.expected.schema()).isSuperOrMatches() shouldBe true
df.cast<Person>(verify = true) shouldBe Person.expected
df.assertInferredTypesMatchSchema()
subset.assertInferredTypesMatchSchema()
subset["name"] shouldBe df["name"]
subset["age"] shouldBe df["age"]
subset.columnsCount() shouldBe 2
}
@Test
fun `read simple dataframe from DuckDB ResultSet`() {
val df: AnyFrame
val schema: DataFrameSchema
DriverManager.getConnection(URL).use { connection ->
connection.prepareStatement(
"""
CREATE TABLE IF NOT EXISTS test_table (
id INTEGER PRIMARY KEY,
name VARCHAR,
age INTEGER,
salary DOUBLE,
hire_date DATE
)
""".trimIndent(),
).executeUpdate()
connection.prepareStatement(
"""
INSERT INTO test_table (id, name, age, salary, hire_date)
VALUES
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
""".trimIndent(),
).executeUpdate()
connection.prepareStatement("SELECT * FROM test_table").executeQuery().use { rs ->
df = DataFrame.readResultSet(rs, DuckDb)
schema = DataFrameSchema.readResultSet(rs, DuckDb)
}
}
schema.compare(Person.expected.schema()).isSuperOrMatches() shouldBe true
df.cast<Person>(verify = true) shouldBe Person.expected
df.assertInferredTypesMatchSchema()
}
@Test
fun `read all tables`() {
val dfs: Map<String, AnyFrame>
val schemas: Map<String, DataFrameSchema>
DriverManager.getConnection(URL).use { connection ->
connection.prepareStatement(
"""
CREATE TABLE IF NOT EXISTS test_table (
id INTEGER PRIMARY KEY,
name VARCHAR,
age INTEGER,
salary DOUBLE,
hire_date DATE
)
""".trimIndent(),
).executeUpdate()
connection.prepareStatement(
"""
INSERT INTO test_table (id, name, age, salary, hire_date)
VALUES
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
""".trimIndent(),
).executeUpdate()
dfs = DataFrame.readAllSqlTables(connection = connection)
schemas = DataFrameSchema.readAllSqlTables(connection = connection)
}
val df = dfs["test_table"]!!
val schema = schemas["test_table"]!!
schema.compare(Person.expected.schema()).isSuperOrMatches() shouldBe true
df.cast<Person>(verify = true) shouldBe Person.expected
df.assertInferredTypesMatchSchema()
}
/**
* https://duckdb.org/docs/stable/sql/data_types/overview.html
*/
@Test
fun `read each general-purpose DuckDB type`() {
val df: AnyFrame
val schema: DataFrameSchema
DriverManager.getConnection(URL).use { connection ->
connection as DuckDBConnection
connection.prepareStatement(
"""
CREATE TABLE IF NOT EXISTS table1 (
bigint_col BIGINT,
int8_col INT8,
int64_col INT64,
long_col LONG,
bit_col BIT,
bitstring_col BITSTRING,
blob_col BLOB,
bytea_col BYTEA,
binary_col BINARY,
varbinary_col VARBINARY,
boolean_col BOOLEAN,
bool_col BOOL,
logical_col LOGICAL,
date_col DATE,
decimal_col DECIMAL(10,2),
numeric_col NUMERIC(10,2),
double_col DOUBLE,
float8_col FLOAT8,
float_col FLOAT,
float4_col FLOAT4,
real_col REAL,
hugeint_col HUGEINT,
int128_col INT128,
integer_col INTEGER,
int4_col INT4,
int32_col INT32,
int_col INT,
signed_col SIGNED,
interval_col INTERVAL,
json_col JSON,
smallint_col SMALLINT,
int2_col INT2,
int16_col INT16,
short_col SHORT,
time_col TIME,
timestampwtz_col TIMESTAMP WITH TIME ZONE,
timestamptz_col TIMESTAMPTZ,
timestamp_col TIMESTAMP,
datetime_col DATETIME,
tinyint_col TINYINT,
int1_col INT1,
ubigint_col UBIGINT,
uint64_col UINT64,
uhugeint_col UHUGEINT,
uint128_col UINT128,
uint_col UINTEGER,
uint32_col UINT32,
usmallint_col USMALLINT,
uint16_col UINT16,
utinyint_col UTINYINT,
uint8_col UINT8,
uuid_col UUID,
varchar_col VARCHAR,
char_col CHAR(10),
bpchar_col BPCHAR(10),
text_col TEXT,
string_col STRING,
enum_col ENUM('male', 'female', 'other')
)
""".trimIndent(),
).executeUpdate()
connection.prepareStatement(
"""
INSERT INTO table1 VALUES (
9223372036854775807, -- bigint
9223372036854775807, -- int8
9223372036854775807, -- int64
9223372036854775807, -- long
'1010', -- bit
'1010', -- bitstring
'DEADBEEF'::BLOB, -- blob
'DEADBEEF'::BLOB, -- bytea
'DEADBEEF'::BLOB, -- binary
'DEADBEEF'::BLOB, -- varbinary
true, -- boolean
true, -- bool
true, -- logical
'2025-06-19', -- date
123.45, -- decimal
123.45, -- numeric
3.14159, -- double
3.14159, -- float8
3.14, -- float
3.14, -- float4
3.14, -- real
'170141183460469231731687303715884105727', -- hugeint
'170141183460469231731687303715884105727', -- int128
2147483647, -- integer
2147483647, -- int4
2147483647, -- int32
2147483647, -- int
2147483647, -- signed
INTERVAL '1' YEAR, -- interval
'{"key": "value"}'::JSON, -- json
32767, -- smallint
32767, -- int2
32767, -- int16
32767, -- short
'12:34:56', -- time
'2025-06-19 12:34:56+02', -- timestampwtz
'2025-06-19 12:34:56+02', -- timestamptz
'2025-06-19 12:34:56', -- timestamp
'2025-06-19 12:34:56', -- datetime
127, -- tinyint
127, -- int1
18446744073709551615, -- ubigint
18446744073709551615, -- uint64
'340282366920938463463374607431768211455', -- uhugeint
'340282366920938463463374607431768211455', -- uint128
4294967295, -- uinteger
4294967295, -- uint32
65535, -- usmallint
65535, -- uint16
255, -- utinyint
255, -- uint8
'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', -- uuid
'test string', -- varchar
'test', -- char
'test', -- bpchar
'test text', -- text
'test string', -- string
'female' -- enum
)
""".trimIndent(),
).executeUpdate()
schema = DataFrameSchema.readSqlTable(connection, "table1")
df = DataFrame.readSqlTable(connection, "table1").reorderColumnsByName()
}
schema.compare(GeneralPurposeTypes.expected.schema()).isSuperOrMatches() shouldBe true
// on some systems, OffsetDateTime values get converted to UTC, so compare them as Instants instead
fun AnyFrame.fixOffsetDateTime() = convert { colsOf<OffsetDateTime>() }.with { it.toInstant() }
df.cast<GeneralPurposeTypes>(verify = true).fixOffsetDateTime() shouldBe
GeneralPurposeTypes.expected.fixOffsetDateTime()
df.assertInferredTypesMatchSchema()
}
/**
* https://duckdb.org/docs/stable/sql/data_types/overview.html
*/
@Test
fun `read each nested DuckDB type`() {
val df: AnyFrame
val schema: DataFrameSchema
DriverManager.getConnection(URL).use { connection ->
connection as DuckDBConnection
connection.prepareStatement(
"""
CREATE TABLE IF NOT EXISTS table2 (
intarray_col INTEGER[3],
stringarray_col VARCHAR[3],
intlist_col INTEGER[],
stringlist_col VARCHAR[],
stringlistlist_col VARCHAR[][],
intstringmap_col MAP(INTEGER, VARCHAR),
intstringstringmap_col MAP(INTEGER, MAP(VARCHAR, VARCHAR)),
ijstruct_col STRUCT(i INTEGER, j VARCHAR),
union_col UNION(num INTEGER, text VARCHAR),
)
""".trimIndent(),
).executeUpdate()
connection.prepareStatement(
"""
INSERT INTO table2 VALUES (
array_value(1, 2, NULL), -- int array
array_value('a', 'ab', 'abc'), -- string array
list_value(1, 2, 3), -- int list
list_value('a', 'ab', 'abc'), -- string list
list_value(list_value('a', 'ab'), list_value('abc'), NULL), -- string list list
MAP { 1: 'value1', 200: 'value2' }, -- int string map
MAP { 1: MAP { 'value1': 'a', 'value2': 'b' }, 200: MAP { 'value1': 'c', 'value2': 'd' } }, -- int string string map
{ 'i': 42, 'j': 'answer' }, -- struct
union_value(num := 2), -- union
)
""".trimIndent(),
).executeUpdate()
schema = DataFrameSchema.readSqlTable(connection, "table2")
df = DataFrame.readSqlTable(connection, "table2")
}
df.assertInferredTypesMatchSchema()
df.cast<NestedTypes>(verify = true)
df as DataFrame<NestedTypes>
df.single().let {
it[{ "intarray_col"<java.sql.Array>() }].array shouldBe arrayOf(1, 2, null)
it[{ "stringarray_col"<java.sql.Array>() }].array shouldBe arrayOf("a", "ab", "abc")
it[{ "intlist_col"<java.sql.Array>() }].array shouldBe arrayOf(1, 2, 3)
it[{ "stringlist_col"<java.sql.Array>() }].array shouldBe arrayOf("a", "ab", "abc")
(it[{ "stringlistlist_col"<java.sql.Array>() }].array as Array<*>)
.map { (it as java.sql.Array?)?.array } shouldBe listOf(arrayOf("a", "ab"), arrayOf("abc"), null)
it[{ "intstringmap_col"<Map<Int, String?>>() }] shouldBe mapOf(1 to "value1", 200 to "value2")
it[{ "intstrinstinggmap_col"<Map<Int, Map<String, String?>>>() }] shouldBe mapOf(
1 to mapOf("value1" to "a", "value2" to "b"),
200 to mapOf("value1" to "c", "value2" to "d"),
)
it[{ "ijstruct_col"<java.sql.Struct>() }].attributes shouldBe arrayOf<Any>(42, "answer")
it[{ "union_col"<Any>() }] shouldBe 2
}
}
@Test
fun `change read mode`() {
// Test in-memory database (cannot be read-only)
val config = DbConnectionConfig("jdbc:duckdb:")
val df = config.readDataFrame("SELECT 1, 2, 3")
df.values().toList() shouldBe listOf(1, 2, 3)
}
@Test
fun `change read mode with persistent database`() {
// Test read-only mode with a temporary file
val tempDir = createTempDirectory("duckdb-test-")
val dbPath = tempDir.resolve("test.duckdb")
try {
// First, create the database with actual data using plain JDBC to allow DDL/DML
DriverManager.getConnection("jdbc:duckdb:${dbPath.toAbsolutePath()}").use { connection ->
connection.createStatement().use { st ->
st.executeUpdate("CREATE TABLE test_data(col1 INTEGER, col2 INTEGER, col3 INTEGER)")
st.executeUpdate("INSERT INTO test_data VALUES (1, 2, 3)")
}
}
// Now test read-only access via our API
val config = DbConnectionConfig("jdbc:duckdb:${dbPath.toAbsolutePath()}", readOnly = true)
val df = config.readDataFrame("SELECT col1, col2, col3 FROM test_data")
df.values().toList() shouldBe listOf(1, 2, 3)
} finally {
Files.deleteIfExists(dbPath)
Files.deleteIfExists(tempDir)
}
}
}
@@ -0,0 +1,116 @@
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.Ignore
import org.junit.Test
import java.sql.DriverManager
import java.util.Properties
import kotlin.reflect.typeOf
private const val URL = "jdbc:mariadb://localhost:3307/imdb"
private const val URL2 = "jdbc:mariadb://localhost:3307"
private const val USER_NAME = "root"
private const val PASSWORD = "pass"
@DataSchema
interface ActorKDF {
val id: Int
val firstName: String?
val lastName: String?
val gender: String?
}
@DataSchema
interface RankedMoviesWithGenres {
val name: String?
val year: Int?
val rank: Float?
val genres: String?
}
@Ignore
class ImdbTest {
@Test
fun `read table`() {
val props = Properties()
props.setProperty("user", USER_NAME)
props.setProperty("password", PASSWORD)
// generate KDF schemas from database metadata (as interfaces or extensions)
// for Gradle, or as classes under the hood in KNB
val tableName = "actors"
DriverManager.getConnection(URL, props).use { connection ->
val df = DataFrame.readSqlTable(connection, tableName, 100).cast<ActorKDF>()
val result = df.filter { it[ActorKDF::id] in 11..19 }
result[0][1] shouldBe "Víctor"
val schema = DataFrameSchema.readSqlTable(connection, tableName)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["first_name"]!!.type shouldBe typeOf<String?>()
}
}
@Test
fun `read table with schema name in table name`() {
val props = Properties()
props.setProperty("user", USER_NAME)
props.setProperty("password", PASSWORD)
// generate KDF schemas from database metadata (as interfaces or extensions)
// for Gradle, or as classes under the hood in KNB
val imdbTableName = "imdb.actors"
DriverManager.getConnection(URL2, props).use { connection ->
val df = DataFrame.readSqlTable(connection, imdbTableName, 100).cast<ActorKDF>()
val result = df.filter { it[ActorKDF::id] in 11..19 }
result[0][1] shouldBe "Víctor"
val schema = DataFrameSchema.readSqlTable(connection, imdbTableName)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["first_name"]!!.type shouldBe typeOf<String?>()
}
}
@Test
fun `read sql query`() {
@Language("sql")
val sqlQuery =
"""
select name, year, rank,
GROUP_CONCAT(genre) as "genres"
from movies join movies_directors on movie_id = movies.id
join directors on directors.id=director_id left join movies_genres on movies.id = movies_genres.movie_id
where directors.first_name = "Quentin" and directors.last_name = "Tarantino"
and movies.name is not null
group by name, year, rank
order by year
""".trimIndent()
val props = Properties()
props.setProperty("user", USER_NAME)
props.setProperty("password", PASSWORD)
// generate data schemas from database metadata (as interfaces or extensions)
// via the Gradle plugin, or as classes under the hood in Kotlin Notebook
DriverManager.getConnection(URL, props).use { connection ->
val df = DataFrame.readSqlQuery(connection, sqlQuery).cast<RankedMoviesWithGenres>()
val result =
df.filter { it[RankedMoviesWithGenres::year] != null && it[RankedMoviesWithGenres::year]!! > 2000 }
result[0][1] shouldBe 2003
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery)
schema.columns["name"]!!.type shouldBe typeOf<String?>()
schema.columns["year"]!!.type shouldBe typeOf<Int?>()
}
}
}
@@ -0,0 +1,476 @@
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.select
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Ignore
import org.junit.Test
import java.math.BigDecimal
import java.sql.Blob
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.Date
import kotlin.reflect.typeOf
private const val URL = "jdbc:mariadb://localhost:3307"
private const val USER_NAME = "root"
private const val PASSWORD = "pass"
private const val TEST_DATABASE_NAME = "testKDFdatabase"
@DataSchema
interface Table1MariaDb {
val id: Int
val bitCol: Boolean
val tinyintCol: Int
val smallintCol: Short?
val mediumintCol: Int
val mediumintUnsignedCol: Int
val integerCol: Int
val intCol: Int
val integerUnsignedCol: Long
val bigintCol: Long
val floatCol: Float
val doubleCol: Double
val decimalCol: BigDecimal
val dateCol: String
val datetimeCol: String
val timestampCol: String
val timeCol: String
val yearCol: String
val varcharCol: String
val charCol: String
val binaryCol: ByteArray
val varbinaryCol: ByteArray
val tinyblobCol: ByteArray
val blobCol: ByteArray
val mediumblobCol: ByteArray
val longblobCol: ByteArray
val textCol: String
val mediumtextCol: String
val longtextCol: String
val enumCol: String
val setCol: Char
val jsonCol: String
}
@DataSchema
interface Table2MariaDb {
val id: Int
val bitCol: Boolean?
val tinyintCol: Int?
val smallintCol: Int?
val mediumintCol: Int?
val mediumintUnsignedCol: Int?
val integerCol: Int?
val intCol: Int?
val integerUnsignedCol: Long?
val bigintCol: Long?
val floatCol: Float?
val doubleCol: Double?
val decimalCol: Double?
val dateCol: String?
val datetimeCol: String?
val timestampCol: String?
val timeCol: String?
val yearCol: String?
val varcharCol: String?
val charCol: String?
val binaryCol: ByteArray?
val varbinaryCol: ByteArray?
val tinyblobCol: ByteArray?
val blobCol: ByteArray?
val mediumblobCol: ByteArray?
val longblobCol: ByteArray?
val textCol: String?
val mediumtextCol: String?
val longtextCol: String?
val enumCol: String?
val setCol: Char?
val jsonCol: String?
}
@DataSchema
interface Table3MariaDb {
val id: Int
val enumCol: String
val setCol: Char?
}
private const val JSON_STRING =
"{\"details\": {\"foodType\": \"Pizza\", \"menu\": \"https://www.loumalnatis.com/our-menu\"}, \n" +
" \t\"favorites\": [{\"description\": \"Pepperoni deep dish\", \"price\": 18.75}, \n" +
"{\"description\": \"The Lou\", \"price\": 24.75}]}"
@Ignore
class MariadbTest {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD)
connection.createStatement().use { st ->
// Drop the test database if it exists
val dropDatabaseQuery = "DROP DATABASE IF EXISTS $TEST_DATABASE_NAME"
st.executeUpdate(dropDatabaseQuery)
// Create the test database
val createDatabaseQuery = "CREATE DATABASE $TEST_DATABASE_NAME"
st.executeUpdate(createDatabaseQuery)
// Use the newly created database
val useDatabaseQuery = "USE $TEST_DATABASE_NAME"
st.executeUpdate(useDatabaseQuery)
}
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
@Language("SQL")
val createTableQuery = """
CREATE TABLE IF NOT EXISTS table1 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT NOT NULL,
tinyintCol TINYINT NOT NULL,
smallintCol SMALLINT,
mediumintCol MEDIUMINT NOT NULL,
mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
integerCol INTEGER NOT NULL,
intCol INT NOT NULL,
integerUnsignedCol INTEGER UNSIGNED NOT NULL,
bigintCol BIGINT NOT NULL,
floatCol FLOAT NOT NULL,
doubleCol DOUBLE NOT NULL,
decimalCol DECIMAL NOT NULL,
dateCol DATE NOT NULL,
datetimeCol DATETIME NOT NULL,
timestampCol TIMESTAMP NOT NULL,
timeCol TIME NOT NULL,
yearCol YEAR NOT NULL,
varcharCol VARCHAR(255) NOT NULL,
charCol CHAR(10) NOT NULL,
binaryCol BINARY(64) NOT NULL,
varbinaryCol VARBINARY(128) NOT NULL,
tinyblobCol TINYBLOB NOT NULL,
blobCol BLOB NOT NULL,
mediumblobCol MEDIUMBLOB NOT NULL,
longblobCol LONGBLOB NOT NULL,
textCol TEXT NOT NULL,
mediumtextCol MEDIUMTEXT NOT NULL,
longtextCol LONGTEXT NOT NULL,
enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
setCol SET('Option1', 'Option2', 'Option3') NOT NULL,
jsonCol JSON NOT NULL
CHECK (JSON_VALID(jsonCol))
)
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val createTableQuery2 = """
CREATE TABLE IF NOT EXISTS table2 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT,
tinyintCol TINYINT,
smallintCol SMALLINT,
mediumintCol MEDIUMINT,
mediumintUnsignedCol MEDIUMINT UNSIGNED,
integerCol INTEGER,
intCol INT,
integerUnsignedCol INTEGER UNSIGNED,
bigintCol BIGINT,
floatCol FLOAT,
doubleCol DOUBLE,
decimalCol DECIMAL,
dateCol DATE,
datetimeCol DATETIME,
timestampCol TIMESTAMP,
timeCol TIME,
yearCol YEAR,
varcharCol VARCHAR(255),
charCol CHAR(10),
binaryCol BINARY(64),
varbinaryCol VARBINARY(128),
tinyblobCol TINYBLOB,
blobCol BLOB,
mediumblobCol MEDIUMBLOB,
longblobCol LONGBLOB,
textCol TEXT,
mediumtextCol MEDIUMTEXT,
longtextCol LONGTEXT,
enumCol ENUM('Value1', 'Value2', 'Value3'),
setCol SET('Option1', 'Option2', 'Option3')
)
"""
connection.createStatement().execute(createTableQuery2.trimIndent())
@Language("SQL")
val insertData1 =
"""
INSERT INTO table1 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol, jsonCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
@Language("SQL")
val insertData2 =
"""
INSERT INTO table2 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
connection.prepareStatement(insertData1).use { st ->
// Insert data into table1
for (i in 1..3) {
st.setBoolean(1, true)
st.setByte(2, i.toByte())
st.setShort(3, (i * 10).toShort())
st.setInt(4, i * 100)
st.setInt(5, i * 100)
st.setInt(6, i * 100)
st.setInt(7, i * 100)
st.setInt(8, i * 100)
st.setInt(9, i * 100)
st.setFloat(10, i * 10.0f)
st.setDouble(11, i * 10.0)
st.setBigDecimal(12, BigDecimal(i * 10))
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, "textValue$i")
st.setString(27, "mediumtextValue$i")
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, "Option$i")
st.setString(31, JSON_STRING)
st.executeUpdate()
}
}
connection.prepareStatement(insertData2).use { st ->
// Insert data into table2
for (i in 1..3) {
st.setBoolean(1, false)
st.setByte(2, (i * 2).toByte())
st.setShort(3, (i * 20).toShort())
st.setInt(4, i * 200)
st.setInt(5, i * 200)
st.setInt(6, i * 200)
st.setInt(7, i * 200)
st.setInt(8, i * 200)
st.setInt(9, i * 200)
st.setFloat(10, i * 20.0f)
st.setDouble(11, i * 20.0)
st.setBigDecimal(12, BigDecimal(i * 20))
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, null)
st.setString(27, null)
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, "Option$i")
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
connection.createStatement().use { st -> st.execute("DROP DATABASE IF EXISTS $TEST_DATABASE_NAME") }
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `basic test for reading sql tables`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
val result = df1.filter { it[Table1MariaDb::id] == 1 }
result[0][26] shouldBe "textValue1"
val byteArray = "tinyblobValue".toByteArray()
result[0][22] shouldBe byteArray
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["textCol"]!!.type shouldBe typeOf<String>()
schema.columns["varbinaryCol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["binaryCol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["longblobCol"]!!.type shouldBe typeOf<Blob>()
schema.columns["tinyblobCol"]!!.type shouldBe typeOf<Blob>()
schema.columns["dateCol"]!!.type shouldBe typeOf<Date>()
schema.columns["datetimeCol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timestampCol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timeCol"]!!.type shouldBe typeOf<java.sql.Time>()
schema.columns["yearCol"]!!.type shouldBe typeOf<Date>()
val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MariaDb>()
val result2 = df2.filter { it[Table2MariaDb::id] == 1 }
result2[0][26] shouldBe null
val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
schema2.columns["textCol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
t1.id,
t1.enumCol,
t2.setCol
FROM table1 t1
JOIN table2 t2 ON t1.id = t2.id
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MariaDb>()
val result = df.filter { it[Table3MariaDb::id] == 1 }
result[0][2] shouldBe "Option1"
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["enumCol"]!!.type shouldBe typeOf<String>()
schema.columns["setCol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection, TEST_DATABASE_NAME, 1000).values.toList()
val table1Df = dataframes[0].cast<Table1MariaDb>()
table1Df.rowsCount() shouldBe 3
table1Df.filter { it[Table1MariaDb::integerCol] > 100 }.rowsCount() shouldBe 2
table1Df[0][11] shouldBe 10.0
table1Df[0][26] shouldBe "textValue1"
table1Df[0][31] shouldBe JSON_STRING // TODO: https://github.com/Kotlin/dataframe/issues/462
val table2Df = dataframes[1].cast<Table2MariaDb>()
table2Df.rowsCount() shouldBe 3
table2Df.filter {
it[Table2MariaDb::integerCol] != null && it[Table2MariaDb::integerCol]!! > 400
}.rowsCount() shouldBe 1
table2Df[0][11] shouldBe 20.0
table2Df[0][26] shouldBe null
}
@Test
fun `reading numeric types`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
val result = df1.select("tinyintCol")
.add("tinyintCol2") { it[Table1MariaDb::tinyintCol] }
result[0][1] shouldBe 1
val result1 = df1.select("smallintCol")
.add("smallintCol2") { it[Table1MariaDb::smallintCol] }
result1[0][1] shouldBe 10
val result2 = df1.select("mediumintCol")
.add("mediumintCol2") { it[Table1MariaDb::mediumintCol] }
result2[0][1] shouldBe 100
val result3 = df1.select("mediumintUnsignedCol")
.add("mediumintUnsignedCol2") { it[Table1MariaDb::mediumintUnsignedCol] }
result3[0][1] shouldBe 100
val result4 = df1.select("integerUnsignedCol")
.add("integerUnsignedCol2") { it[Table1MariaDb::integerUnsignedCol] }
result4[0][1] shouldBe 100L
val result5 = df1.select("bigintCol")
.add("bigintCol2") { it[Table1MariaDb::bigintCol] }
result5[0][1] shouldBe 100
val result6 = df1.select("floatCol")
.add("floatCol2") { it[Table1MariaDb::floatCol] }
result6[0][1] shouldBe 10.0f
val result7 = df1.select("doubleCol")
.add("doubleCol2") { it[Table1MariaDb::doubleCol] }
result7[0][1] shouldBe 10.0
val result8 = df1.select("decimalCol")
.add("decimalCol2") { it[Table1MariaDb::decimalCol] }
result8[0][1] shouldBe BigDecimal("10")
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["tinyintCol"]!!.type shouldBe typeOf<Int>()
schema.columns["smallintCol"]!!.type shouldBe typeOf<Short?>()
schema.columns["mediumintCol"]!!.type shouldBe typeOf<Int>()
schema.columns["mediumintUnsignedCol"]!!.type shouldBe typeOf<Int>()
schema.columns["integerUnsignedCol"]!!.type shouldBe typeOf<Long>()
schema.columns["bigintCol"]!!.type shouldBe typeOf<Long>()
schema.columns["floatCol"]!!.type shouldBe typeOf<Float>()
schema.columns["doubleCol"]!!.type shouldBe typeOf<Double>()
schema.columns["decimalCol"]!!.type shouldBe typeOf<BigDecimal>()
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
}
@@ -0,0 +1,298 @@
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Ignore
import org.junit.Test
import java.math.BigDecimal
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.Date
import java.util.UUID
import kotlin.reflect.typeOf
private const val URL = "jdbc:sqlserver://localhost:1433;encrypt=true;trustServerCertificate=true"
private const val USER_NAME = "root"
private const val PASSWORD = "pass"
private const val TEST_DATABASE_NAME = "testKDFdatabase"
@DataSchema
interface Table1MSSSQL {
val id: Int
val bigintColumn: Long
val binaryColumn: ByteArray
val bitColumn: Boolean
val charColumn: Char
val dateColumn: Date
val datetime3Column: java.sql.Timestamp
val datetime2Column: java.sql.Timestamp
val datetimeoffset2Column: String
val decimalColumn: BigDecimal
val floatColumn: Double
val imageColumn: ByteArray?
val intColumn: Int
val moneyColumn: BigDecimal
val ncharColumn: Char
val ntextColumn: String
val numericColumn: BigDecimal
val nvarcharColumn: String
val nvarcharMaxColumn: String
val realColumn: Float
val smalldatetimeColumn: java.sql.Timestamp
val smallintColumn: Int
val smallmoneyColumn: BigDecimal
val timeColumn: java.sql.Time
val timestampColumn: java.sql.Timestamp
val tinyintColumn: Int
val uniqueidentifierColumn: Char
val varbinaryColumn: ByteArray
val varbinaryMaxColumn: ByteArray
val varcharColumn: String
val varcharMaxColumn: String
val xmlColumn: String
val sqlvariantColumn: String
val geometryColumn: String
val geographyColumn: String
}
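// Ignored by default: requires a local SQL Server instance reachable via the URL above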
@Ignore
class MSSQLTest {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD)
connection.createStatement().use { st ->
// Drop the test database if it exists
val dropDatabaseQuery = "IF DB_ID('$TEST_DATABASE_NAME') IS NOT NULL\n" +
"DROP DATABASE $TEST_DATABASE_NAME"
st.executeUpdate(dropDatabaseQuery)
// Create the test database
val createDatabaseQuery = "CREATE DATABASE $TEST_DATABASE_NAME"
st.executeUpdate(createDatabaseQuery)
// Use the newly created database
val useDatabaseQuery = "USE $TEST_DATABASE_NAME"
st.executeUpdate(useDatabaseQuery)
}
@Language("SQL")
val createTableQuery = """
CREATE TABLE Table1 (
id INT NOT NULL IDENTITY PRIMARY KEY,
bigintColumn BIGINT,
binaryColumn BINARY(50),
bitColumn BIT,
charColumn CHAR(10),
dateColumn DATE,
datetime3Column DATETIME2(3),
datetime2Column DATETIME2,
datetimeoffset2Column DATETIMEOFFSET(2),
decimalColumn DECIMAL(10,2),
floatColumn FLOAT,
imageColumn IMAGE,
intColumn INT,
moneyColumn MONEY,
ncharColumn NCHAR(10),
ntextColumn NTEXT,
numericColumn NUMERIC(10,2),
nvarcharColumn NVARCHAR(50),
nvarcharMaxColumn NVARCHAR(MAX),
realColumn REAL,
smalldatetimeColumn SMALLDATETIME,
smallintColumn SMALLINT,
smallmoneyColumn SMALLMONEY,
textColumn TEXT,
timeColumn TIME,
timestampColumn DATETIME2,
tinyintColumn TINYINT,
uniqueidentifierColumn UNIQUEIDENTIFIER,
varbinaryColumn VARBINARY(50),
varbinaryMaxColumn VARBINARY(MAX),
varcharColumn VARCHAR(50),
varcharMaxColumn VARCHAR(MAX),
xmlColumn XML,
sqlvariantColumn SQL_VARIANT,
geometryColumn GEOMETRY,
geographyColumn GEOGRAPHY
);
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val insertData1 =
"""
INSERT INTO Table1 (
bigintColumn, binaryColumn, bitColumn, charColumn, dateColumn, datetime3Column, datetime2Column,
datetimeoffset2Column, decimalColumn, floatColumn, imageColumn, intColumn, moneyColumn, ncharColumn,
ntextColumn, numericColumn, nvarcharColumn, nvarcharMaxColumn, realColumn, smalldatetimeColumn,
smallintColumn, smallmoneyColumn, textColumn, timeColumn, timestampColumn, tinyintColumn,
uniqueidentifierColumn, varbinaryColumn, varbinaryMaxColumn, varcharColumn, varcharMaxColumn,
xmlColumn, sqlvariantColumn, geometryColumn, geographyColumn
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""".trimIndent()
connection.prepareStatement(insertData1).use { st ->
for (i in 1..5) {
st.setLong(1, 123456789012345L) // bigintColumn
st.setBytes(2, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // binaryColumn
st.setBoolean(3, true) // bitColumn
st.setString(4, "Sample") // charColumn
st.setDate(5, java.sql.Date(System.currentTimeMillis())) // dateColumn
st.setTimestamp(6, java.sql.Timestamp(System.currentTimeMillis())) // datetime3Column
st.setTimestamp(7, java.sql.Timestamp(System.currentTimeMillis())) // datetime2Column
st.setTimestamp(8, java.sql.Timestamp(System.currentTimeMillis())) // datetimeoffset2Column
st.setBigDecimal(9, BigDecimal("12345.67")) // decimalColumn
st.setFloat(10, 123.45f) // floatColumn
st.setNull(11, java.sql.Types.NULL) // imageColumn (assuming nullable)
st.setInt(12, 123456) // intColumn
st.setBigDecimal(13, BigDecimal("123.45")) // moneyColumn
st.setString(14, "Sample") // ncharColumn
st.setString(15, "Sample$i text") // ntextColumn
st.setBigDecimal(16, BigDecimal("1234.56")) // numericColumn
st.setString(17, "Sample") // nvarcharColumn
st.setString(18, "Sample$i text") // nvarcharMaxColumn
st.setFloat(19, 123.45f) // realColumn
st.setTimestamp(20, java.sql.Timestamp(System.currentTimeMillis())) // smalldatetimeColumn
st.setInt(21, 123) // smallintColumn
st.setBigDecimal(22, BigDecimal("123.45")) // smallmoneyColumn
st.setString(23, "Sample$i text") // textColumn
st.setTime(24, java.sql.Time(System.currentTimeMillis())) // timeColumn
st.setTimestamp(25, java.sql.Timestamp(System.currentTimeMillis())) // timestampColumn
st.setInt(26, 123) // tinyintColumn
st.setObject(27, UUID.randomUUID()) // uniqueidentifierColumn
st.setBytes(28, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryColumn
st.setBytes(29, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryMaxColumn
st.setString(30, "Sample$i") // varcharColumn
st.setString(31, "Sample$i text") // varcharMaxColumn
st.setString(32, "<xml>Sample$i</xml>") // xmlColumn
st.setString(33, "SQL_VARIANT") // sqlvariantColumn
st.setBytes(
34,
@Suppress("ktlint:standard:argument-list-wrapping")
byteArrayOf(
0xE6.toByte(), 0x10, 0x00, 0x00, 0x01, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
0x44, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x05, 0x4C, 0x0,
),
) // geometryColumn
st.setString(35, "POINT(1 1)") // geographyColumn
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.createStatement().use { st -> st.execute("DROP DATABASE IF EXISTS $TEST_DATABASE_NAME") }
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `basic test for reading sql tables`() {
val df1 = DataFrame.readSqlTable(connection, "table1", limit = 5).cast<Table1MSSSQL>()
val result = df1.filter { it[Table1MSSSQL::id] == 1 }
result[0][30] shouldBe "Sample1"
result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
result[0][Table1MSSSQL::bitColumn] shouldBe true
result[0][Table1MSSSQL::intColumn] shouldBe 123456
result[0][Table1MSSSQL::ntextColumn] shouldBe "Sample1 text"
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
schema.columns["binaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["bitColumn"]!!.type shouldBe typeOf<Boolean?>()
schema.columns["charColumn"]!!.type shouldBe typeOf<Char?>()
schema.columns["dateColumn"]!!.type shouldBe typeOf<Date?>()
schema.columns["datetime3Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["datetime2Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["datetimeoffset2Column"]!!.type shouldBe typeOf<String?>()
schema.columns["decimalColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["floatColumn"]!!.type shouldBe typeOf<Double?>()
schema.columns["imageColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["intColumn"]!!.type shouldBe typeOf<Int?>()
schema.columns["moneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["ncharColumn"]!!.type shouldBe typeOf<Char?>()
schema.columns["ntextColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["numericColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["nvarcharColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["nvarcharMaxColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["realColumn"]!!.type shouldBe typeOf<Float?>()
schema.columns["smalldatetimeColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["smallintColumn"]!!.type shouldBe typeOf<Int?>()
schema.columns["smallmoneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
schema.columns["timeColumn"]!!.type shouldBe typeOf<java.sql.Time?>()
schema.columns["timestampColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
schema.columns["tinyintColumn"]!!.type shouldBe typeOf<Int?>()
schema.columns["uniqueidentifierColumn"]!!.type shouldBe typeOf<Char?>()
schema.columns["varbinaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["varbinaryMaxColumn"]!!.type shouldBe typeOf<ByteArray?>()
schema.columns["varcharColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["varcharMaxColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["xmlColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["sqlvariantColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["geometryColumn"]!!.type shouldBe typeOf<String?>()
schema.columns["geographyColumn"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
Table1.id,
Table1.bigintColumn
FROM Table1
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery, limit = 3).cast<Table1MSSSQL>()
val result = df.filter { it[Table1MSSSQL::id] == 1 }
result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection, TEST_DATABASE_NAME, 4).values.toList()
val table1Df = dataframes[0].cast<Table1MSSSQL>()
table1Df.rowsCount() shouldBe 4
table1Df.filter { it[Table1MSSSQL::id] > 2 }.rowsCount() shouldBe 2
table1Df[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
}
@@ -0,0 +1,479 @@
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.select
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Ignore
import org.junit.Test
import java.math.BigDecimal
import java.sql.Connection
import java.sql.DriverManager
import java.sql.SQLException
import java.util.Date
import kotlin.reflect.typeOf
private const val URL = "jdbc:mysql://localhost:3306"
private const val USER_NAME = "root"
private const val PASSWORD = "pass"
private const val TEST_DATABASE_NAME = "testKDFdatabase"
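// Fixed epoch-millis instant so that date/time column values are deterministic across runs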
private const val TIMESTAMP = 1726246245460
@DataSchema
interface Table1MySql {
val id: Int
val bitCol: Boolean
val tinyintCol: Int
val smallintCol: Int
val mediumintCol: Int
val mediumintUnsignedCol: Int
val integerCol: Int
val intCol: Int
val integerUnsignedCol: Long
val bigintCol: Long
val floatCol: Float
val doubleCol: Double
val decimalCol: BigDecimal
val dateCol: String
val datetimeCol: String
val timestampCol: String
val timeCol: String
val yearCol: String
val varcharCol: String
val charCol: String
val binaryCol: ByteArray
val varbinaryCol: ByteArray
val tinyblobCol: ByteArray
val blobCol: ByteArray
val mediumblobCol: ByteArray
val longblobCol: ByteArray
val textCol: String
val mediumtextCol: String
val longtextCol: String
val enumCol: String
val setCol: Char
}
@DataSchema
interface Table2MySql {
val id: Int
val bitCol: Boolean?
val tinyintCol: Int?
val smallintCol: Int?
val mediumintCol: Int?
val mediumintUnsignedCol: Int?
val integerCol: Int?
val intCol: Int?
val integerUnsignedCol: Long?
val bigintCol: Long?
val floatCol: Float?
val doubleCol: Double?
val decimalCol: Double?
val dateCol: String?
val datetimeCol: String?
val timestampCol: String?
val timeCol: String?
val yearCol: String?
val varcharCol: String?
val charCol: String?
val binaryCol: ByteArray?
val varbinaryCol: ByteArray?
val tinyblobCol: ByteArray?
val blobCol: ByteArray?
val mediumblobCol: ByteArray?
val longblobCol: ByteArray?
val textCol: String?
val mediumtextCol: String?
val longtextCol: String?
val enumCol: String?
val setCol: Char?
val jsonCol: String?
}
@DataSchema
interface Table3MySql {
val id: Int
val enumCol: String
val setCol: Char?
}
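// Ignored by default: requires a local MySQL instance on port 3306 (see URL above)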
@Ignore
class MySqlTest {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD)
connection.createStatement().use { st ->
// Drop the test database if it exists
val dropDatabaseQuery = "DROP DATABASE IF EXISTS $TEST_DATABASE_NAME"
st.executeUpdate(dropDatabaseQuery)
// Create the test database
val createDatabaseQuery = "CREATE DATABASE $TEST_DATABASE_NAME"
st.executeUpdate(createDatabaseQuery)
// Use the newly created database
val useDatabaseQuery = "USE $TEST_DATABASE_NAME"
st.executeUpdate(useDatabaseQuery)
}
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
@Language("SQL")
val createTableQuery = """
CREATE TABLE IF NOT EXISTS table1 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT NOT NULL,
tinyintCol TINYINT NOT NULL,
smallintCol SMALLINT NOT NULL,
mediumintCol MEDIUMINT NOT NULL,
mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
integerCol INTEGER NOT NULL,
intCol INT NOT NULL,
integerUnsignedCol INTEGER UNSIGNED NOT NULL,
bigintCol BIGINT NOT NULL,
floatCol FLOAT NOT NULL,
doubleCol DOUBLE NOT NULL,
decimalCol DECIMAL NOT NULL,
dateCol DATE NOT NULL,
datetimeCol DATETIME NOT NULL,
timestampCol TIMESTAMP NOT NULL,
timeCol TIME NOT NULL,
yearCol YEAR NOT NULL,
varcharCol VARCHAR(255) NOT NULL,
charCol CHAR(10) NOT NULL,
binaryCol BINARY(64) NOT NULL,
varbinaryCol VARBINARY(128) NOT NULL,
tinyblobCol TINYBLOB NOT NULL,
blobCol BLOB NOT NULL,
mediumblobCol MEDIUMBLOB NOT NULL,
longblobCol LONGBLOB NOT NULL,
textCol TEXT NOT NULL,
mediumtextCol MEDIUMTEXT NOT NULL,
longtextCol LONGTEXT NOT NULL,
enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
setCol SET('Option1', 'Option2', 'Option3') NOT NULL,
location GEOMETRY,
data JSON
CHECK (JSON_VALID(data))
)
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val createTableQuery2 = """
CREATE TABLE IF NOT EXISTS table2 (
id INT AUTO_INCREMENT PRIMARY KEY,
bitCol BIT,
tinyintCol TINYINT,
smallintCol SMALLINT,
mediumintCol MEDIUMINT,
mediumintUnsignedCol MEDIUMINT UNSIGNED,
integerCol INTEGER,
intCol INT,
integerUnsignedCol INTEGER UNSIGNED,
bigintCol BIGINT,
floatCol FLOAT,
doubleCol DOUBLE,
decimalCol DECIMAL,
dateCol DATE,
datetimeCol DATETIME,
timestampCol TIMESTAMP,
timeCol TIME,
yearCol YEAR,
varcharCol VARCHAR(255),
charCol CHAR(10),
binaryCol BINARY(64),
varbinaryCol VARBINARY(128),
tinyblobCol TINYBLOB,
blobCol BLOB,
mediumblobCol MEDIUMBLOB,
longblobCol LONGBLOB,
textCol TEXT,
mediumtextCol MEDIUMTEXT,
longtextCol LONGTEXT,
enumCol ENUM('Value1', 'Value2', 'Value3'),
setCol SET('Option1', 'Option2', 'Option3'),
location GEOMETRY,
data JSON
CHECK (JSON_VALID(data))
)
"""
connection.createStatement().execute(createTableQuery2.trimIndent())
@Language("SQL")
val insertData1 =
"""
INSERT INTO table1 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol, location, data
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ST_GeomFromText('POINT(1 1)'), ?)
""".trimIndent()
@Language("SQL")
val insertData2 =
"""
INSERT INTO table2 (
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol, location, data
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ST_GeomFromText('POINT(1 1)'), ?)
""".trimIndent()
connection.prepareStatement(insertData1).use { st ->
// Insert data into table1
for (i in 1..3) {
st.setBoolean(1, true)
st.setByte(2, i.toByte())
st.setShort(3, (i * 10).toShort())
st.setInt(4, i * 100)
st.setInt(5, i * 100)
st.setInt(6, i * 100)
st.setInt(7, i * 100)
st.setInt(8, i * 100)
st.setInt(9, i * 100)
st.setFloat(10, i * 10.0f)
st.setDouble(11, i * 10.0)
st.setBigDecimal(12, BigDecimal(i * 10))
st.setDate(13, java.sql.Date(TIMESTAMP))
st.setTimestamp(14, java.sql.Timestamp(TIMESTAMP))
st.setTimestamp(15, java.sql.Timestamp(TIMESTAMP))
st.setTime(16, java.sql.Time(TIMESTAMP))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, "textValue$i")
st.setString(27, "mediumtextValue$i")
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, "Option$i")
st.setString(31, "{\"key\": \"value\"}")
st.executeUpdate()
}
}
connection.prepareStatement(insertData2).use { st ->
// Insert data into table2
for (i in 1..3) {
st.setBoolean(1, false)
st.setByte(2, (i * 2).toByte())
st.setShort(3, (i * 20).toShort())
st.setInt(4, i * 200)
st.setInt(5, i * 200)
st.setInt(6, i * 200)
st.setInt(7, i * 200)
st.setInt(8, i * 200)
st.setInt(9, i * 200)
st.setFloat(10, i * 20.0f)
st.setDouble(11, i * 20.0)
st.setBigDecimal(12, BigDecimal(i * 20))
st.setDate(13, java.sql.Date(TIMESTAMP))
st.setTimestamp(14, java.sql.Timestamp(TIMESTAMP))
st.setTimestamp(15, java.sql.Timestamp(TIMESTAMP))
st.setTime(16, java.sql.Time(TIMESTAMP))
st.setInt(17, 2023)
st.setString(18, "varcharValue$i")
st.setString(19, "charValue$i")
st.setBytes(20, "binaryValue".toByteArray())
st.setBytes(21, "varbinaryValue".toByteArray())
st.setBytes(22, "tinyblobValue".toByteArray())
st.setBytes(23, "blobValue".toByteArray())
st.setBytes(24, "mediumblobValue".toByteArray())
st.setBytes(25, "longblobValue".toByteArray())
st.setString(26, null)
st.setString(27, null)
st.setString(28, "longtextValue$i")
st.setString(29, "Value$i")
st.setString(30, "Option$i")
st.setString(31, "{\"key\": \"value\"}")
st.executeUpdate()
}
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
connection.createStatement().use { st -> st.execute("DROP DATABASE IF EXISTS $TEST_DATABASE_NAME") }
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `basic test for reading sql tables`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
val result = df1.filter { it[Table1MySql::id] == 1 }
result[0][26] shouldBe "textValue1"
result[0][22] shouldBe "tinyblobValue".toByteArray()
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["textCol"]!!.type shouldBe typeOf<String>()
schema.columns["dateCol"]!!.type shouldBe typeOf<Date>()
schema.columns["datetimeCol"]!!.type shouldBe typeOf<java.time.LocalDateTime>()
schema.columns["timestampCol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
schema.columns["timeCol"]!!.type shouldBe typeOf<java.sql.Time>()
schema.columns["yearCol"]!!.type shouldBe typeOf<Date>()
schema.columns["textCol"]!!.type shouldBe typeOf<String>()
schema.columns["varbinaryCol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["binaryCol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["longblobCol"]!!.type shouldBe typeOf<ByteArray>()
schema.columns["tinyblobCol"]!!.type shouldBe typeOf<ByteArray>()
val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MySql>()
val result2 = df2.filter { it[Table2MySql::id] == 1 }
result2[0][26] shouldBe null
val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
schema2.columns["textCol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
t1.id,
t1.enumCol,
t2.setCol
FROM table1 t1
JOIN table2 t2 ON t1.id = t2.id
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MySql>()
val result = df.filter { it[Table3MySql::id] == 1 }
result[0][2] shouldBe "Option1"
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["enumCol"]!!.type shouldBe typeOf<String>()
schema.columns["setCol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
val table1Df = dataframes[0].cast<Table1MySql>()
table1Df.rowsCount() shouldBe 3
table1Df.filter { it[Table1MySql::integerCol] > 100 }.rowsCount() shouldBe 2
table1Df[0][11] shouldBe 10.0
table1Df[0][26] shouldBe "textValue1"
val table2Df = dataframes[1].cast<Table2MySql>()
table2Df.rowsCount() shouldBe 3
table2Df.filter {
it[Table2MySql::integerCol] != null && it[Table2MySql::integerCol]!! > 400
}.rowsCount() shouldBe 1
table2Df[0][11] shouldBe 20.0
table2Df[0][26] shouldBe null
}
@Test
fun `reading numeric types`() {
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
val result = df1.select("tinyintCol").add("tinyintCol2") { it[Table1MySql::tinyintCol] }
result[0][1] shouldBe 1.toByte()
val result1 = df1.select("smallintCol")
.add("smallintCol2") { it[Table1MySql::smallintCol] }
result1[0][1] shouldBe 10.toShort()
val result2 = df1.select("mediumintCol")
.add("mediumintCol2") { it[Table1MySql::mediumintCol] }
result2[0][1] shouldBe 100
val result3 = df1.select("mediumintUnsignedCol")
.add("mediumintUnsignedCol2") { it[Table1MySql::mediumintUnsignedCol] }
result3[0][1] shouldBe 100
val result4 = df1.select("integerUnsignedCol")
.add("integerUnsignedCol2") { it[Table1MySql::integerUnsignedCol] }
result4[0][1] shouldBe 100L
val result5 = df1.select("bigintCol")
.add("bigintCol2") { it[Table1MySql::bigintCol] }
result5[0][1] shouldBe 100
val result6 = df1.select("floatCol")
.add("floatCol2") { it[Table1MySql::floatCol] }
result6[0][1] shouldBe 10.0f
val result7 = df1.select("doubleCol")
.add("doubleCol2") { it[Table1MySql::doubleCol] }
result7[0][1] shouldBe 10.0
val result8 = df1.select("decimalCol")
.add("decimalCol2") { it[Table1MySql::decimalCol] }
result8[0][1] shouldBe BigDecimal("10")
val schema = DataFrameSchema.readSqlTable(connection, "table1")
schema.columns["tinyintCol"]!!.type shouldBe typeOf<Int>()
schema.columns["smallintCol"]!!.type shouldBe typeOf<Int>()
schema.columns["mediumintCol"]!!.type shouldBe typeOf<Int>()
schema.columns["mediumintUnsignedCol"]!!.type shouldBe typeOf<Int>()
schema.columns["integerUnsignedCol"]!!.type shouldBe typeOf<Long>()
schema.columns["bigintCol"]!!.type shouldBe typeOf<Long>()
schema.columns["floatCol"]!!.type shouldBe typeOf<Float>()
schema.columns["doubleCol"]!!.type shouldBe typeOf<Double>()
schema.columns["decimalCol"]!!.type shouldBe typeOf<BigDecimal>()
// TODO: all unsigned types
// TODO: new mapping system based on class names
// validation after mapping in getObject
// getObject(i+1, type) catch getObject catch getString
// add direct mapping to getString and other methods
}
@Test
fun `infer nullability`() {
inferNullability(connection)
}
}
@@ -0,0 +1,108 @@
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.readDataFrame
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.junit.Ignore
import org.junit.Test
import java.sql.DriverManager
private const val URL_WITH_LOGIN_PASSWORD = "jdbc:postgresql://localhost:5432/test?" +
"user=postgres&password=pass&connectTimeout=10&tcpKeepAlive=true"
private const val URL_NO_LOGIN_PASSWORD = "jdbc:postgresql://localhost:5432/test?connectTimeout=10&tcpKeepAlive=true"
private const val URL_WITH_PASSWORD =
"jdbc:postgresql://localhost:5432/test?password=pass&connectTimeout=10&tcpKeepAlive=true"
private const val URL_WITH_LOGIN =
"jdbc:postgresql://localhost:5432/test?user=postgres&connectTimeout=10&tcpKeepAlive=true"
private const val TABLE_NAME = "table1"
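// Ignored by default: requires a local PostgreSQL instance with a test database (see URLs above)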
@Ignore
class PostgresConnectionUrlTest {
@Test
fun `read from table with login and password in connection URL`() {
DriverManager.getConnection(URL_WITH_LOGIN_PASSWORD).use { connection ->
createTestData(connection)
val df1 = DataFrame.readSqlTable(connection, TABLE_NAME).cast<Table1>()
val result1 = df1.filter { it[Table1::id] == 1 }
result1[0][2] shouldBe 11
val df2 = connection.readDataFrame(TABLE_NAME).cast<Table1>()
val result2 = df2.filter { it[Table1::id] == 1 }
result2[0][2] shouldBe 11
clearTestData(connection)
}
}
@Test
fun `read from table with login and password in connection URL for DBConfig`() {
DriverManager.getConnection(URL_WITH_LOGIN_PASSWORD).use { connection ->
createTestData(connection)
val dbConfig = DbConnectionConfig(URL_WITH_LOGIN_PASSWORD)
val df1 = DataFrame.readSqlTable(dbConfig = dbConfig, TABLE_NAME).cast<Table1>()
val result1 = df1.filter { it[Table1::id] == 1 }
result1[0][2] shouldBe 11
val df2 = dbConfig.readDataFrame(TABLE_NAME).cast<Table1>()
val result2 = df2.filter { it[Table1::id] == 1 }
result2[0][2] shouldBe 11
clearTestData(connection)
}
}
@Test
fun `read from table without login and password`() {
val dbConfig = DbConnectionConfig(URL_NO_LOGIN_PASSWORD)
shouldThrow<org.postgresql.util.PSQLException> {
testReadFromTable(dbConfig)
}
}
@Test
fun `read from table with password only`() {
val dbConfig = DbConnectionConfig(URL_WITH_PASSWORD)
shouldThrow<org.postgresql.util.PSQLException> {
testReadFromTable(dbConfig)
}
}
@Test
fun `read from table with login only`() {
val dbConfig = DbConnectionConfig(URL_WITH_LOGIN)
shouldThrow<org.postgresql.util.PSQLException> {
testReadFromTable(dbConfig)
}
}
private fun testReadFromTable(dbConfig: DbConnectionConfig) {
DriverManager.getConnection(URL_WITH_LOGIN_PASSWORD).use { connection ->
createTestData(connection)
val df2 = dbConfig.readDataFrame(TABLE_NAME).cast<Table1>()
val result2 = df2.filter { it[Table1::id] == 1 }
result2[0][2] shouldBe 11
clearTestData(connection)
}
}
}
@@ -0,0 +1,421 @@
package org.jetbrains.kotlinx.dataframe.io.local
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.api.select
import org.jetbrains.kotlinx.dataframe.io.inferNullability
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Ignore
import org.junit.Test
import org.postgresql.geometric.PGbox
import org.postgresql.geometric.PGcircle
import org.postgresql.geometric.PGline
import org.postgresql.geometric.PGlseg
import org.postgresql.geometric.PGpath
import org.postgresql.geometric.PGpoint
import org.postgresql.geometric.PGpolygon
import org.postgresql.util.PGInterval
import org.postgresql.util.PGobject
import java.math.BigDecimal
import java.sql.Connection
import java.sql.Date
import java.sql.DriverManager
import java.sql.SQLException
import java.sql.Time
import java.sql.Timestamp
import java.sql.Types
import java.util.UUID
import kotlin.reflect.typeOf
private const val BASIC_URL = "jdbc:postgresql://localhost:5432/test"
private const val USER_NAME = "postgres"
private const val PASSWORD = "pass"
@DataSchema
interface Table1 {
val id: Int
val bigintcol: Long
val smallintcol: Int
val bigserialcol: Long
val booleancol: Boolean
val boxcol: String
val byteacol: ByteArray
val charactercol: String
val characterncol: String
val charcol: String
val circlecol: String
val datecol: java.sql.Date
val doublecol: Double
val integercol: Int?
val intervalcol: String
val jsoncol: String
val jsonbcol: String
}
@DataSchema
interface Table2 {
val id: Int
val linecol: org.postgresql.geometric.PGline
val lsegcol: String
val macaddrcol: String
val moneycol: String
val numericcol: BigDecimal
val pathcol: org.postgresql.geometric.PGpath
val pointcol: String
val polygoncol: String
val realcol: Float
val smallintcol: Int
val smallserialcol: Int
val serialcol: Int
val textcol: String?
val timecol: String
val timewithzonecol: String
val timestampcol: String
val timestampwithzonecol: String
val uuidcol: String
val xmlcol: String
}
@DataSchema
interface ViewTable {
val id: Int
val bigintcol: Long
val linecol: String
val textCol: String?
}
internal fun createTestData(connection: Connection) {
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
val createTableStatement = """
CREATE TABLE IF NOT EXISTS table1 (
id serial PRIMARY KEY,
bigintCol bigint not null,
smallintCol smallint not null,
bigserialCol bigserial not null,
booleanCol boolean not null,
boxCol box not null,
byteaCol bytea not null,
characterCol character not null,
characterNCol character(10) not null,
charCol char not null,
circleCol circle not null,
dateCol date not null,
doubleCol double precision not null,
integerCol integer,
intervalCol interval not null,
jsonCol json not null,
jsonbCol jsonb not null,
intArrayCol integer[],
doubleArrayCol double precision array,
dateArrayCol date array,
textArrayCol text array,
booleanArrayCol boolean array
)
"""
connection.createStatement().execute(createTableStatement.trimIndent())
val createTableQuery = """
CREATE TABLE IF NOT EXISTS table2 (
id serial PRIMARY KEY,
lineCol line not null,
lsegCol lseg not null,
macaddrCol macaddr not null,
moneyCol money not null,
numericCol numeric not null,
pathCol path not null,
pointCol point not null,
polygonCol polygon not null,
realCol real not null,
smallintCol smallint not null,
smallserialCol smallserial not null,
serialCol serial not null,
textCol text,
timeCol time not null,
timeWithZoneCol time with time zone not null,
timestampCol timestamp not null,
timestampWithZoneCol timestamp with time zone not null,
uuidCol uuid not null,
xmlCol xml not null
)
"""
connection.createStatement().execute(createTableQuery.trimIndent())
@Language("SQL")
val insertData1 = """
INSERT INTO table1 (
bigintCol, smallintCol, bigserialCol, booleanCol,
boxCol, byteaCol, characterCol, characterNCol, charCol,
circleCol, dateCol, doubleCol,
integerCol, intervalCol, jsonCol, jsonbCol, intArrayCol,
doubleArrayCol, dateArrayCol, textArrayCol, booleanArrayCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
@Language("SQL")
val insertData2 = """
INSERT INTO table2 (
lineCol, lsegCol, macaddrCol, moneyCol, numericCol,
pathCol, pointCol, polygonCol, realCol, smallintCol,
smallserialCol, serialCol, textCol, timeCol,
timeWithZoneCol, timestampCol, timestampWithZoneCol,
uuidCol, xmlCol
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""
val intArray = connection.createArrayOf("INTEGER", arrayOf(1, 2, 3))
val doubleArray = connection.createArrayOf("DOUBLE", arrayOf(1.1, 2.2, 3.3))
val dateArray = connection.createArrayOf("DATE", arrayOf(Date.valueOf("2023-08-01"), Date.valueOf("2023-08-02")))
val textArray = connection.createArrayOf("TEXT", arrayOf("Hello", "World"))
val booleanArray = connection.createArrayOf("BOOLEAN", arrayOf(true, false, true))
connection.prepareStatement(insertData1).use { st ->
// Insert data into table1
for (i in 1..3) {
st.setLong(1, i * 1000L)
st.setShort(2, 11.toShort())
st.setLong(3, 1000000000L + i)
st.setBoolean(4, i % 2 == 1)
st.setObject(5, PGbox("(1,1),(2,2)"))
st.setBytes(6, byteArrayOf(1, 2, 3))
st.setString(7, "A")
st.setString(8, "Hello")
st.setString(9, "A")
st.setObject(10, PGcircle("<(1,2),3>"))
st.setDate(11, Date.valueOf("2023-08-01"))
st.setDouble(12, 12.34)
st.setInt(13, 12345 * i)
st.setObject(14, PGInterval("1 year"))
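// PGobject is the generic carrier for PostgreSQL types without a dedicated JDBC setter (jsonb here; macaddr and xml below)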
val jsonbObject = PGobject()
jsonbObject.type = "jsonb"
jsonbObject.value = "{\"key\": \"value\"}"
st.setObject(15, jsonbObject)
st.setObject(16, jsonbObject)
st.setArray(17, intArray)
st.setArray(18, doubleArray)
st.setArray(19, dateArray)
st.setArray(20, textArray)
st.setArray(21, booleanArray)
st.executeUpdate()
}
}
connection.prepareStatement(insertData2).use { st ->
// Insert data into table2
for (i in 1..3) {
st.setObject(1, PGline("{1,2,3}"))
st.setObject(2, PGlseg("[(-1,0),(1,0)]"))
val macaddrObject = PGobject()
macaddrObject.type = "macaddr"
macaddrObject.value = "00:00:00:00:00:0$i"
st.setObject(3, macaddrObject)
st.setBigDecimal(4, BigDecimal("123.45"))
st.setBigDecimal(5, BigDecimal("12.34"))
st.setObject(6, PGpath("((1,2),(3,$i))"))
st.setObject(7, PGpoint("(1,2)"))
st.setObject(8, PGpolygon("((1,1),(2,2),(3,3))"))
st.setFloat(9, 12.34f)
st.setShort(10, (i * 100).toShort())
st.setInt(11, 1000 + i)
st.setInt(12, 1000000 + i)
st.setString(13, null)
st.setTime(14, Time.valueOf("12:34:56"))
st.setTimestamp(15, Timestamp(System.currentTimeMillis()))
st.setTimestamp(16, Timestamp(System.currentTimeMillis()))
st.setTimestamp(17, Timestamp(System.currentTimeMillis()))
st.setObject(18, UUID.randomUUID(), Types.OTHER)
val xmlObject = PGobject()
xmlObject.type = "xml"
xmlObject.value = "<root><element>data</element></root>"
st.setObject(19, xmlObject)
st.executeUpdate()
}
}
}
internal fun clearTestData(connection: Connection) {
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
}
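// Ignored by default: requires a local PostgreSQL instance (see BASIC_URL above)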
@Ignore
class PostgresTest {
companion object {
private lateinit var connection: Connection
@BeforeClass
@JvmStatic
fun setUpClass() {
connection = DriverManager.getConnection(BASIC_URL, USER_NAME, PASSWORD)
createTestData(connection)
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
clearTestData(connection)
connection.close()
} catch (e: SQLException) {
e.printStackTrace()
}
}
}
@Test
fun `read from tables`() {
val tableName1 = "table1"
val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
val result = df1.filter { it[Table1::id] == 1 }
result[0][2] shouldBe 11
result[0][13] shouldBe 12345
result[0][17] shouldBe arrayOf(1, 2, 3)
result[0][18] shouldBe arrayOf(1.1, 2.2, 3.3)
result[0][19] shouldBe arrayOf(Date.valueOf("2023-08-01"), Date.valueOf("2023-08-02"))
result[0][20] shouldBe arrayOf("Hello", "World")
result[0][21] shouldBe arrayOf(true, false, true)
val schema = DataFrameSchema.readSqlTable(connection, tableName1)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["integercol"]!!.type shouldBe typeOf<Int?>()
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["circlecol"]!!.type shouldBe typeOf<Any>()
schema.columns["intarraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["doublearraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["datearraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["textarraycol"]!!.type.classifier shouldBe kotlin.Array::class
schema.columns["booleanarraycol"]!!.type.classifier shouldBe kotlin.Array::class
val tableName2 = "table2"
val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
val result2 = df2.filter { it[Table2::id] == 1 }
result2[0][11] shouldBe 1001
result2[0][13] shouldBe null
val schema2 = DataFrameSchema.readSqlTable(connection, tableName2)
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
schema2.columns["pathcol"]!!.type shouldBe typeOf<Any>() // TODO: https://github.com/Kotlin/dataframe/issues/537
schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
schema2.columns["linecol"]!!.type shouldBe typeOf<Any>() // TODO: https://github.com/Kotlin/dataframe/issues/537
}
@Test
fun `read from sql query`() {
@Language("SQL")
val sqlQuery =
"""
SELECT
t1.id,
t1.bigintCol,
t2.lineCol,
t2.textCol
FROM table1 t1
JOIN table2 t2 ON t1.id = t2.id
""".trimIndent()
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<ViewTable>()
val result = df.filter { it[ViewTable::id] == 1 }
result[0][3] shouldBe null
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["id"]!!.type shouldBe typeOf<Int>()
schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
schema.columns["textcol"]!!.type shouldBe typeOf<String?>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
val table1Df = dataframes[0].cast<Table1>()
table1Df.rowsCount() shouldBe 3
table1Df.filter { it[Table1::integercol] != null && it[Table1::integercol]!! > 12345 }.rowsCount() shouldBe 2
table1Df[0][1] shouldBe 1000L
table1Df[0][2] shouldBe 11
val table2Df = dataframes[1].cast<Table2>()
table2Df.rowsCount() shouldBe 3
table2Df.filter {
it[Table2::pathcol] == org.postgresql.geometric.PGpath("((1,2),(3,1))")
}.rowsCount() shouldBe 1
table2Df[0][11] shouldBe 1001
}
@Test
fun `read columns of different types to check type mapping`() {
val tableName1 = "table1"
val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
val result = df1.select("smallintcol")
.add("smallintcol2") { it[Table1::smallintcol] }
result[0][1] shouldBe 11
val result1 = df1.select("bigserialcol")
.add("bigserialcol2") { it[Table1::bigserialcol] }
result1[0][1] shouldBe 1000000001L
val result2 = df1.select("doublecol")
.add("doublecol2") { it[Table1::doublecol] }
result2[0][1] shouldBe 12.34
val tableName2 = "table2"
val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
val result3 = df2.select("moneycol")
.add("moneycol2") { it[Table2::moneycol] }
result3[0][1] shouldBe "123,45 ?" // TODO: weird mapping
val result4 = df2.select("numericcol")
.add("numericcol2") { it[Table2::numericcol] }
result4[0][1] shouldBe BigDecimal("12.34")
val result5 = df2.select("realcol")
.add("realcol2") { it[Table2::realcol] }
result5[0][1] shouldBe 12.34f
val result7 = df2.select("smallserialcol")
.add("smallserialcol2") { it[Table2::smallserialcol] }
result7[0][1] shouldBe 1001
val result8 = df2.select("serialcol")
.add("serialcol2") { it[Table2::serialcol] }
result8[0][1] shouldBe 1000001
val schema = DataFrameSchema.readSqlTable(connection, tableName1)
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
schema.columns["bigserialcol"]!!.type shouldBe typeOf<Long>()
schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
val schema1 = DataFrameSchema.readSqlTable(connection, tableName2)
schema1.columns["moneycol"]!!.type shouldBe typeOf<String>()
schema1.columns["numericcol"]!!.type shouldBe typeOf<BigDecimal>()
schema1.columns["realcol"]!!.type shouldBe typeOf<Float>()
schema1.columns["smallserialcol"]!!.type shouldBe typeOf<Int>()
schema1.columns["serialcol"]!!.type shouldBe typeOf<Int>()
}
@Test
fun `infer nullability`() {
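// Shared helper (defined elsewhere in the test sources) that exercises nullable-column inference.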
inferNullability(connection)
}
}
@@ -0,0 +1,26 @@
package org.jetbrains.kotlinx.dataframe.io
import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.io.db.MsSql
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Test
class MsSqlTest {
companion object {
@BeforeClass
@JvmStatic
fun setUpClass() {
}
@AfterClass
@JvmStatic
fun tearDownClass() {
}
}
@Test
fun `test SQL Server TOP limit functionality`() {
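// SQL Server has no LIMIT clause; the MsSql DbType rewrites the query to use TOP instead.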
MsSql.buildSqlQueryWithLimit("SELECT * FROM TestTable1", 1) shouldBe "SELECT TOP 1 * FROM TestTable1"
}
}
@@ -0,0 +1,264 @@
package org.jetbrains.kotlinx.dataframe.io
import io.kotest.matchers.shouldBe
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.filter
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.AfterClass
import org.junit.BeforeClass
import org.junit.Test
import java.io.File
import java.nio.file.Files
import java.sql.Connection
import java.sql.DriverManager
import kotlin.reflect.typeOf
@DataSchema
interface CustomerSQLite {
val id: Int?
val name: String?
val age: Int?
val salary: Double
val profilePicture: ByteArray?
}
@DataSchema
interface OrderSQLite {
val id: Int?
val customerName: String?
val orderDate: String?
val totalAmount: Double
val orderDetails: ByteArray?
}
@DataSchema
interface CustomerOrderSQLite {
val customerId: Int?
val customerName: String?
val customerAge: Int?
val customerSalary: Double
val customerProfilePicture: ByteArray?
val orderId: Int?
val orderDate: String?
val totalAmount: Double
val orderDetails: ByteArray?
}
class SqliteTest {
companion object {
private lateinit var connection: Connection
/**
* A temporary file is used because the DbConnectionConfig tests create a connection
* under the hood and therefore need access to the same shared SQLite database file.
*/
private lateinit var testDbFile: File
private lateinit var databaseUrl: String
@BeforeClass
@JvmStatic
fun setUpClass() {
testDbFile = Files.createTempFile("dataframe_sqlite_test_", ".db").toFile()
testDbFile.deleteOnExit() // fallback cleanup in case tearDownClass fails
databaseUrl = "jdbc:sqlite:${testDbFile.absolutePath}"
connection = DriverManager.getConnection(databaseUrl)
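// Create the two test tables and seed them with sample rows, including NULL values and BLOB columns.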
@Language("SQL")
val createCustomersTableQuery = """
CREATE TABLE Customers (
id INTEGER PRIMARY KEY,
name TEXT,
age INTEGER,
salary REAL NOT NULL,
profilePicture BLOB
)
"""
connection.createStatement().execute(createCustomersTableQuery)
@Language("SQL")
val createOrderTableQuery = """
CREATE TABLE Orders (
id INTEGER PRIMARY KEY,
customerName TEXT,
orderDate TEXT,
totalAmount NUMERIC NOT NULL,
orderDetails BLOB
)
"""
connection.createStatement().execute(createOrderTableQuery)
val profilePicture = "SampleProfilePictureData".toByteArray()
val orderDetails = "OrderDetailsData".toByteArray()
connection.prepareStatement("INSERT INTO Customers (name, age, salary, profilePicture) VALUES (?, ?, ?, ?)")
.use {
it.setString(1, "John Doe")
it.setInt(2, 30)
it.setDouble(3, 2500.50)
it.setBytes(4, profilePicture)
it.executeUpdate()
}
connection.prepareStatement("INSERT INTO Customers (name, age, salary, profilePicture) VALUES (?, ?, ?, ?)")
.use {
it.setString(1, null)
it.setInt(2, 40)
it.setDouble(3, 1500.50)
it.setBytes(4, profilePicture)
it.executeUpdate()
}
connection.prepareStatement(
"INSERT INTO Orders (customerName, orderDate, totalAmount, orderDetails) VALUES (?, ?, ?, ?)",
).use {
it.setString(1, null)
it.setString(2, "2023-07-21")
it.setDouble(3, 150.75)
it.setBytes(4, orderDetails)
it.executeUpdate()
}
connection.prepareStatement(
"INSERT INTO Orders (customerName, orderDate, totalAmount, orderDetails) VALUES (?, ?, ?, ?)",
).use {
it.setString(1, "John Doe")
it.setString(2, "2023-08-21")
it.setDouble(3, 250.75)
it.setBytes(4, orderDetails)
it.executeUpdate()
}
}
@AfterClass
@JvmStatic
fun tearDownClass() {
try {
connection.close()
if (::testDbFile.isInitialized && testDbFile.exists()) {
testDbFile.delete()
}
} catch (e: Exception) {
// Log the failure, but do not fail the test run
println("Warning: Could not clean up test database file: ${e.message}")
}
}
}
@Test
fun `read from tables`() {
val customerTableName = "Customers"
val df = DataFrame.readSqlTable(connection, customerTableName).cast<CustomerSQLite>()
val result = df.filter { it[CustomerSQLite::name] == "John Doe" }
result[0][2] shouldBe 30
val schema = DataFrameSchema.readSqlTable(connection, customerTableName)
schema.columns["id"]!!.type shouldBe typeOf<Int?>()
schema.columns["name"]!!.type shouldBe typeOf<String?>()
schema.columns["salary"]!!.type shouldBe typeOf<Double>()
schema.columns["profilePicture"]!!.type shouldBe typeOf<ByteArray?>()
val orderTableName = "Orders"
val df2 = DataFrame.readSqlTable(connection, orderTableName).cast<OrderSQLite>()
val result2 = df2.filter { it[OrderSQLite::totalAmount] > 10 }
result2[0][2] shouldBe "2023-07-21"
val schema2 = DataFrameSchema.readSqlTable(connection, orderTableName)
schema2.columns["id"]!!.type shouldBe typeOf<Int?>()
schema2.columns["customerName"]!!.type shouldBe typeOf<String?>()
schema2.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
}
@Test
fun `read from tables with DbConnectionConfig`() {
val customerTableName = "Customers"
val dbConnectionConfig = DbConnectionConfig(databaseUrl)
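// Only the JDBC URL is supplied; a file-based SQLite database needs no credentials.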
val df = DataFrame.readSqlTable(dbConnectionConfig, customerTableName).cast<CustomerSQLite>()
val result = df.filter { it[CustomerSQLite::name] == "John Doe" }
result[0][2] shouldBe 30
val schema = DataFrameSchema.readSqlTable(dbConnectionConfig, customerTableName)
schema.columns["id"]!!.type shouldBe typeOf<Int?>()
schema.columns["name"]!!.type shouldBe typeOf<String?>()
schema.columns["salary"]!!.type shouldBe typeOf<Double>()
schema.columns["profilePicture"]!!.type shouldBe typeOf<ByteArray?>()
val orderTableName = "Orders"
val df2 = DataFrame.readSqlTable(dbConnectionConfig, orderTableName).cast<OrderSQLite>()
val result2 = df2.filter { it[OrderSQLite::totalAmount] > 10 }
result2[0][2] shouldBe "2023-07-21"
val schema2 = DataFrameSchema.readSqlTable(dbConnectionConfig, orderTableName)
schema2.columns["id"]!!.type shouldBe typeOf<Int?>()
schema2.columns["customerName"]!!.type shouldBe typeOf<String?>()
schema2.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
}
@Language("SQL")
private val sqlQuery = """
SELECT
c.id AS customerId,
c.name AS customerName,
c.age AS customerAge,
c.salary AS customerSalary,
c.profilePicture AS customerProfilePicture,
o.id AS orderId,
o.orderDate AS orderDate,
o.totalAmount AS totalAmount,
o.orderDetails AS orderDetails
FROM Customers c
INNER JOIN Orders o ON c.name = o.customerName
"""
@Test
fun `read from sql query`() {
val df = DataFrame.readSqlQuery(connection, sqlQuery).cast<CustomerOrderSQLite>()
val result = df.filter { it[CustomerOrderSQLite::customerSalary] > 1 }
result[0][3] shouldBe 2500.5
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
schema.columns["customerId"]!!.type shouldBe typeOf<Int?>()
schema.columns["customerName"]!!.type shouldBe typeOf<String?>()
schema.columns["customerAge"]!!.type shouldBe typeOf<Int?>()
schema.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
}
@Test
fun `read from sql query with DbConnectionConfig`() {
val dbConnectionConfig = DbConnectionConfig(databaseUrl)
val df = DataFrame.readSqlQuery(dbConnectionConfig, sqlQuery).cast<CustomerOrderSQLite>()
val result = df.filter { it[CustomerOrderSQLite::customerSalary] > 1 }
result[0][3] shouldBe 2500.5
val schema = DataFrameSchema.readSqlQuery(dbConnectionConfig, sqlQuery = sqlQuery)
schema.columns["customerId"]!!.type shouldBe typeOf<Int?>()
schema.columns["customerName"]!!.type shouldBe typeOf<String?>()
schema.columns["customerAge"]!!.type shouldBe typeOf<Int?>()
schema.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
}
@Test
fun `read from all tables`() {
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
val customerDf = dataframes[0].cast<CustomerSQLite>()
customerDf.rowsCount() shouldBe 2
customerDf.filter { it[CustomerSQLite::age] != null && it[CustomerSQLite::age]!! > 30 }.rowsCount() shouldBe 1
customerDf[0][1] shouldBe "John Doe"
val orderDf = dataframes[1].cast<OrderSQLite>()
orderDf.rowsCount() shouldBe 2
orderDf.filter { it[OrderSQLite::totalAmount] > 200 }.rowsCount() shouldBe 1
orderDf[0][1] shouldBe null
}
}
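A minimal usage sketch of the pattern these tests exercise: reading a table through
DbConnectionConfig, which opens its own connection under the hood. The database path and
the exact import location of readSqlTable are assumptions for illustration.
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
fun main() {
    // Hypothetical SQLite file; any JDBC URL supported by dataframe-jdbc works the same way.
    val config = DbConnectionConfig("jdbc:sqlite:sample.db")
    val customers = DataFrame.readSqlTable(config, "Customers")
    println(customers)
}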
@@ -0,0 +1,34 @@
# SLF4J's SimpleLogger configuration file
# Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err.
# Default logging detail level for all instances of SimpleLogger.
# Must be one of ("trace", "debug", "info", "warn", or "error").
# If not specified, defaults to "info".
org.slf4j.simpleLogger.defaultLogLevel=debug
# Logging detail level for a SimpleLogger instance named "xxxxx".
# Must be one of ("trace", "debug", "info", "warn", or "error").
# If not specified, the default logging detail level is used.
#org.slf4j.simpleLogger.log.xxxxx=
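# For example, to raise this module's own logging to trace (logger name assumed; shown commented out):
#org.slf4j.simpleLogger.log.org.jetbrains.kotlinx.dataframe.io=trace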
# Set to true if you want the current date and time to be included in output messages.
# Default is false, and will output the number of milliseconds elapsed since startup.
org.slf4j.simpleLogger.showDateTime=true
# The date and time format to be used in the output messages.
# The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat.
# If the format is not specified or is invalid, the default format is used.
# The default format is yyyy-MM-dd HH:mm:ss:SSS Z.
org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z
# Set to true if you want to output the current thread name.
# Defaults to true.
org.slf4j.simpleLogger.showThreadName=true
# Set to true if you want the Logger instance name to be included in output messages.
# Defaults to true.
org.slf4j.simpleLogger.showLogName=true
# Set to true if you want the last component of the name to be included in output messages.
# Defaults to false.
#org.slf4j.simpleLogger.showShortLogName=false