init research
This commit is contained in:
Vendored
+7
@@ -0,0 +1,7 @@
|
||||
## :dataframe-jdbc
|
||||
|
||||
This module, published as `dataframe-jdbc`, contains all logic and tests for DataFrame to be able to work with
|
||||
JDBC data sources.
|
||||
|
||||
See [Read from SQL databases](https://kotlin.github.io/dataframe/readsqldatabases.html) for more information
|
||||
about how to use it.
|
||||
+255
@@ -0,0 +1,255 @@
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/DbConnectionConfig {
|
||||
public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)V
|
||||
public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public final fun copy (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;
|
||||
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;
|
||||
public fun equals (Ljava/lang/Object;)Z
|
||||
public final fun getPassword ()Ljava/lang/String;
|
||||
public final fun getReadOnly ()Z
|
||||
public final fun getUrl ()Ljava/lang/String;
|
||||
public final fun getUser ()Ljava/lang/String;
|
||||
public fun hashCode ()I
|
||||
public fun toString ()Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/Jdbc : org/jetbrains/kotlinx/dataframe/io/SupportedCodeGenerationFormat, org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
|
||||
public fun <init> ()V
|
||||
public fun acceptsExtension (Ljava/lang/String;)Z
|
||||
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
|
||||
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
|
||||
public fun getTestOrder ()I
|
||||
public fun readCodeForGeneration (Ljava/io/File;Ljava/lang/String;Z)Ljava/lang/String;
|
||||
public fun readCodeForGeneration (Ljava/io/InputStream;Ljava/lang/String;Z)Ljava/lang/String;
|
||||
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/JdbcSchemaKt {
|
||||
public static final fun buildCodeForDB (Ljava/net/URL;Ljava/lang/String;)Ljava/lang/String;
|
||||
public static final fun getDatabaseCodeGenReader (Lorg/jetbrains/kotlinx/dataframe/codeGen/CodeGenerator$Companion;)Lkotlin/jvm/functions/Function2;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadDataFrameSchemaKt {
|
||||
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map;
|
||||
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map;
|
||||
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map;
|
||||
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map;
|
||||
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map;
|
||||
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map;
|
||||
public static final fun readDataFrameSchema (Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readDataFrameSchema (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readDataFrameSchema (Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readDataFrameSchema (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readDataFrameSchema$default (Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readDataFrameSchema$default (Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readDataFrameSchema$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadJdbcKt {
|
||||
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;)Ljava/util/Map;
|
||||
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;)Ljava/util/Map;
|
||||
public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;)Ljava/util/Map;
|
||||
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Ljava/util/Map;
|
||||
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Ljava/util/Map;
|
||||
public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Lkotlin/jvm/functions/Function1;ILjava/lang/Object;)Ljava/util/Map;
|
||||
public static final fun readDataFrame (Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDataFrame (Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDataFrame (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDataFrame (Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDataFrame (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDataFrame$default (Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDataFrame$default (Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDataFrame$default (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDataFrame$default (Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readResultSet$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Ljava/sql/Connection;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readResultSet$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/Integer;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljavax/sql/DataSource;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/Integer;ZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZLkotlin/jvm/functions/Function1;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public abstract class org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public fun <init> (Ljava/lang/String;)V
|
||||
public fun buildDataColumn (Ljava/lang/String;Ljava/util/List;Lkotlin/reflect/KType;Z)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
|
||||
public fun buildSelectTableQueryWithLimit (Ljava/lang/String;Ljava/lang/Integer;)Ljava/lang/String;
|
||||
public fun buildSqlQueryWithLimit (Ljava/lang/String;I)Ljava/lang/String;
|
||||
public static synthetic fun buildSqlQueryWithLimit$default (Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/lang/String;IILjava/lang/Object;)Ljava/lang/String;
|
||||
public abstract fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun configureReadStatement (Ljava/sql/PreparedStatement;)V
|
||||
public abstract fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public abstract fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun createConnection (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;)Ljava/sql/Connection;
|
||||
public fun extractValueFromResultSet (Ljava/sql/ResultSet;ILorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;Lkotlin/reflect/KType;)Ljava/lang/Object;
|
||||
public final fun getDbTypeInJdbcUrl ()Ljava/lang/String;
|
||||
public fun getDefaultFetchSize ()I
|
||||
public fun getDefaultQueryTimeout ()Ljava/lang/Integer;
|
||||
public abstract fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun getTableColumnsMetadata (Ljava/sql/ResultSet;)Ljava/util/List;
|
||||
public fun getTableTypes ()Ljava/util/List;
|
||||
public abstract fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
public fun makeCommonSqlToKTypeMapping (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/DuckDb : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/DuckDb;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun createConnection (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;)Ljava/sql/Connection;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
}
|
||||
|
||||
public class org/jetbrains/kotlinx/dataframe/io/db/H2 : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Companion;
|
||||
public static final field MODE_MARIADB Ljava/lang/String;
|
||||
public static final field MODE_MSSQLSERVER Ljava/lang/String;
|
||||
public static final field MODE_MYSQL Ljava/lang/String;
|
||||
public static final field MODE_POSTGRESQL Ljava/lang/String;
|
||||
public fun <init> ()V
|
||||
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)V
|
||||
public fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;)V
|
||||
public synthetic fun <init> (Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public fun buildSqlQueryWithLimit (Ljava/lang/String;I)Ljava/lang/String;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public final fun getMode ()Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/H2$Companion {
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/H2$Mode : java/lang/Enum {
|
||||
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode$Companion;
|
||||
public static final field MariaDb Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public static final field MsSqlServer Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public static final field MySql Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public static final field PostgreSql Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public static final field Regular Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public static fun getEntries ()Lkotlin/enums/EnumEntries;
|
||||
public final fun getValue ()Ljava/lang/String;
|
||||
public final fun toDbType ()Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;
|
||||
public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/H2$Mode$Companion {
|
||||
public final fun fromDbType (Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
public final fun fromValue (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/H2$Mode;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/MariaDb : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/MariaDb;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/MsSql : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/MsSql;
|
||||
public fun buildSqlQueryWithLimit (Ljava/lang/String;I)Ljava/lang/String;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/MySql : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/MySql;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/PostgreSql : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/PostgreSql;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
public fun quoteIdentifier (Ljava/lang/String;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/Sqlite : org/jetbrains/kotlinx/dataframe/io/db/DbType {
|
||||
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/db/Sqlite;
|
||||
public fun buildTableMetadata (Ljava/sql/ResultSet;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun convertSqlTypeToColumnSchemaValue (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lorg/jetbrains/kotlinx/dataframe/schema/ColumnSchema;
|
||||
public fun convertSqlTypeToKType (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;)Lkotlin/reflect/KType;
|
||||
public fun createConnection (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;)Ljava/sql/Connection;
|
||||
public fun getDriverClassName ()Ljava/lang/String;
|
||||
public fun isSystemTable (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;)Z
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata {
|
||||
public fun <init> (Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;Z)V
|
||||
public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public final fun copy (Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;
|
||||
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableColumnMetadata;
|
||||
public fun equals (Ljava/lang/Object;)Z
|
||||
public final fun getJavaClassName ()Ljava/lang/String;
|
||||
public final fun getJdbcType ()I
|
||||
public final fun getName ()Ljava/lang/String;
|
||||
public final fun getSize ()I
|
||||
public final fun getSqlTypeName ()Ljava/lang/String;
|
||||
public fun hashCode ()I
|
||||
public final fun isNullable ()Z
|
||||
public fun toString ()Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/TableMetadata {
|
||||
public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
|
||||
public final fun copy (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/db/TableMetadata;
|
||||
public fun equals (Ljava/lang/Object;)Z
|
||||
public final fun getCatalogue ()Ljava/lang/String;
|
||||
public final fun getName ()Ljava/lang/String;
|
||||
public final fun getSchemaName ()Ljava/lang/String;
|
||||
public fun hashCode ()I
|
||||
public fun toString ()Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/db/UtilKt {
|
||||
public static final fun driverClassNameFromUrl (Ljava/lang/String;)Ljava/lang/String;
|
||||
public static final fun extractDBTypeFromConnection (Ljava/sql/Connection;)Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;
|
||||
public static final fun extractDBTypeFromUrl (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;
|
||||
}
|
||||
|
||||
+42
@@ -0,0 +1,42 @@
|
||||
plugins {
|
||||
with(convention.plugins) {
|
||||
alias(kotlinJvm8)
|
||||
}
|
||||
with(libs.plugins) {
|
||||
alias(publisher)
|
||||
alias(binary.compatibility.validator)
|
||||
}
|
||||
}
|
||||
|
||||
group = "org.jetbrains.kotlinx"
|
||||
|
||||
dependencies {
|
||||
api(projects.core)
|
||||
compileOnly(libs.duckdb.jdbc)
|
||||
compileOnly(libs.sqlite)
|
||||
implementation(libs.kotlinLogging)
|
||||
testImplementation(libs.mariadb)
|
||||
testImplementation(libs.sqlite)
|
||||
testImplementation(libs.postgresql)
|
||||
testImplementation(libs.mysql)
|
||||
testImplementation(libs.h2db)
|
||||
testImplementation(libs.mssql)
|
||||
testImplementation(libs.junit)
|
||||
testImplementation(libs.sl4jsimple)
|
||||
testImplementation(libs.jts.core)
|
||||
testImplementation(libs.duckdb.jdbc)
|
||||
testImplementation(projects.dataframeJson)
|
||||
testImplementation(libs.kotestAssertions) {
|
||||
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
|
||||
}
|
||||
testImplementation(libs.hikaricp)
|
||||
}
|
||||
|
||||
kotlinPublications {
|
||||
publication {
|
||||
publicationName = "dataframeJDBC"
|
||||
artifactId = project.name
|
||||
description = "JDBC support for Kotlin DataFrame"
|
||||
packageName = artifactId
|
||||
}
|
||||
}
|
||||
Vendored
+93
@@ -0,0 +1,93 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
/**
|
||||
* Represents the configuration for an internally managed JDBC database connection.
|
||||
*
|
||||
* This class defines connection parameters used by the library to create a `Connection`
|
||||
* when the user does not provide one explicitly.
|
||||
* It is designed for safe, read-only access by default.
|
||||
*
|
||||
* __NOTE:__ Connections created using this configuration are managed entirely by the library.
|
||||
* Users do not have access to the underlying `Connection` instance and cannot commit or close it manually.
|
||||
*
|
||||
* ### Read-Only Mode Behavior:
|
||||
*
|
||||
* When [readOnly] is `true` (default), the connection operates in read-only mode with:
|
||||
* - `Connection.setReadOnly(true)`
|
||||
* - `Connection.setAutoCommit(false)`
|
||||
* - automatic `rollback()` at the end of execution
|
||||
*
|
||||
* When [readOnly] is `false`, the connection uses JDBC defaults (usually read-write),
|
||||
* but the library still rejects any queries that appear to modify data
|
||||
* (e.g. contain `INSERT`, `UPDATE`, `DELETE`, etc.).
|
||||
*
|
||||
* ### Examples:
|
||||
*
|
||||
* ```kotlin
|
||||
* // Safe read-only connection (default)
|
||||
* val config = DbConnectionConfig("jdbc:sqlite::memory:")
|
||||
* val df = DataFrame.readSqlQuery(config, "SELECT * FROM books")
|
||||
*
|
||||
* // Use default JDBC connection settings (still protected against mutations)
|
||||
* val config = DbConnectionConfig(
|
||||
* url = "jdbc:sqlite::memory:",
|
||||
* readOnly = false
|
||||
* )
|
||||
* ```
|
||||
*
|
||||
* @property [url] The JDBC URL of the database, e.g., `"jdbc:postgresql://localhost:5432/mydb"`.
|
||||
* Must follow the standard format: `jdbc:subprotocol:subname`.
|
||||
*
|
||||
* @property [user] The username used for authentication.
|
||||
* Optional, default is an empty string.
|
||||
*
|
||||
* @property [password] The password used for authentication.
|
||||
* Optional, default is an empty string.
|
||||
*
|
||||
* @property [readOnly] If `true` (default), enables read-only mode. If `false`, uses JDBC defaults
|
||||
* but still prevents data-modifying queries. See class documentation for details.
|
||||
*/
|
||||
public class DbConnectionConfig(
    public val url: String,
    public val user: String = "",
    public val password: String = "",
    public val readOnly: Boolean = true,
) {
    // Structural equality over all four properties; intentionally NOT a data class
    // so that toString() can mask the password (see below).
    override fun equals(other: Any?): Boolean =
        this === other ||
            (other is DbConnectionConfig &&
                url == other.url &&
                user == other.user &&
                password == other.password &&
                readOnly == other.readOnly)

    // Standard 31-based hash over the same properties used by equals().
    override fun hashCode(): Int {
        var hash = url.hashCode()
        hash = hash * 31 + user.hashCode()
        hash = hash * 31 + password.hashCode()
        hash = hash * 31 + readOnly.hashCode()
        return hash
    }

    // The password is deliberately masked so configurations can be logged safely.
    override fun toString(): String = "DbConnectionConfig(url='$url', user='$user', password='***', readOnly=$readOnly)"

    /**
     * Returns a new [DbConnectionConfig] equal to this one except for the
     * explicitly overridden properties.
     *
     * @param url The JDBC URL; defaults to the current value.
     * @param user The username; defaults to the current value.
     * @param password The password; defaults to the current value.
     * @param readOnly The read-only flag; defaults to the current value.
     * @return A new [DbConnectionConfig] with the requested changes applied.
     */
    public fun copy(
        url: String = this.url,
        user: String = this.user,
        password: String = this.password,
        readOnly: Boolean = this.readOnly,
    ): DbConnectionConfig =
        DbConnectionConfig(
            url = url,
            user = user,
            password = password,
            readOnly = readOnly,
        )
}
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.Code
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
|
||||
// TODO: https://github.com/Kotlin/dataframe/issues/450
|
||||
// TODO: https://github.com/Kotlin/dataframe/issues/450
public class Jdbc :
    SupportedCodeGenerationFormat,
    SupportedDataFrameFormat {

    /** Reads a DataFrame from the given [stream]; delegates to the (stub) JDBC reader. */
    public override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame = DataFrame.readJDBC(stream)

    /** Reads a DataFrame from the given [path]; delegates to the (stub) JDBC reader. */
    public override fun readDataFrame(path: Path, header: List<String>): AnyFrame = DataFrame.readJDBC(path)

    override fun readCodeForGeneration(
        stream: InputStream,
        name: String,
        generateHelperCompanionObject: Boolean,
    ): Code = TODO("Not yet implemented")

    override fun readCodeForGeneration(file: File, name: String, generateHelperCompanionObject: Boolean): Code =
        TODO("Not yet implemented")

    /** Only the dedicated "jdbc" pseudo-extension maps to this format. */
    override fun acceptsExtension(ext: String): Boolean = "jdbc" == ext

    override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

    /** Ordering hint used when probing formats; JDBC is probed late. */
    override val testOrder: Int = 40000

    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadJdbcMethod(pathRepresentation)
}
|
||||
|
||||
/**
 * Stub for reading a DataFrame via JDBC from a [File].
 *
 * The parameter was renamed from `stream` to `file`: it is a [File], not a stream,
 * and the old name was misleading. The function is private, so no caller outside
 * this file is affected.
 *
 * @throws NotImplementedError always, until implemented.
 */
private fun DataFrame.Companion.readJDBC(file: File): DataFrame<*> {
    TODO("Not yet implemented")
}
|
||||
|
||||
/**
 * Stub for reading a DataFrame via JDBC from a [Path].
 *
 * @throws NotImplementedError always, until implemented.
 */
private fun DataFrame.Companion.readJDBC(path: Path): DataFrame<*> {
    TODO("Not yet implemented")
}
|
||||
|
||||
/**
 * Stub for reading a DataFrame via JDBC from an [InputStream].
 *
 * @throws NotImplementedError always, until implemented.
 */
private fun DataFrame.Companion.readJDBC(stream: InputStream): DataFrame<*> {
    TODO("Not yet implemented")
}
|
||||
|
||||
/**
 * Code-generation descriptor for the default JDBC read method: generated code
 * calls [READ_JDBC] with no extra arguments beyond the optional [path].
 */
internal class DefaultReadJdbcMethod(path: String?) : AbstractDefaultReadMethod(path, MethodArguments.EMPTY, READ_JDBC)

// Name of the read method emitted into generated code.
private const val READ_JDBC = "readJDBC"
|
||||
+561
@@ -0,0 +1,561 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataColumn
|
||||
import org.jetbrains.kotlinx.dataframe.api.Infer
|
||||
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Blob
|
||||
import java.sql.Clob
|
||||
import java.sql.Connection
|
||||
import java.sql.DatabaseMetaData
|
||||
import java.sql.DriverManager
|
||||
import java.sql.NClob
|
||||
import java.sql.PreparedStatement
|
||||
import java.sql.Ref
|
||||
import java.sql.ResultSet
|
||||
import java.sql.ResultSetMetaData
|
||||
import java.sql.RowId
|
||||
import java.sql.SQLXML
|
||||
import java.sql.Time
|
||||
import java.sql.Timestamp
|
||||
import java.sql.Types
|
||||
import java.time.LocalDateTime
|
||||
import java.time.OffsetDateTime
|
||||
import java.time.OffsetTime
|
||||
import java.util.Date
|
||||
import java.util.UUID
|
||||
import kotlin.reflect.KClass
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.full.createType
|
||||
import kotlin.reflect.full.isSupertypeOf
|
||||
import kotlin.reflect.full.safeCast
|
||||
import kotlin.reflect.full.starProjectedType
|
||||
|
||||
/**
|
||||
* The `DbType` class represents a database type used for reading dataframe from the database.
|
||||
*
|
||||
* @property [dbTypeInJdbcUrl] The name of the database as specified in the JDBC URL.
|
||||
*/
|
||||
public abstract class DbType(public val dbTypeInJdbcUrl: String) {
|
||||
/**
|
||||
* Represents the JDBC driver class name for a given database type.
|
||||
*
|
||||
* NOTE: It's important for usage in dataframe-gradle-plugin to force class loading of the driver.
|
||||
*
|
||||
* @return The JDBC driver class name as a [String].
|
||||
*/
|
||||
public abstract val driverClassName: String
|
||||
|
||||
/**
|
||||
* The table type(s) (`TABLE_TYPE`) of ordinary tables in the SQL database, used by
|
||||
* [readAllSqlTables], and [readAllSqlTables] as a filter when querying the database
|
||||
* for all the tables it has using [DatabaseMetaData.getTables].
|
||||
*
|
||||
* This is usually "TABLE" or "BASE TABLE", which is what [tableTypes] is set to by default,
|
||||
* but it can be overridden to any custom list of table types, or `null` to let the JDBC integration
|
||||
* return all types of tables.
|
||||
*
|
||||
* See [DatabaseMetaData.getTableTypes] for all supported table types of your specific database.
|
||||
*/
|
||||
public open val tableTypes: List<String>? = listOf("TABLE", "BASE TABLE")
|
||||
|
||||
/**
|
||||
* Specifies the default batch size for fetching rows from the database during query execution.
|
||||
*
|
||||
* This property determines how many rows are fetched in a single batch from the database.
|
||||
* A proper fetch size can improve performance by reducing the number of network round-trips required
|
||||
* when handling large result sets.
|
||||
*
|
||||
* Value is set to 1000 by default, but it can be overridden based on database-specific requirements
|
||||
* or performance considerations.
|
||||
*/
|
||||
public open val defaultFetchSize: Int = 1000
|
||||
|
||||
/**
|
||||
* Specifies the default timeout in seconds for database queries.
|
||||
*
|
||||
* If set to `null`, no timeout is applied, allowing queries to run indefinitely.
|
||||
* This property can be used to set a default query timeout for the database type,
|
||||
* which can help manage long-running queries.
|
||||
*/
|
||||
public open val defaultQueryTimeout: Int? = null // null = no timeout
|
||||
|
||||
/**
|
||||
* Returns a [ColumnSchema] produced from [tableColumnMetadata].
|
||||
*/
|
||||
public abstract fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema?
|
||||
|
||||
/**
|
||||
* Checks if the given table name is a system table for the specified database type.
|
||||
*
|
||||
* @param [tableMetadata] the table object representing the table from the database.
|
||||
* @return True if the table is a system table for the specified database type, false otherwise.
|
||||
*/
|
||||
public abstract fun isSystemTable(tableMetadata: TableMetadata): Boolean
|
||||
|
||||
/**
|
||||
* Builds the table metadata based on the database type and the ResultSet from the query.
|
||||
*
|
||||
* @param [tables] the ResultSet containing the table's meta-information.
|
||||
* @return the TableMetadata object representing the table metadata.
|
||||
*/
|
||||
public abstract fun buildTableMetadata(tables: ResultSet): TableMetadata
|
||||
|
||||
/**
|
||||
* Converts SQL data type to a Kotlin data type.
|
||||
*
|
||||
* @param [tableColumnMetadata] The metadata of the table column.
|
||||
* @return The corresponding Kotlin data type, or null if no mapping is found.
|
||||
*/
|
||||
public abstract fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType?
|
||||
|
||||
/**
|
||||
* Builds a SELECT query for reading from a table.
|
||||
*
|
||||
* @param [tableName] the name of the table to query.
|
||||
* @param [limit] the maximum number of rows to retrieve. If `null`, 0, or negative, no limit is applied.
|
||||
* @return the SQL query string.
|
||||
*/
|
||||
public open fun buildSelectTableQueryWithLimit(tableName: String, limit: Int?): String {
    require(tableName.isNotBlank()) { "Table name cannot be blank" }

    // Quote according to the dialect's rules before embedding in SQL.
    val baseQuery = "SELECT * FROM " + quoteIdentifier(tableName)

    // A null, zero, or negative limit means "no limit clause".
    val effectiveLimit = limit ?: 0
    return when {
        effectiveLimit > 0 -> buildSqlQueryWithLimit(baseQuery, effectiveLimit)
        else -> baseQuery
    }
}
|
||||
|
||||
/**
|
||||
* Configures the provided `PreparedStatement` for optimized read operations.
|
||||
*
|
||||
* This method sets the fetch size for efficient streaming, applies a query timeout if specified,
|
||||
* and configures the fetch direction to forward-only for better performance in read-only operations.
|
||||
*
|
||||
* @param statement the `PreparedStatement` to be configured
|
||||
*/
|
||||
public open fun configureReadStatement(statement: PreparedStatement) {
    // Stream results in batches rather than materializing everything at once.
    statement.fetchSize = defaultFetchSize

    // Apply the dialect's query timeout only when one is configured.
    val timeout = defaultQueryTimeout
    if (timeout != null) {
        statement.queryTimeout = timeout
    }

    // Reads never scroll backwards, so hint forward-only fetching to the driver.
    statement.fetchDirection = ResultSet.FETCH_FORWARD
}
|
||||
|
||||
/**
|
||||
* Quotes an identifier (table or column name) according to database-specific rules.
|
||||
*
|
||||
* Examples:
|
||||
* - PostgreSQL: "tableName" or "schema"."table"
|
||||
* - MySQL: `tableName` or `schema`.`table`
|
||||
* - MS SQL: `[tableName]` or `[schema].[table]`
|
||||
* - SQLite/H2: no quotes for simple names
|
||||
*
|
||||
* @param [name] the identifier to quote (can contain dots for schema.table).
|
||||
* @return the quoted identifier.
|
||||
*/
|
||||
public open fun quoteIdentifier(name: String): String {
    require(name.isNotBlank()) { "Identifier cannot be blank" }

    // Base implementation performs no quoting (sufficient for SQLite, H2, and
    // simple names); dialects that need ""/``/[] quoting override this method.
    return name
}
|
||||
|
||||
/**
|
||||
* Constructs a SQL query with a limit clause.
|
||||
*
|
||||
* @param sqlQuery The original SQL query.
|
||||
* @param limit The maximum number of rows to retrieve from the query. Default is 1.
|
||||
* @return A new SQL query with the limit clause added.
|
||||
*/
|
||||
// Appends a standard LIMIT clause; dialects without LIMIT (e.g. TOP/FETCH FIRST) override this.
public open fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int = 1): String = sqlQuery + " LIMIT " + limit
|
||||
|
||||
/**
|
||||
* Creates a database connection using the provided configuration.
|
||||
* This method is only called when working with [DbConnectionConfig] (internally managed connections).
|
||||
*
|
||||
* Some databases (like [Sqlite]) require read-only mode to be set during connection creation
|
||||
* rather than after the connection is established.
|
||||
*
|
||||
* @param [dbConfig] The database configuration containing URL, credentials, and read-only flag.
|
||||
* @return A configured [Connection] instance.
|
||||
*/
|
||||
public open fun createConnection(dbConfig: DbConnectionConfig): Connection =
    DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).also { connection ->
        // Only toggle the flag when read-only mode was requested; otherwise
        // leave the driver's default connection mode untouched.
        if (dbConfig.readOnly) {
            connection.isReadOnly = true
        }
    }
|
||||
|
||||
/**
|
||||
* Extracts a value from the ResultSet for the given column.
|
||||
* This method can be overridden by custom database types to provide specialized parsing logic.
|
||||
*
|
||||
* @param [rs] the ResultSet to read from
|
||||
* @param [columnIndex] zero-based column index
|
||||
* @param [columnMetadata] metadata for the column
|
||||
* @param [kType] the Kotlin type for this column
|
||||
* @return the extracted value, or null
|
||||
*/
|
||||
/**
 * Extracts the value of one column from the current row of [rs].
 *
 * First tries the driver's generic [ResultSet.getObject]; if that throws, falls
 * back to reading the value as a string. The original code branched on
 * `kType.isSupertypeOf(String::class.starProjectedType)` but BOTH branches
 * called `rs.getString(columnIndex + 1)` — a dead conditional — so the branch
 * was collapsed. [kType] is kept in the signature for overriders that need it.
 *
 * @param [rs] the ResultSet to read from
 * @param [columnIndex] zero-based column index (JDBC itself is 1-based)
 * @param [columnMetadata] metadata for the column
 * @param [kType] the Kotlin type for this column
 * @return the extracted value, or null
 */
public open fun extractValueFromResultSet(
    rs: ResultSet,
    columnIndex: Int,
    columnMetadata: TableColumnMetadata,
    kType: KType,
): Any? =
    try {
        // JDBC column indices are 1-based, DataFrame's are 0-based.
        rs.getObject(columnIndex + 1)
        // TODO: add a special handler for Blob via Streams
    } catch (_: Throwable) {
        // TODO: expand for all the types like in generateKType function
        rs.getString(columnIndex + 1)
    }
|
||||
|
||||
/**
|
||||
* Builds a single DataColumn with proper type handling.
|
||||
* Accepts a mutable list to allow efficient post-processing.
|
||||
*/
|
||||
public open fun buildDataColumn(
    name: String,
    values: MutableList<Any?>,
    kType: KType,
    inferNullability: Boolean,
): DataColumn<*> {
    // Apply dialect-specific value corrections (e.g. SQL arrays) before column creation.
    val processed = postProcessColumnValues(values, kType)
    val inferMode = convertNullabilityInference(inferNullability)

    return DataColumn.createValueColumn(
        name = name,
        values = processed,
        infer = inferMode,
        type = kType,
    )
}
|
||||
|
||||
// Maps the boolean flag onto DataFrame's nullability-inference mode.
private fun convertNullabilityInference(inferNullability: Boolean) =
    when {
        inferNullability -> Infer.Nulls
        else -> Infer.None
    }
|
||||
|
||||
/**
|
||||
* Processes the column values retrieved from the database and performs transformations based on the provided
|
||||
* Kotlin type and column metadata. The method allows for custom post-processing logic, such as handling
|
||||
* specific database column types, including arrays.
|
||||
*
|
||||
* @param values the list of raw values retrieved from the database for the column.
|
||||
* @param kType the Kotlin type that the column values should be transformed to.
|
||||
* @return a list of processed column values, with transformations applied where necessary, or the original list if no transformation is needed.
|
||||
*/
|
||||
/**
 * Applies type-driven transformations to raw column values fetched from the
 * database; currently only SQL array columns need special handling. Returns
 * the original list untouched when no transformation applies.
 */
private fun postProcessColumnValues(values: MutableList<Any?>, kType: KType): List<Any?> {
    /* EXAMPLE: columnMetadata.sqlTypeName == "MY_CUSTOM_ARRAY" -> {
        values.map { /* custom transformation */ }
    } */
    return if (kType.classifier == Array::class) {
        handleArrayValues(values)
    } else {
        values
    }
}
|
||||
|
||||
/**
|
||||
* Converts SQL Array objects to strongly-typed arrays.
|
||||
*
|
||||
* Extracts arrays from SQL Array objects and converts them to a consistent type
|
||||
* if all elements share the same type. Returns original arrays if types vary.
|
||||
*
|
||||
* @param values raw values containing SQL Array objects
|
||||
* @return list of consistently typed arrays, or original arrays if no common type exists
|
||||
*/
|
||||
private fun handleArrayValues(values: MutableList<Any?>): List<Any> {
    // Unwrap java.sql.Array values into Kotlin Array<*>; entries that are not
    // SQL arrays (or whose payload is not an object array) are silently dropped.
    // NOTE(review): nulls in `values` are also dropped here, so the returned
    // list can be shorter than the input — confirm callers tolerate this.
    val sqlArrays = values.mapNotNull {
        (it as? java.sql.Array)?.array?.let { array -> array as? Array<*> }
    }

    // Collect the Java component type of each unwrapped array (as a KClass).
    val allElementTypes = sqlArrays
        .flatMap { array ->
            (array.javaClass.componentType?.kotlin?.let { listOf(it) } ?: emptyList())
        } // Get the component type of each array and convert it to a Kotlin class, if available

    // A single shared component type lets us produce uniformly typed arrays;
    // mixed or unknown component types fall back to Any::class.
    val commonElementType = allElementTypes
        .distinct() // Get unique element types
        .singleOrNull() // Ensure there's only one unique element type, otherwise return null
        ?: Any::class // Fallback to Any::class if multiple distinct types or no elements found

    return if (commonElementType != Any::class) {
        // Re-cast each array to the common element type.
        // NOTE(review): castArray uses safeCast + mapNotNull, so any element that
        // fails the cast (including nulls inside the array) is dropped silently.
        sqlArrays.map { castArray(it, commonElementType).toTypedArray() }
    } else {
        sqlArrays
    }
}
|
||||
|
||||
/** Utility function to cast arrays based on the type of elements */
|
||||
/**
 * Casts every element of [array] to [elementType]. Elements that are not
 * instances of [elementType] — including nulls — are dropped from the result.
 */
private fun <T : Any> castArray(array: Array<*>, elementType: KClass<T>): List<T> {
    val result = mutableListOf<T>()
    for (element in array) {
        elementType.safeCast(element)?.let(result::add)
    }
    return result
}
|
||||
|
||||
/**
|
||||
* Creates a mapping between common SQL types and their corresponding KTypes.
|
||||
*
|
||||
* @param tableColumnMetadata The metadata of the table column.
|
||||
* @return The KType associated with the SQL type or a default type if no mapping is found.
|
||||
*/
|
||||
public open fun makeCommonSqlToKTypeMapping(tableColumnMetadata: TableColumnMetadata): KType {
    // Baseline java.sql.Types -> Kotlin class mapping. Rebuilt on every call;
    // acceptable since it is only used during schema construction.
    val jdbcTypeToKTypeMapping = mapOf(
        Types.BIT to Boolean::class,
        Types.TINYINT to Int::class,
        Types.SMALLINT to Int::class,
        Types.INTEGER to Int::class,
        Types.BIGINT to Long::class,
        Types.FLOAT to Float::class,
        Types.REAL to Float::class,
        Types.DOUBLE to Double::class,
        Types.NUMERIC to BigDecimal::class,
        Types.DECIMAL to BigDecimal::class,
        Types.CHAR to String::class,
        Types.VARCHAR to String::class,
        Types.LONGVARCHAR to String::class,
        Types.DATE to Date::class,
        Types.TIME to Time::class,
        Types.TIMESTAMP to Timestamp::class,
        Types.BINARY to ByteArray::class,
        Types.VARBINARY to ByteArray::class,
        Types.LONGVARBINARY to ByteArray::class,
        Types.NULL to String::class,
        Types.JAVA_OBJECT to Any::class,
        Types.DISTINCT to Any::class,
        Types.STRUCT to Any::class,
        Types.ARRAY to Array::class,
        Types.BLOB to ByteArray::class,
        Types.CLOB to Clob::class,
        Types.REF to Ref::class,
        Types.DATALINK to Any::class,
        Types.BOOLEAN to Boolean::class,
        Types.ROWID to RowId::class,
        Types.NCHAR to String::class,
        Types.NVARCHAR to String::class,
        Types.LONGNVARCHAR to String::class,
        Types.NCLOB to NClob::class,
        Types.SQLXML to SQLXML::class,
        Types.REF_CURSOR to Ref::class,
        Types.TIME_WITH_TIMEZONE to OffsetTime::class,
        Types.TIMESTAMP_WITH_TIMEZONE to OffsetDateTime::class,
    )

    // Resolves the Kotlin class, handling driver-specific quirks before the
    // generic mapping. Branch order matters: OTHER and javaClassName checks
    // must precede the plain jdbcType lookup.
    fun determineKotlinClass(tableColumnMetadata: TableColumnMetadata): KClass<*> =
        when {
            // Types.OTHER: the driver couldn't classify; "[B" is JVM notation for byte[].
            tableColumnMetadata.jdbcType == Types.OTHER -> when (tableColumnMetadata.javaClassName) {
                "[B" -> ByteArray::class
                else -> Any::class
            }

            tableColumnMetadata.javaClassName == "[B" -> ByteArray::class

            tableColumnMetadata.javaClassName == "java.sql.Blob" -> Blob::class

            // Drivers that surface TIMESTAMP as java.time.LocalDateTime.
            tableColumnMetadata.jdbcType == Types.TIMESTAMP &&
                tableColumnMetadata.javaClassName == "java.time.LocalDateTime" -> LocalDateTime::class

            // Drivers that store UUIDs as BINARY but materialize java.util.UUID.
            tableColumnMetadata.jdbcType == Types.BINARY &&
                tableColumnMetadata.javaClassName == "java.util.UUID" -> UUID::class

            // REAL/FLOAT/NUMERIC reported with a java.lang.Double payload.
            tableColumnMetadata.jdbcType == Types.REAL &&
                tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class

            tableColumnMetadata.jdbcType == Types.FLOAT &&
                tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class

            tableColumnMetadata.jdbcType == Types.NUMERIC &&
                tableColumnMetadata.javaClassName == "java.lang.Double" -> Double::class

            // Force BIGINT to always be Long, regardless of javaClassName
            // Some JDBC drivers (e.g., MariaDB) may report Integer for small BIGINT values
            // TODO: tableColumnMetadata.jdbcType == Types.BIGINT -> Long::class

            else -> jdbcTypeToKTypeMapping[tableColumnMetadata.jdbcType] ?: String::class
        }

    // Wraps Array::class into Array<T> with an invariant projection of its own
    // type parameter; all other classes get a plain (possibly nullable) type.
    fun createArrayTypeIfNeeded(kClass: KClass<*>, isNullable: Boolean): KType =
        if (kClass == Array::class) {
            val typeParam = kClass.typeParameters[0].createType()
            kClass.createType(
                arguments = listOf(kotlin.reflect.KTypeProjection.invariant(typeParam)),
                nullable = isNullable,
            )
        } else {
            kClass.createType(nullable = isNullable)
        }

    val kClass: KClass<*> = determineKotlinClass(tableColumnMetadata)
    val kType = createArrayTypeIfNeeded(kClass, tableColumnMetadata.isNullable)
    return kType
}
|
||||
|
||||
/**
|
||||
* Retrieves column metadata from a JDBC ResultSet.
|
||||
*
|
||||
* This method reads column metadata from [ResultSetMetaData] with graceful fallbacks
|
||||
* for JDBC drivers that throw [java.sql.SQLFeatureNotSupportedException] for certain methods
|
||||
* (e.g., Apache Hive).
|
||||
*
|
||||
* Fallback behavior for unsupported methods:
|
||||
* - `getColumnName()` → `getColumnLabel()` → `"column_N"`
|
||||
* - `getTableName()` → extract from column name if contains '.' → `null`
|
||||
* - `isNullable()` → [DatabaseMetaData.getColumns] → `true` (assume nullable)
|
||||
* - `getColumnTypeName()` → `"OTHER"`
|
||||
* - `getColumnType()` → [java.sql.Types.OTHER]
|
||||
* - `getColumnDisplaySize()` → `0`
|
||||
* - `getColumnClassName()` → `"java.lang.Object"`
|
||||
*
|
||||
* Override this method in subclasses to provide database-specific behavior
|
||||
* (for example, to disable fallback for databases like Teradata or Oracle
|
||||
* where [DatabaseMetaData.getColumns] is known to be slow).
|
||||
*
|
||||
* @param resultSet The [ResultSet] containing query results.
|
||||
* @return A list of [TableColumnMetadata] objects.
|
||||
*/
|
||||
public open fun getTableColumnsMetadata(resultSet: ResultSet): List<TableColumnMetadata> {
    val rsMetaData = resultSet.metaData
    val connection = resultSet.statement.connection
    val dbMetaData = connection.metaData

    // Some JDBC drivers (e.g., Hive) throw SQLFeatureNotSupportedException,
    // so catalog/schema lookups are best-effort.
    val catalog = try {
        connection.catalog.takeUnless { it.isNullOrBlank() }
    } catch (_: Exception) {
        null
    }

    val schema = try {
        connection.schema.takeUnless { it.isNullOrBlank() }
    } catch (_: Exception) {
        null
    }

    val columnCount = rsMetaData.columnCount
    val columns = mutableListOf<TableColumnMetadata>()
    // Tracks how often each raw column name has been seen, for de-duplication.
    val nameCounter = mutableMapOf<String, Int>()

    // JDBC metadata indices are 1-based.
    for (index in 1..columnCount) {
        // Try getColumnName, fall back to getColumnLabel, then to a generated name.
        val columnName = try {
            rsMetaData.getColumnName(index)
        } catch (_: Exception) {
            try {
                rsMetaData.getColumnLabel(index)
            } catch (_: Exception) {
                "column$index"
            }
        }

        // Some JDBC drivers (e.g., Apache Hive) throw SQLFeatureNotSupportedException
        val tableName = try {
            rsMetaData.getTableName(index).takeUnless { it.isBlank() }
        } catch (_: Exception) {
            // Fallback: try to extract table name from column name if it contains '.'
            val dotIndex = columnName.lastIndexOf('.')
            if (dotIndex > 0) columnName.take(dotIndex) else null
        }

        // Try to detect nullability from ResultSetMetaData
        val isNullable = try {
            when (rsMetaData.isNullable(index)) {
                ResultSetMetaData.columnNoNulls -> false

                ResultSetMetaData.columnNullable -> true

                // Unknown nullability: assume it is nullable (safest default)
                ResultSetMetaData.columnNullableUnknown -> true

                else -> true
            }
        } catch (_: Exception) {
            // Some drivers may throw for unsupported features.
            // Second chance: consult DatabaseMetaData.getColumns — can be slow
            // on some databases (e.g. Teradata, Oracle), see class KDoc.
            try {
                dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols ->
                    if (cols.next()) !cols.getString("IS_NULLABLE").equals("NO", ignoreCase = true) else true
                }
            } catch (_: Exception) {
                // Fallback failed, assume nullable as the safest default
                true
            }
        }

        // Remaining attributes each get a neutral fallback to avoid SQLException.
        val columnType = try {
            rsMetaData.getColumnTypeName(index)
        } catch (_: Exception) {
            "OTHER"
        }

        val jdbcType = try {
            rsMetaData.getColumnType(index)
        } catch (_: Exception) {
            Types.OTHER
        }

        val displaySize = try {
            rsMetaData.getColumnDisplaySize(index)
        } catch (_: Exception) {
            0
        }

        val javaClassName = try {
            rsMetaData.getColumnClassName(index)
        } catch (_: Exception) {
            "java.lang.Object"
        }

        // Ensure DataFrame column names are unique even if the query repeats names.
        val uniqueName = manageColumnNameDuplication(nameCounter, columnName)

        columns += TableColumnMetadata(
            uniqueName,
            columnType,
            jdbcType,
            displaySize,
            javaClassName,
            isNullable,
        )
    }

    return columns
}
|
||||
|
||||
/**
|
||||
* Manages the duplication of column names by appending a unique identifier to the original name if necessary.
|
||||
*
|
||||
* @param columnNameCounter a mutable map that keeps track of the count for each column name.
|
||||
* @param originalName the original name of the column to be managed.
|
||||
* @return the modified column name that is free from duplication.
|
||||
*/
|
||||
internal fun manageColumnNameDuplication(columnNameCounter: MutableMap<String, Int>, originalName: String): String {
    var name = originalName
    // A present entry means this name was seen before; its value is the last
    // suffix handed out (0 for the first, un-suffixed occurrence).
    val count = columnNameCounter[originalName]

    if (count != null) {
        var incrementedCount = count + 1
        // Skip suffixes that collide with real column names already registered
        // as counter keys (e.g. an actual column literally named "name_1").
        while (columnNameCounter.containsKey("${originalName}_$incrementedCount")) {
            incrementedCount++
        }
        columnNameCounter[originalName] = incrementedCount
        name = "${originalName}_$incrementedCount"
    } else {
        // First occurrence keeps its original name; remember it with suffix 0.
        columnNameCounter[originalName] = 0
    }

    return name
}
|
||||
}
|
||||
+258
@@ -0,0 +1,258 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.duckdb.DuckDBColumnType
|
||||
import org.duckdb.DuckDBColumnType.ARRAY
|
||||
import org.duckdb.DuckDBColumnType.BIGINT
|
||||
import org.duckdb.DuckDBColumnType.BIT
|
||||
import org.duckdb.DuckDBColumnType.BLOB
|
||||
import org.duckdb.DuckDBColumnType.BOOLEAN
|
||||
import org.duckdb.DuckDBColumnType.DATE
|
||||
import org.duckdb.DuckDBColumnType.DECIMAL
|
||||
import org.duckdb.DuckDBColumnType.DOUBLE
|
||||
import org.duckdb.DuckDBColumnType.ENUM
|
||||
import org.duckdb.DuckDBColumnType.FLOAT
|
||||
import org.duckdb.DuckDBColumnType.HUGEINT
|
||||
import org.duckdb.DuckDBColumnType.INTEGER
|
||||
import org.duckdb.DuckDBColumnType.INTERVAL
|
||||
import org.duckdb.DuckDBColumnType.JSON
|
||||
import org.duckdb.DuckDBColumnType.LIST
|
||||
import org.duckdb.DuckDBColumnType.MAP
|
||||
import org.duckdb.DuckDBColumnType.SMALLINT
|
||||
import org.duckdb.DuckDBColumnType.STRUCT
|
||||
import org.duckdb.DuckDBColumnType.TIME
|
||||
import org.duckdb.DuckDBColumnType.TIMESTAMP
|
||||
import org.duckdb.DuckDBColumnType.TIMESTAMP_MS
|
||||
import org.duckdb.DuckDBColumnType.TIMESTAMP_NS
|
||||
import org.duckdb.DuckDBColumnType.TIMESTAMP_S
|
||||
import org.duckdb.DuckDBColumnType.TIMESTAMP_WITH_TIME_ZONE
|
||||
import org.duckdb.DuckDBColumnType.TIME_WITH_TIME_ZONE
|
||||
import org.duckdb.DuckDBColumnType.TINYINT
|
||||
import org.duckdb.DuckDBColumnType.UBIGINT
|
||||
import org.duckdb.DuckDBColumnType.UHUGEINT
|
||||
import org.duckdb.DuckDBColumnType.UINTEGER
|
||||
import org.duckdb.DuckDBColumnType.UNION
|
||||
import org.duckdb.DuckDBColumnType.UNKNOWN
|
||||
import org.duckdb.DuckDBColumnType.USMALLINT
|
||||
import org.duckdb.DuckDBColumnType.UTINYINT
|
||||
import org.duckdb.DuckDBColumnType.UUID
|
||||
import org.duckdb.DuckDBColumnType.VARCHAR
|
||||
import org.duckdb.DuckDBResultSetMetaData
|
||||
import org.duckdb.JsonNode
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.DuckDb.convertSqlTypeToKType
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.math.BigDecimal
|
||||
import java.math.BigInteger
|
||||
import java.sql.Array
|
||||
import java.sql.Blob
|
||||
import java.sql.Connection
|
||||
import java.sql.DatabaseMetaData
|
||||
import java.sql.DriverManager
|
||||
import java.sql.ResultSet
|
||||
import java.sql.Struct
|
||||
import java.sql.Timestamp
|
||||
import java.time.LocalDate
|
||||
import java.time.LocalTime
|
||||
import java.time.OffsetDateTime
|
||||
import java.time.OffsetTime
|
||||
import java.util.Properties
|
||||
import java.util.UUID
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.KTypeProjection
|
||||
import kotlin.reflect.full.createType
|
||||
import kotlin.reflect.full.withNullability
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
private val logger = KotlinLogging.logger {}

/**
 * Represents the [DuckDB](http://duckdb.org/) database type.
 *
 * This object provides methods to convert data from a [ResultSet] to the appropriate type for DuckDB,
 * and to generate the corresponding [column schema][ColumnSchema].
 */
public object DuckDb : DbType("duckdb") {

    /** The fully-qualified class name of the DuckDB JDBC driver. */
    override val driverClassName: String = "org.duckdb.DuckDBDriver"

    /**
     * How a column type from JDBC, [tableColumnMetadata], is read in Java/Kotlin.
     * The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.
     * Returning `null` defers the implementation to the default one (which may not always be correct).
     *
     * Following [org.duckdb.DuckDBVector.getObject].
     */
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType =
        tableColumnMetadata.sqlTypeName.toKType(tableColumnMetadata.isNullable)

    /**
     * How a column from JDBC should be represented as DataFrame (value) column.
     * See [convertSqlTypeToKType].
     */
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
        val type = convertSqlTypeToKType(tableColumnMetadata)
        return ColumnSchema.Value(type)
    }

    /**
     * Maps a DuckDB SQL type name to the [KType] actually returned by the driver.
     *
     * Follows exactly [org.duckdb.DuckDBVector.getObject].
     *
     * "// dataframe-jdbc" is added for all types that are covered correctly by
     * [org.jetbrains.kotlinx.dataframe.io.db.DbType.makeCommonSqlToKTypeMapping] at the moment, however, to cover
     * all nested types, we'll use a full type-map for all [DuckDB types][DuckDBColumnType] exactly.
     *
     * @param isNullable whether the resulting [KType] should be marked nullable.
     */
    @Suppress("ktlint:standard:blank-line-between-when-conditions")
    internal fun String.toKType(isNullable: Boolean): KType {
        val sqlTypeName = this
        return when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)) {
            BOOLEAN -> typeOf<Boolean>() // dataframe-jdbc
            TINYINT -> typeOf<Byte>()
            SMALLINT -> typeOf<Short>()
            INTEGER -> typeOf<Int>() // dataframe-jdbc
            BIGINT -> typeOf<Long>() // dataframe-jdbc
            HUGEINT -> typeOf<BigInteger>()
            UHUGEINT -> typeOf<BigInteger>()
            // unsigned types are widened so the full value range fits
            UTINYINT -> typeOf<Short>()
            USMALLINT -> typeOf<Int>()
            UINTEGER -> typeOf<Long>()
            UBIGINT -> typeOf<BigInteger>()
            FLOAT -> typeOf<Float>() // dataframe-jdbc
            DOUBLE -> typeOf<Double>() // dataframe-jdbc
            DECIMAL -> typeOf<BigDecimal>() // dataframe-jdbc
            TIME -> typeOf<LocalTime>()
            TIME_WITH_TIME_ZONE -> typeOf<OffsetTime>() // dataframe-jdbc
            DATE -> typeOf<LocalDate>()
            TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S -> typeOf<Timestamp>() // dataframe-jdbc
            TIMESTAMP_WITH_TIME_ZONE -> typeOf<OffsetDateTime>() // dataframe-jdbc
            JSON -> typeOf<JsonNode>()
            BLOB -> typeOf<Blob>()
            UUID -> typeOf<UUID>()
            MAP -> {
                // recursively resolve the key/value types of MAP(K, V)
                val (key, value) = parseMapTypes(sqlTypeName)
                Map::class.createType(
                    listOf(
                        KTypeProjection.invariant(key.toKType(false)),
                        // map values may always be NULL in DuckDB
                        KTypeProjection.invariant(value.toKType(true)),
                    ),
                )
            }

            LIST, ARRAY -> {
                // TODO requires #1266 and #1273 for specific types
                // val listType = parseListType(sqlTypeName)
                // Array::class.createType(
                //     listOf(KTypeProjection.invariant(listType.toKType(true))),
                // )
                typeOf<Array>()
            }

            STRUCT -> typeOf<Struct>() // TODO requires #1266 for specific types
            UNION -> typeOf<Any>() // Cannot handle this in Kotlin
            VARCHAR -> typeOf<String>()
            UNKNOWN, BIT, INTERVAL, ENUM -> typeOf<String>()
        }.withNullability(isNullable)
    }

    /**
     * Parses `"MAP(X, Y)"` into `"X"` and `"Y"`, taking nested parentheses into account,
     * so `MAP(INT, MAP(INT, TEXT))` splits at the correct top-level comma.
     *
     * @throws IllegalStateException if [typeString] is not of the form `MAP(X, Y)`.
     */
    internal fun parseMapTypes(typeString: String): Pair<String, String> {
        if (!typeString.startsWith("MAP(") || !typeString.endsWith(")")) {
            error("invalid MAP type: $typeString")
        }

        val content = typeString.removeSurrounding("MAP(", ")")

        // Find the comma that separates key and value types at nesting depth 0
        var parenCount = 0
        var commaIndex = -1
        for (i in content.indices) {
            when (content[i]) {
                '(' -> parenCount++

                ')' -> parenCount--

                ',' -> if (parenCount == 0) {
                    commaIndex = i
                    break
                }
            }
        }

        if (commaIndex == -1) error("invalid MAP type: $typeString")
        val keyType = content.take(commaIndex).trim()
        val valueType = content.substring(commaIndex + 1).trim()
        return Pair(keyType, valueType)
    }

    /**
     * Parses `"X[]"` and `"X[123]"` into `"X"`, and `"X[][]"` into `"X[]"`
     * (only the outermost list/array level is stripped).
     *
     * @throws IllegalStateException if [typeString] is not a LIST/ARRAY type.
     */
    internal fun parseListType(typeString: String): String {
        val bracketIndex = typeString.lastIndexOf('[')
        // also reject strings like "]" with no '[' at all; previously this fell through
        // to take(-1), which threw an unrelated IllegalArgumentException
        if (!typeString.endsWith("]") || bracketIndex == -1) {
            error("invalid LIST/ARRAY type: $typeString")
        }

        return typeString.take(bracketIndex)
    }

    /**
     * How to filter out system tables from user-created ones when using
     * [DataFrame.readAllSqlTables][DataFrame.Companion.readAllSqlTables] and
     * [DataFrameSchema.readAllSqlTables][org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema.Companion.readAllSqlTables].
     *
     * The names of these can sometimes be found in the specific JDBC integration.
     */
    override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
        tableMetadata.schemaName?.lowercase()?.contains("information_schema") == true ||
            tableMetadata.schemaName?.lowercase()?.contains("system") == true ||
            tableMetadata.name.lowercase().contains("system_")

    /**
     * How to retrieve the correct table metadata when using
     * [DataFrame.readAllSqlTables][DataFrame.Companion.readAllSqlTables] and
     * [DataFrameSchema.readAllSqlTables][org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema.Companion.readAllSqlTables].
     * The names of these can be found in the [DatabaseMetaData] implementation of the DuckDB JDBC integration.
     */
    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        TableMetadata(
            tables.getString("TABLE_NAME"),
            tables.getString("TABLE_SCHEM"),
            tables.getString("TABLE_CAT"),
        )

    /**
     * Creates a database connection using the provided configuration.
     *
     * DuckDB does not support changing read-only status after connection creation,
     * but supports read-only mode through connection parameters.
     *
     * @param [dbConfig] The database configuration containing URL, credentials, and read-only flag.
     * @return A configured [java.sql.Connection] instance.
     */
    override fun createConnection(dbConfig: DbConnectionConfig): Connection {
        val properties = Properties().apply {
            dbConfig.user.takeIf { it.isNotEmpty() }?.let { setProperty("user", it) }
            dbConfig.password.takeIf { it.isNotEmpty() }?.let { setProperty("password", it) }

            // Handle DuckDB limitation: in-memory databases cannot be opened in read-only mode
            if (dbConfig.readOnly && !dbConfig.url.isInMemoryDuckDb()) {
                setProperty("access_mode", "read_only")
            } else if (dbConfig.readOnly) {
                logger.warn {
                    "Cannot create read-only in-memory DuckDB database (url=${dbConfig.url}). " +
                        "In-memory databases require write access for initialization. Connection will be created without read-only mode."
                }
            }
        }

        return DriverManager.getConnection(dbConfig.url, properties)
    }

    /**
     * Checks if the DuckDB URL represents an in-memory database.
     * In-memory DuckDB URLs are either "jdbc:duckdb:" or "jdbc:duckdb:" followed only by whitespace.
     */
    private fun String.isInMemoryDuckDb(): Boolean =
        this.trim() == "jdbc:duckdb:" || matches("jdbc:duckdb:\\s*$".toRegex())
}
|
||||
+154
@@ -0,0 +1,154 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.sql.ResultSet
|
||||
import java.util.Locale
|
||||
import kotlin.reflect.KType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.MariaDb as MariaDbType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.MsSql as MsSqlType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.MySql as MySqlType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.PostgreSql as PostgreSqlType
|
||||
|
||||
/**
 * Represents the H2 database type.
 *
 * This class provides methods to convert data from a ResultSet to the appropriate type for H2
 * and to generate the corresponding column schema.
 *
 * H2 can emulate other databases via its compatibility [Mode]; when a non-[Mode.Regular] mode is
 * chosen, most operations are delegated to the matching [DbType] (MySQL, PostgreSQL, etc.).
 *
 * NOTE: All date and timestamp-related types are converted to String to avoid java.sql.* types.
 */

public open class H2(public val mode: Mode = Mode.Regular) : DbType("h2") {
    // Legacy entry point kept for source compatibility; maps the old DbType-based API onto Mode.
    @Deprecated("Use H2(mode = Mode.XXX) instead", ReplaceWith("H2(H2.Mode.MySql)"))
    public constructor(dialect: DbType) : this(
        Mode.fromDbType(dialect)
            // passing an H2 instance as its own dialect is circular and therefore rejected
            ?: throw IllegalArgumentException("H2 database could not be specified with H2 dialect!"),
    )

    // The DbType all mode-specific behavior is delegated to; null means "plain H2" (Regular mode).
    private val delegate: DbType? = mode.toDbType()

    /**
     * Represents the compatibility modes supported by an H2 database.
     *
     * @property value The string value used in H2 JDBC URL and settings.
     */
    public enum class Mode(public val value: String) {
        /** Native H2 mode (no compatibility), our synthetic marker. */
        Regular("H2-Regular"),
        MySql("MySQL"),
        PostgreSql("PostgreSQL"),
        MsSqlServer("MSSQLServer"),
        MariaDb("MariaDB"), ;

        /**
         * Converts this Mode to the corresponding DbType delegate.
         *
         * @return The DbType for this mode, or null for Regular mode.
         */
        public fun toDbType(): DbType? =
            when (this) {
                Regular -> null
                MySql -> MySqlType
                PostgreSql -> PostgreSqlType
                MsSqlServer -> MsSqlType
                MariaDb -> MariaDbType
            }

        public companion object {
            /**
             * Creates a Mode from the given DbType.
             *
             * Unknown (non-listed) dialects fall back to [Regular].
             *
             * @param dialect The DbType to convert.
             * @return The corresponding Mode, or null if the dialect is H2.
             */
            public fun fromDbType(dialect: DbType): Mode? =
                when (dialect) {
                    is H2 -> null
                    MySqlType -> MySql
                    PostgreSqlType -> PostgreSql
                    MsSqlType -> MsSqlServer
                    MariaDbType -> MariaDb
                    else -> Regular
                }

            /**
             * Finds a Mode by its string value (case-insensitive).
             * Handles both URL values (MySQL, PostgreSQL, etc.) and
             * INFORMATION_SCHEMA values (Regular).
             *
             * @param value The string value to search for.
             * @return The matching Mode, or null if not found.
             */
            public fun fromValue(value: String): Mode? {
                // "Regular" from INFORMATION_SCHEMA or "H2-Regular" from URL
                if (value.equals("regular", ignoreCase = true) ||
                    value.equals("h2-regular", ignoreCase = true)
                ) {
                    return Regular
                }
                return entries.find { it.value.equals(value, ignoreCase = true) }
            }
        }
    }

    /**
     * It contains constants related to different database modes.
     *
     * The mode value is used in the [extractDBTypeFromConnection] function to determine the corresponding `DbType` for the H2 database connection URL.
     * For example, if the URL contains the mode value "MySQL", the H2 instance with the MySQL database type is returned.
     * Otherwise, the `DbType` is determined based on the URL without the mode value.
     *
     * @see [extractDBTypeFromConnection]
     * @see [createH2Instance]
     */
    public companion object {
        // Deprecated string constants superseded by Mode; kept for binary/source compatibility.
        @Deprecated("Use Mode.MySql.value instead", ReplaceWith("Mode.MySql.value"))
        public const val MODE_MYSQL: String = "MySQL"

        @Deprecated("Use Mode.PostgreSql.value instead", ReplaceWith("Mode.PostgreSql.value"))
        public const val MODE_POSTGRESQL: String = "PostgreSQL"

        @Deprecated("Use Mode.MsSqlServer.value instead", ReplaceWith("Mode.MsSqlServer.value"))
        public const val MODE_MSSQLSERVER: String = "MSSQLServer"

        @Deprecated("Use Mode.MariaDb.value instead", ReplaceWith("Mode.MariaDb.value"))
        public const val MODE_MARIADB: String = "MariaDB"
    }

    /** The fully-qualified class name of the H2 JDBC driver. */
    override val driverClassName: String
        get() = "org.h2.Driver"

    // Delegated entirely to the compatibility-mode DbType; null (default mapping) for Regular mode.
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? =
        delegate?.convertSqlTypeToColumnSchemaValue(tableColumnMetadata)

    override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
        val locale = Locale.getDefault()

        // local helper: null-safe case-insensitive substring check
        fun String?.containsWithLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true
        val schemaName = tableMetadata.schemaName

        // could be extended for other symptoms of the system tables for H2
        val isH2SystemTable = schemaName.containsWithLowercase("information_schema")

        // H2's own system tables are always filtered; in compatibility mode the delegate's
        // system-table rules apply additionally.
        return if (delegate == null) {
            isH2SystemTable
        } else {
            isH2SystemTable || delegate.isSystemTable(tableMetadata)
        }
    }

    // Delegate builds metadata in compatibility mode; otherwise use H2's lowercase JDBC column labels.
    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        delegate?.buildTableMetadata(tables)
            ?: TableMetadata(
                tables.getString("table_name"),
                tables.getString("table_schem"),
                tables.getString("table_cat"),
            )

    // Delegated; null means "use the common SQL-to-KType mapping".
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? =
        delegate?.convertSqlTypeToKType(tableColumnMetadata)

    // Delegate-specific LIMIT/TOP syntax when emulating another database, plain LIMIT otherwise.
    public override fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int): String =
        delegate?.buildSqlQueryWithLimit(sqlQuery, limit) ?: super.buildSqlQueryWithLimit(sqlQuery, limit)
}
|
||||
+77
@@ -0,0 +1,77 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.sql.ResultSet
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.full.createType
|
||||
|
||||
/**
 * Represents the MariaDb database type.
 *
 * This class provides methods to convert data from a ResultSet to the appropriate type for MariaDb,
 * and to generate the corresponding column schema.
 */
public object MariaDb : DbType("mariadb") {
    /** The fully-qualified class name of the MariaDB JDBC driver. */
    override val driverClassName: String
        get() = "org.mariadb.jdbc.Driver"

    /**
     * Delegates to [convertSqlTypeToKType] so the KType and ColumnSchema mappings
     * can never drift apart (previously the same rules were duplicated in both overrides).
     *
     * @return a [ColumnSchema.Value] for specially-handled types, or `null` to use the default mapping.
     */
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? =
        convertSqlTypeToKType(tableColumnMetadata)?.let { ColumnSchema.Value(it) }

    // MariaDB shares MySQL's system-schema layout, so reuse its filter.
    override fun isSystemTable(tableMetadata: TableMetadata): Boolean = MySql.isSystemTable(tableMetadata)

    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        TableMetadata(
            tables.getString("table_name"),
            tables.getString("table_schem"),
            tables.getString("table_cat"),
        )

    /**
     * Type corrections for MariaDB:
     * - unsigned 32-bit integers need [Long] to hold their full range,
     * - SMALLINT columns reported as `java.lang.Short` map to [Short].
     *
     * @return the corrected [KType], or `null` to use the default mapping.
     */
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
        // Force BIGINT to always be Long, regardless of javaClassName
        // MariaDB JDBC driver may report Integer for small BIGINT values
        // TODO: investigate the corner case
        // if (tableColumnMetadata.jdbcType == java.sql.Types.BIGINT) {
        //     return Long::class.createType(nullable = tableColumnMetadata.isNullable)
        // }

        if (tableColumnMetadata.sqlTypeName == "INTEGER UNSIGNED" ||
            tableColumnMetadata.sqlTypeName == "INT UNSIGNED"
        ) {
            return Long::class.createType(nullable = tableColumnMetadata.isNullable)
        }

        if (tableColumnMetadata.sqlTypeName == "SMALLINT" && tableColumnMetadata.javaClassName == "java.lang.Short") {
            return Short::class.createType(nullable = tableColumnMetadata.isNullable)
        }
        return null
    }

    override fun quoteIdentifier(name: String): String {
        // schema.table -> `schema`.`table`
        return name.split(".").joinToString(".") { "`$it`" }
    }
}
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.sql.ResultSet
|
||||
import java.util.Locale
|
||||
import kotlin.reflect.KType
|
||||
|
||||
/**
 * Represents the MSSQL database type.
 *
 * This class provides methods to convert data from a ResultSet to the appropriate type for MSSQL,
 * and to generate the corresponding column schema.
 */
public object MsSql : DbType("sqlserver") {
    /** The fully-qualified class name of the Microsoft SQL Server JDBC driver. */
    override val driverClassName: String
        get() = "com.microsoft.sqlserver.jdbc.SQLServerDriver"

    // No MSSQL-specific overrides; use the default SQL-to-schema mapping.
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null

    override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
        val locale = Locale.getDefault()

        // local helper: null-safe case-insensitive substring check
        fun String?.containsWithLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true

        val schemaName = tableMetadata.schemaName
        val tableName = tableMetadata.name
        val catalogName = tableMetadata.catalogue

        // "sys"/"dt" table-name prefixes cover SQL Server system and legacy diagram (dtproperties) tables;
        // master/model/msdb/tempdb are the built-in system databases
        return schemaName.containsWithLowercase("sys") ||
            schemaName.containsWithLowercase("information_schema") ||
            tableName.startsWith("sys") ||
            tableName.startsWith("dt") ||
            tableName.containsWithLowercase("sys_config") ||
            catalogName.containsWithLowercase("system") ||
            catalogName.containsWithLowercase("master") ||
            catalogName.containsWithLowercase("model") ||
            catalogName.containsWithLowercase("msdb") ||
            catalogName.containsWithLowercase("tempdb")
    }

    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        TableMetadata(
            tables.getString("table_name"),
            tables.getString("table_schem"),
            tables.getString("table_cat"),
        )

    // No MSSQL-specific overrides; use the default SQL-to-KType mapping.
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null

    /**
     * SQL Server has no LIMIT clause; inject `TOP n` after the first SELECT.
     *
     * Uses [replaceFirst] (bug fix: the previous [String.replace] rewrote EVERY occurrence of
     * "SELECT", corrupting queries containing subqueries or identifiers with "select" in them).
     */
    public override fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int): String =
        sqlQuery.replaceFirst("SELECT", "SELECT TOP $limit", ignoreCase = true)

    override fun quoteIdentifier(name: String): String {
        // schema.table -> [schema].[table]
        return name.split(".").joinToString(".") { "[$it]" }
    }
}
|
||||
+63
@@ -0,0 +1,63 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.sql.ResultSet
|
||||
import java.util.Locale
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.full.createType
|
||||
|
||||
/**
 * Represents the MySql database type.
 *
 * This class provides methods to convert data from a ResultSet to the appropriate type for MySql,
 * and to generate the corresponding column schema.
 */
public object MySql : DbType("mysql") {
    // NOTE(review): this is the legacy Connector/J 5.x class name; 8.x renamed it to
    // "com.mysql.cj.jdbc.Driver" — confirm which driver version is targeted.
    override val driverClassName: String
        get() = "com.mysql.jdbc.Driver"

    /**
     * MySQL's unsigned 32-bit integer does not fit in [Int], so it is widened to [Long];
     * every other type falls back to the default mapping (`null`).
     */
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? =
        if (tableColumnMetadata.sqlTypeName == "INT UNSIGNED") {
            ColumnSchema.Value(Long::class.createType(nullable = tableColumnMetadata.isNullable))
        } else {
            null
        }

    override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
        val locale = Locale.getDefault()

        // null-safe case-insensitive substring check
        fun String?.hasLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true

        val schema = tableMetadata.schemaName
        val table = tableMetadata.name

        // information_schema / performance_schema / mysql are the built-in schemas;
        // "mysql."-prefixed names and sys_config are further system-table symptoms
        return when {
            schema.hasLowercase("information_schema") -> true
            tableMetadata.catalogue.hasLowercase("performance_schema") -> true
            tableMetadata.catalogue.hasLowercase("mysql") -> true
            schema?.contains("mysql.") == true -> true
            "mysql." in table -> true
            "sys_config" in table -> true
            else -> false
        }
    }

    override fun buildTableMetadata(tables: ResultSet): TableMetadata {
        val name = tables.getString("table_name")
        val schema = tables.getString("table_schem")
        val catalogue = tables.getString("table_cat")
        return TableMetadata(name, schema, catalogue)
    }

    /** See [convertSqlTypeToColumnSchemaValue]: only INT UNSIGNED needs widening to [Long]. */
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? =
        when (tableColumnMetadata.sqlTypeName) {
            "INT UNSIGNED" -> Long::class.createType(nullable = tableColumnMetadata.isNullable)
            else -> null
        }

    /** Quotes each dot-separated part with backticks: schema.table -> `schema`.`table` */
    override fun quoteIdentifier(name: String): String =
        name.split(".").joinToString(separator = ".") { part -> "`$part`" }
}
|
||||
Vendored
+55
@@ -0,0 +1,55 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import java.sql.ResultSet
|
||||
import java.util.Locale
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.full.createType
|
||||
|
||||
/**
 * Represents the PostgreSql database type.
 *
 * This class provides methods to convert data from a ResultSet to the appropriate type for PostgreSql,
 * and to generate the corresponding column schema.
 */
public object PostgreSql : DbType("postgresql") {
    /** The fully-qualified class name of the PostgreSQL JDBC driver. */
    override val driverClassName: String
        get() = "org.postgresql.Driver"

    /**
     * Wraps [convertSqlTypeToKType] (resolving the block's previous TODO) so both
     * overrides share a single source of truth for type corrections.
     *
     * @return a [ColumnSchema.Value] for specially-handled types, or `null` to use the default mapping.
     */
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? =
        convertSqlTypeToKType(tableColumnMetadata)?.let { ColumnSchema.Value(it) }

    // NOTE(review): the "pg_catalog." check includes a trailing dot while schema names are
    // usually plain "pg_catalog" — behavior preserved here; confirm against real metadata.
    override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
        tableMetadata.name.lowercase(Locale.getDefault()).contains("pg_") ||
            tableMetadata.schemaName?.lowercase(Locale.getDefault())?.contains("pg_catalog.") ?: false

    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
        TableMetadata(
            tables.getString("table_name"),
            tables.getString("table_schem"),
            tables.getString("table_cat"),
        )

    /**
     * PostgreSQL-specific corrections:
     * - `money` is read as [String], because of https://github.com/pgjdbc/pgjdbc/issues/425
     *
     * @return the corrected [KType], or `null` to use the default mapping.
     */
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
        if (tableColumnMetadata.sqlTypeName == "money") {
            return String::class.createType(nullable = tableColumnMetadata.isNullable)
        }

        return null
    }

    override fun quoteIdentifier(name: String): String {
        // schema.table -> "schema"."table"
        return name.split(".").joinToString(".") { "\"$it\"" }
    }
}
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import org.sqlite.SQLiteConfig
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.ResultSet
|
||||
import kotlin.reflect.KType
|
||||
|
||||
/**
 * Represents the Sqlite database type.
 *
 * This class provides methods to convert data from a ResultSet to the appropriate type for Sqlite,
 * and to generate the corresponding column schema.
 */
public object Sqlite : DbType("sqlite") {
    /** The fully-qualified class name of the SQLite JDBC driver. */
    override val driverClassName: String
        get() = "org.sqlite.JDBC"

    // No SQLite-specific overrides; use the default SQL-to-schema mapping.
    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? = null

    /** SQLite keeps its internal tables under the reserved "sqlite_" prefix. */
    override fun isSystemTable(tableMetadata: TableMetadata): Boolean = tableMetadata.name.startsWith("sqlite_")

    override fun buildTableMetadata(tables: ResultSet): TableMetadata {
        val name = tables.getString("TABLE_NAME")
        val schema = tables.getString("TABLE_SCHEM")
        val catalogue = tables.getString("TABLE_CAT")
        return TableMetadata(name, schema, catalogue)
    }

    // No SQLite-specific overrides; use the default SQL-to-KType mapping.
    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null

    /**
     * Opens a connection; read-only mode must be requested through [SQLiteConfig]
     * because the flag cannot be changed after the connection is created.
     */
    override fun createConnection(dbConfig: DbConnectionConfig): Connection {
        if (!dbConfig.readOnly) {
            return DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password)
        }
        val config = SQLiteConfig().apply { setReadOnly(true) }
        return config.createConnection(dbConfig.url)
    }
}
|
||||
+68
@@ -0,0 +1,68 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
/**
 * Represents a column in a database table to keep all required meta-information.
 *
 * @property [name] the name of the column.
 * @property [sqlTypeName] the SQL data type of the column.
 * @property [jdbcType] the JDBC data type of the column produced from [java.sql.Types].
 * @property [size] the size of the column.
 * @property [javaClassName] the class name in Java.
 * @property [isNullable] true if column could contain nulls.
 */
public class TableColumnMetadata(
    public val name: String,
    public val sqlTypeName: String,
    public val jdbcType: Int,
    public val size: Int,
    public val javaClassName: String,
    public val isNullable: Boolean = false,
) {
    /** Structural equality over all six properties. */
    override fun equals(other: Any?): Boolean =
        this === other ||
            (
                other is TableColumnMetadata &&
                    name == other.name &&
                    sqlTypeName == other.sqlTypeName &&
                    jdbcType == other.jdbcType &&
                    size == other.size &&
                    javaClassName == other.javaClassName &&
                    isNullable == other.isNullable
            )

    /** Classic 31-multiplier accumulation over the same properties as [equals]. */
    override fun hashCode(): Int =
        listOf(
            name.hashCode(),
            sqlTypeName.hashCode(),
            jdbcType,
            size,
            javaClassName.hashCode(),
            isNullable.hashCode(),
        ).reduce { acc, h -> 31 * acc + h }

    override fun toString(): String =
        "TableColumnMetadata(name='$name', sqlTypeName='$sqlTypeName', jdbcType=$jdbcType, " +
            "size=$size, javaClassName='$javaClassName', isNullable=$isNullable)"

    /**
     * Creates a copy of the current `TableColumnMetadata` instance with optionally modified attributes.
     *
     * @param name The name of the table column. Defaults to the current instance's `name`.
     * @param sqlTypeName The SQL type name of the column. Defaults to the current instance's `sqlTypeName`.
     * @param jdbcType The JDBC type of the column, represented as an integer. Defaults to the current instance's `jdbcType`.
     * @param size The size of the column. Defaults to the current instance's `size`.
     * @param javaClassName The fully qualified name of the Java class representing the column type. Defaults to the current instance's `javaClassName`.
     * @param isNullable Indicates whether the column is nullable. Defaults to the current instance's `isNullable`.
     * @return A new `TableColumnMetadata` instance with the specified attribute values.
     */
    public fun copy(
        name: String = this.name,
        sqlTypeName: String = this.sqlTypeName,
        jdbcType: Int = this.jdbcType,
        size: Int = this.size,
        javaClassName: String = this.javaClassName,
        isNullable: Boolean = this.isNullable,
    ): TableColumnMetadata = TableColumnMetadata(name, sqlTypeName, jdbcType, size, javaClassName, isNullable)
}
|
||||
Vendored
+48
@@ -0,0 +1,48 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
/**
 * Represents a table metadata to store information about a database table,
 * including its name, schema name, and catalogue name.
 *
 * NOTE: we need to extract both, [schemaName] and [catalogue]
 * because the different databases have different implementations of metadata.
 *
 * @property [name] the name of the table.
 * @property [schemaName] the name of the schema the table belongs to (optional).
 * @property [catalogue] the name of the catalogue the table belongs to (optional).
 */
public class TableMetadata(public val name: String, public val schemaName: String?, public val catalogue: String?) {
    /** Structural equality over name, schema, and catalogue. */
    override fun equals(other: Any?): Boolean =
        this === other ||
            (
                other is TableMetadata &&
                    name == other.name &&
                    schemaName == other.schemaName &&
                    catalogue == other.catalogue
            )

    /** Classic 31-multiplier accumulation, with 0 for absent schema/catalogue. */
    override fun hashCode(): Int =
        listOf(
            name.hashCode(),
            schemaName?.hashCode() ?: 0,
            catalogue?.hashCode() ?: 0,
        ).reduce { acc, h -> 31 * acc + h }

    override fun toString(): String = "TableMetadata(name='$name', schemaName=$schemaName, catalogue=$catalogue)"

    /**
     * Creates a copy of the `TableMetadata` instance with optional modifications.
     *
     * @param name the name of the table; defaults to the current name of the instance.
     * @param schemaName the name of the schema the table belongs to; defaults to the current schema name of the instance.
     * @param catalogue the name of the catalogue the table belongs to; defaults to the current catalogue of the instance.
     * @return a new `TableMetadata` instance with the specified or default values.
     */
    public fun copy(
        name: String = this.name,
        schemaName: String? = this.schemaName,
        catalogue: String? = this.catalogue,
    ): TableMetadata = TableMetadata(name, schemaName, catalogue)
}
|
||||
+138
@@ -0,0 +1,138 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.db
|
||||
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import java.sql.Connection
|
||||
import java.sql.SQLException
|
||||
|
||||
// File-local logger for DB-type detection utilities.
private val logger = KotlinLogging.logger {}

// Error template used when an H2 compatibility MODE value is not recognized.
private const val UNSUPPORTED_H2_MODE_MESSAGE =
    "Unsupported H2 MODE: %s. Supported: MySQL, PostgreSQL, MSSQLServer, MariaDB, REGULAR/H2-Regular (or omit MODE)."

// Query returning the active MODE setting of an H2 database (H2 version 2 schema).
private const val H2_MODE_QUERY = "SELECT SETTING_VALUE FROM INFORMATION_SCHEMA.SETTINGS WHERE SETTING_NAME = 'MODE'"

// Extracts the MODE parameter value from a JDBC URL, e.g. "jdbc:h2:mem:db;MODE=MySQL".
private val H2_MODE_URL_PATTERN = "MODE=([^;:&]+)".toRegex(RegexOption.IGNORE_CASE)
|
||||
|
||||
/**
 * Determines the [DbType] for the given [connection].
 *
 * For H2, the actual compatibility MODE is queried from the active connection settings
 * (covering the case where the MODE is absent from the URL). For every other database,
 * the type is derived from the JDBC URL alone.
 *
 * @param [connection] the database connection.
 * @return the corresponding [DbType].
 * @throws [IllegalStateException] if the connection metadata carries no URL.
 * @throws [IllegalArgumentException] if the URL names an unsupported database type.
 * @throws [SQLException] if the URL is null.
 */
public fun extractDBTypeFromConnection(connection: Connection): DbType {
    val url = connection.metaData?.url
        ?: throw IllegalStateException("URL information is missing in connection meta data!")
    logger.info { "Processing DB type extraction for connection url: $url" }

    val baseDbType = extractDBTypeFromUrl(url)

    // Non-H2 databases are fully determined by the URL.
    if (baseDbType !is H2) {
        logger.info { "Identified DB type as $baseDbType from url: $url" }
        return baseDbType
    }

    // H2: refine the mode from the live connection settings, since the URL may omit MODE.
    return parseH2ModeOrThrow(fetchH2ModeFromConnection(connection))
}
|
||||
|
||||
/**
 * Reads the H2 MODE setting from an active connection (works for H2 version 2 only).
 *
 * @param [connection] the database connection.
 * @return the trimmed mode string, or `null` when unset or blank.
 */
private fun fetchH2ModeFromConnection(connection: Connection): String? =
    connection
        .prepareStatement(H2_MODE_QUERY)
        .use { statement ->
            statement.executeQuery().use { resultSet ->
                if (resultSet.next()) {
                    resultSet.getString("SETTING_VALUE").also {
                        logger.debug { "Fetched H2 DB mode: $it" }
                    }
                } else {
                    null
                }
            }
        }
        ?.trim()
        ?.takeIf { it.isNotEmpty() }
|
||||
|
||||
/**
 * Converts an H2 mode string into the matching [H2] instance.
 *
 * @param [mode] the mode string; `null` or empty is treated as Regular mode.
 * @return an [H2] with the resolved mode.
 * @throws [IllegalArgumentException] if the mode string is not supported.
 */
private fun parseH2ModeOrThrow(mode: String?): H2 {
    if (mode.isNullOrEmpty()) {
        return H2(H2.Mode.Regular)
    }
    val resolvedMode = H2.Mode.fromValue(mode)
    if (resolvedMode != null) {
        return H2(resolvedMode)
    }
    val failure = IllegalArgumentException(UNSUPPORTED_H2_MODE_MESSAGE.format(mode))
    logger.error { failure.message }
    throw failure
}
|
||||
|
||||
/**
 * Determines the [DbType] from the given JDBC URL.
 *
 * @param [url] the JDBC URL.
 * @return the corresponding [DbType].
 * @throws [SQLException] if [url] is null.
 * @throws [IllegalArgumentException] if the URL names an unsupported database type.
 */
public fun extractDBTypeFromUrl(url: String?): DbType {
    if (url == null) throw SQLException("Database URL could not be null.")

    return when {
        url.contains(H2().dbTypeInJdbcUrl) -> createH2Instance(url)

        url.contains(MariaDb.dbTypeInJdbcUrl) -> MariaDb

        url.contains(MySql.dbTypeInJdbcUrl) -> MySql

        url.contains(Sqlite.dbTypeInJdbcUrl) -> Sqlite

        url.contains(PostgreSql.dbTypeInJdbcUrl) -> PostgreSql

        url.contains(MsSql.dbTypeInJdbcUrl) -> MsSql

        url.contains(DuckDb.dbTypeInJdbcUrl) -> DuckDb

        else -> throw IllegalArgumentException(
            "Unsupported database type in the url: $url. " +
                "Only H2, MariaDB, MySQL, MSSQL, SQLite, PostgreSQL, and DuckDB are supported!",
        )
    }
}
|
||||
|
||||
/**
 * Builds the H2 [DbType] for the given JDBC URL, honoring a MODE parameter when present.
 *
 * @param [url] the JDBC URL of the H2 database.
 * @return the resolved [DbType].
 * @throws [IllegalArgumentException] if the URL carries an unsupported MODE value.
 */
private fun createH2Instance(url: String): DbType {
    val modeFromUrl = H2_MODE_URL_PATTERN
        .find(url)
        ?.groupValues
        ?.getOrNull(1)
        ?.takeIf { it.isNotBlank() }
    return parseH2ModeOrThrow(modeFromUrl)
}
|
||||
|
||||
/**
 * Resolves the JDBC driver class name for the given URL.
 *
 * @param [url] the JDBC URL.
 * @return the fully-qualified driver class name.
 */
public fun driverClassNameFromUrl(url: String): String = extractDBTypeFromUrl(url).driverClassName
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.Code
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.CodeGenerator
|
||||
import org.jetbrains.kotlinx.dataframe.impl.codeGen.CodeGenerationReadResult
|
||||
import java.net.URL
|
||||
|
||||
// TODO: helper functions created to support existing hierarchy https://github.com/Kotlin/dataframe/issues/450
/**
 * Code-generation reader for JDBC sources: builds schema code for the database at [url]
 * and wraps it in a [CodeGenerationReadResult].
 *
 * Any failure during generation is captured and returned as [CodeGenerationReadResult.Error].
 */
public val CodeGenerator.Companion.databaseCodeGenReader: (url: URL, name: String) -> CodeGenerationReadResult
    get() = { url, name ->
        try {
            val code = buildCodeForDB(url, name)
            // BUGFIX: a stray `throw RuntimeException()` previously sat here, making this
            // Success branch unreachable and forcing every call into the Error path.
            CodeGenerationReadResult.Success(code, Jdbc())
        } catch (e: Throwable) {
            CodeGenerationReadResult.Error(e)
        }
    }
|
||||
|
||||
/**
 * Builds Kotlin schema code for the database referenced by [url].
 *
 * NOTE(review): this is currently an unimplemented stub — `declarations` is never
 * populated, so the function always returns an empty string. The locals below
 * (`annotationName`, `visibility`, `propertyVisibility`) are unused scaffolding kept
 * for the future generator (see https://github.com/Kotlin/dataframe/issues/450).
 *
 * @param [url] the JDBC URL of the database to generate code for.
 * @param [name] the base name for the generated schema.
 * @return the generated declarations joined into one [Code] string (currently empty).
 */
public fun buildCodeForDB(url: URL, name: String): Code {
    val annotationName = DataSchema::class.simpleName
    val visibility = "public "
    val propertyVisibility = "public "

    val declarations = mutableListOf<String>()
    return declarations.joinToString()
}
|
||||
Vendored
+464
@@ -0,0 +1,464 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.DbType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromConnection
|
||||
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.ResultSet
|
||||
import javax.sql.DataSource
|
||||
import kotlin.use
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of the SQL table [tableName] using [dbConfig].
 *
 * When [DbConnectionConfig.readOnly] is `true` (the default) the connection is marked
 * read-only via `Connection.setReadOnly(true)`, used with `autoCommit = false`, and
 * rolled back after reading, so the database is never modified. Even with
 * `readOnly = false`, only safe `SELECT` operations are permitted internally.
 *
 * @param [dbConfig] database configuration (URL, user, password).
 * @param [tableName] name of the SQL table whose schema is requested.
 * @param [dbType] explicit database type; detected from [dbConfig] when `null` (the default).
 * @return the table schema as a [DataFrameSchema].
 */
public fun DataFrameSchema.Companion.readSqlTable(
    dbConfig: DbConnectionConfig,
    tableName: String,
    dbType: DbType? = null,
): DataFrameSchema {
    return withReadOnlyConnection(dbConfig, dbType) { connection ->
        readSqlTable(connection, tableName, dbType)
    }
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of the SQL table [tableName] using a connection
 * borrowed from [dataSource]; the connection is closed after use.
 *
 * Example with HikariCP:
 * ```kotlin
 * val dataSource = HikariDataSource(HikariConfig().apply {
 *     jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
 *     username = "user"
 *     password = "password"
 * })
 * val customersSchema = DataFrameSchema.readSqlTable(dataSource, "customers")
 * println(customersSchema.columns)
 * ```
 *
 * @param [dataSource] the [DataSource] supplying the connection.
 * @param [tableName] name of the SQL table whose schema is requested.
 * @param [dbType] explicit database type; detected from the connection when `null` (the default).
 * @return the table schema as a [DataFrameSchema].
 * @see [DataSource.getConnection]
 */
public fun DataFrameSchema.Companion.readSqlTable(
    dataSource: DataSource,
    tableName: String,
    dbType: DbType? = null,
): DataFrameSchema =
    dataSource.connection.use { connection ->
        readSqlTable(connection, tableName, dbType)
    }
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of the SQL table [tableName] over an existing [connection].
 * The connection is not closed by this function.
 *
 * @param [connection] the database connection.
 * @param [tableName] name of the SQL table whose schema is requested.
 * @param [dbType] explicit database type; detected from [connection] when `null` (the default).
 * @return the table schema as a [DataFrameSchema].
 * @see DriverManager.getConnection
 */
public fun DataFrameSchema.Companion.readSqlTable(
    connection: Connection,
    tableName: String,
    dbType: DbType? = null,
): DataFrameSchema {
    val resolvedDbType = dbType ?: extractDBTypeFromConnection(connection)

    // A single-row read is enough to derive the schema.
    return DataFrame
        .readSqlTable(
            connection = connection,
            tableName = tableName,
            limit = 1,
            inferNullability = false, // nullability inference is unnecessary for schema extraction
            dbType = resolvedDbType,
            strictValidation = true,
        )
        .schema()
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of the result of [sqlQuery] using [dbConfig].
 *
 * When [DbConnectionConfig.readOnly] is `true` (the default) the connection is marked
 * read-only via `Connection.setReadOnly(true)`, used with `autoCommit = false`, and
 * rolled back after reading, so the database is never modified. Even with
 * `readOnly = false`, only safe `SELECT` operations are permitted internally.
 *
 * @param [dbConfig] database configuration (URL, user, password).
 * @param [sqlQuery] the SQL query whose result schema is requested.
 * @param [dbType] explicit database type; detected from [dbConfig] when `null` (the default).
 * @return the query result schema as a [DataFrameSchema].
 */
public fun DataFrameSchema.Companion.readSqlQuery(
    dbConfig: DbConnectionConfig,
    sqlQuery: String,
    dbType: DbType? = null,
): DataFrameSchema {
    return withReadOnlyConnection(dbConfig, dbType) { connection ->
        readSqlQuery(connection, sqlQuery, dbType)
    }
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of the result of [sqlQuery] using a connection borrowed
 * from [dataSource]; the connection is closed after use.
 *
 * Example with HikariCP:
 * ```kotlin
 * val dataSource = HikariDataSource(HikariConfig().apply {
 *     jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
 *     username = "user"
 *     password = "password"
 * })
 * val querySchema = DataFrameSchema.readSqlQuery(
 *     dataSource,
 *     "SELECT name, age, city FROM customers WHERE age > 25"
 * )
 * println(querySchema.columns)
 * ```
 *
 * @param [dataSource] the [DataSource] supplying the connection.
 * @param [sqlQuery] the SQL query whose result schema is requested.
 * @param [dbType] explicit database type; detected from the connection when `null` (the default).
 * @return the query result schema as a [DataFrameSchema].
 * @see [DataSource.getConnection]
 */
public fun DataFrameSchema.Companion.readSqlQuery(
    dataSource: DataSource,
    sqlQuery: String,
    dbType: DbType? = null,
): DataFrameSchema =
    dataSource.connection.use { connection ->
        readSqlQuery(connection, sqlQuery, dbType)
    }
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of the result of [sqlQuery] over an existing [connection].
 * The connection is not closed by this function.
 *
 * @param [connection] the database connection.
 * @param [sqlQuery] the SQL query whose result schema is requested.
 * @param [dbType] explicit database type; detected from [connection] when `null` (the default).
 * @return the query result schema as a [DataFrameSchema].
 * @see DriverManager.getConnection
 */
public fun DataFrameSchema.Companion.readSqlQuery(
    connection: Connection,
    sqlQuery: String,
    dbType: DbType? = null,
): DataFrameSchema {
    val resolvedDbType = dbType ?: extractDBTypeFromConnection(connection)

    // A single-row read is enough to derive the schema.
    return DataFrame
        .readSqlQuery(
            connection = connection,
            sqlQuery = sqlQuery,
            limit = 1,
            inferNullability = false, // nullability inference is unnecessary for schema extraction
            dbType = resolvedDbType,
            strictValidation = true,
        )
        .schema()
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] for either an SQL query or an SQL table, depending on
 * what [sqlQueryOrTableName] looks like, using this database configuration.
 *
 * When [DbConnectionConfig.readOnly] is `true` (the default) the connection is marked
 * read-only via `Connection.setReadOnly(true)`, used with `autoCommit = false`, and
 * rolled back after reading, so the database is never modified. Even with
 * `readOnly = false`, only safe `SELECT` operations are permitted internally.
 *
 * @param [sqlQueryOrTableName] an SQL query or the name of an existing SQL table.
 * @param [dbType] explicit database type; detected from this [DbConnectionConfig] when `null`.
 * @return the resulting [DataFrameSchema].
 * @throws [IllegalArgumentException] if the argument is neither a query nor a table name.
 */
public fun DbConnectionConfig.readDataFrameSchema(
    sqlQueryOrTableName: String,
    dbType: DbType? = null,
): DataFrameSchema {
    if (isSqlQuery(sqlQueryOrTableName)) {
        return DataFrameSchema.readSqlQuery(this, sqlQueryOrTableName, dbType)
    }
    if (isSqlTableName(sqlQueryOrTableName)) {
        return DataFrameSchema.readSqlTable(this, sqlQueryOrTableName, dbType)
    }
    throw IllegalArgumentException(
        "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
    )
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] for either an SQL query or an SQL table, depending on
 * what [sqlQueryOrTableName] looks like, using a connection borrowed from this
 * [DataSource]; the connection is closed after use.
 *
 * Example with HikariCP:
 * ```kotlin
 * val dataSource = HikariDataSource(HikariConfig().apply {
 *     jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
 *     username = "user"
 *     password = "password"
 * })
 * val tableSchema = dataSource.readDataFrameSchema("customers")
 * val querySchema = dataSource.readDataFrameSchema("SELECT name, age FROM customers WHERE age > 25")
 * println(tableSchema.columns)
 * ```
 *
 * @param [sqlQueryOrTableName] the name of an existing SQL table, or a single `SELECT`
 * query reading data without manipulation; it must not contain a `;` symbol.
 * @param [dbType] explicit database type; detected from this [DataSource] when `null`.
 * @return the resulting [DataFrameSchema].
 * @throws [IllegalArgumentException] if the argument is neither a query nor a table name.
 * @see [DataSource.getConnection]
 */
public fun DataSource.readDataFrameSchema(sqlQueryOrTableName: String, dbType: DbType? = null): DataFrameSchema =
    connection.use { conn ->
        if (isSqlQuery(sqlQueryOrTableName)) {
            DataFrameSchema.readSqlQuery(conn, sqlQueryOrTableName, dbType)
        } else if (isSqlTableName(sqlQueryOrTableName)) {
            DataFrameSchema.readSqlTable(conn, sqlQueryOrTableName, dbType)
        } else {
            throw IllegalArgumentException(
                "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
            )
        }
    }
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] for either an SQL query or an SQL table, depending on
 * what [sqlQueryOrTableName] looks like, using this existing connection.
 * The connection is not closed by this function.
 *
 * @param [sqlQueryOrTableName] an SQL query or the name of an existing SQL table.
 * @param [dbType] explicit database type; detected from this [Connection] when `null`.
 * @return the resulting [DataFrameSchema].
 * @throws [IllegalArgumentException] if the argument is neither a query nor a table name.
 */
public fun Connection.readDataFrameSchema(sqlQueryOrTableName: String, dbType: DbType? = null): DataFrameSchema {
    if (isSqlQuery(sqlQueryOrTableName)) {
        return DataFrameSchema.readSqlQuery(this, sqlQueryOrTableName, dbType)
    }
    if (isSqlTableName(sqlQueryOrTableName)) {
        return DataFrameSchema.readSqlTable(this, sqlQueryOrTableName, dbType)
    }
    throw IllegalArgumentException(
        "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
    )
}
|
||||
|
||||
/**
 * Derives a [DataFrameSchema] from a [ResultSet]'s column metadata.
 *
 * NOTE: neither the connection nor the result set is closed, and no rows are consumed.
 *
 * @param [resultSet] the [ResultSet] obtained from executing a database query.
 * @param [dbType] the database type the [ResultSet] belongs to (may be a custom implementation).
 * @return the schema of the [ResultSet] as a [DataFrameSchema].
 */
public fun DataFrameSchema.Companion.readResultSet(resultSet: ResultSet, dbType: DbType): DataFrameSchema =
    buildSchemaByTableColumns(getTableColumnsMetadata(resultSet, dbType), dbType)
|
||||
|
||||
/**
 * Derives a [DataFrameSchema] from this [ResultSet]'s column metadata.
 *
 * NOTE: neither the connection nor the result set is closed, and no rows are consumed.
 *
 * @param [dbType] the database type this [ResultSet] belongs to (may be a custom implementation).
 * @return the schema of this [ResultSet] as a [DataFrameSchema].
 */
public fun ResultSet.readDataFrameSchema(dbType: DbType): DataFrameSchema {
    return DataFrameSchema.readResultSet(this, dbType)
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of every non-system table reachable through [dbConfig].
 *
 * When [DbConnectionConfig.readOnly] is `true` (the default) the connection is marked
 * read-only via `Connection.setReadOnly(true)`, used with `autoCommit = false`, and
 * rolled back after reading, so the database is never modified. Even with
 * `readOnly = false`, only safe `SELECT` operations are permitted internally.
 *
 * @param [dbConfig] database configuration (URL, user, password).
 * @param [dbType] explicit database type; detected from [dbConfig] when `null` (the default).
 * @return a map from table name to its [DataFrameSchema], one entry per non-system table.
 */
public fun DataFrameSchema.Companion.readAllSqlTables(
    dbConfig: DbConnectionConfig,
    dbType: DbType? = null,
): Map<String, DataFrameSchema> {
    return withReadOnlyConnection(dbConfig, dbType) { connection ->
        readAllSqlTables(connection, dbType)
    }
}
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of every non-system table using a connection borrowed
 * from [dataSource]; the connection is closed after use.
 *
 * Example with HikariCP:
 * ```kotlin
 * val dataSource = HikariDataSource(HikariConfig().apply {
 *     jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
 *     username = "user"
 *     password = "password"
 * })
 * val allSchemas = DataFrameSchema.readAllSqlTables(dataSource)
 * val customersSchema = allSchemas["customers"]
 * allSchemas.forEach { (tableName, schema) ->
 *     println("Table: \$tableName, Columns: \${schema.columns.keys}")
 * }
 * ```
 *
 * @param [dataSource] the [DataSource] supplying the connection.
 * @param [dbType] explicit database type; detected from the connection when `null` (the default).
 * @return a map from table name to its [DataFrameSchema], one entry per non-system table.
 * @see [DataSource.getConnection]
 */
public fun DataFrameSchema.Companion.readAllSqlTables(
    dataSource: DataSource,
    dbType: DbType? = null,
): Map<String, DataFrameSchema> =
    dataSource.connection.use { connection ->
        readAllSqlTables(connection, dbType)
    }
|
||||
|
||||
/**
 * Reads the [DataFrameSchema] of every non-system table over an existing [connection].
 * The connection is not closed by this function.
 *
 * @param [connection] the database connection.
 * @param [dbType] explicit database type; detected from [connection] when `null` (the default).
 * @return a map from table name to its [DataFrameSchema], one entry per non-system table.
 */
public fun DataFrameSchema.Companion.readAllSqlTables(
    connection: Connection,
    dbType: DbType? = null,
): Map<String, DataFrameSchema> {
    val metaData = connection.metaData
    val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)

    // exclude system- and other tables without data by restricting the JDBC table types
    val tableTypes = determinedDbType.tableTypes?.toTypedArray()
    val tables = metaData.getTables(null, null, null, tableTypes)

    val dataFrameSchemas = mutableMapOf<String, DataFrameSchema>()

    while (tables.next()) {
        val jdbcTable = determinedDbType.buildTableMetadata(tables)
        if (!determinedDbType.isSystemTable(jdbcTable)) {
            // we filter here a second time because of specific logic with SQLite and possible issues with future databases
            val tableName = jdbcTable.name
            val dataFrameSchema = readSqlTable(connection, tableName, determinedDbType)
            dataFrameSchemas += tableName to dataFrameSchema
        }
    }

    return dataFrameSchemas
}
|
||||
|
||||
/**
 * Builds a [DataFrameSchema] from table-column metadata.
 *
 * @param [tableColumns] the table columns, each carrying the column name and its metadata.
 * @param [dbType] the database type the columns come from.
 * @return the resulting [DataFrameSchema].
 */
internal fun buildSchemaByTableColumns(
    tableColumns: MutableList<TableColumnMetadata>,
    dbType: DbType,
): DataFrameSchema {
    val columns = tableColumns.associate { column ->
        column.name to generateColumnSchemaValue(dbType, column)
    }
    return DataFrameSchemaImpl(columns = columns)
}
|
||||
|
||||
// Maps a single column's SQL metadata to a ColumnSchema: the DbType gets first say,
// falling back to the common SQL-to-KType mapping when it declines.
internal fun generateColumnSchemaValue(dbType: DbType, tableColumnMetadata: TableColumnMetadata): ColumnSchema {
    val specificSchema = dbType.convertSqlTypeToColumnSchemaValue(tableColumnMetadata)
    if (specificSchema != null) {
        return specificSchema
    }
    return ColumnSchema.Value(dbType.makeCommonSqlToKTypeMapping(tableColumnMetadata))
}
|
||||
+956
@@ -0,0 +1,956 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.DbType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromConnection
|
||||
import java.sql.Connection
|
||||
import java.sql.DatabaseMetaData
|
||||
import java.sql.DriverManager
|
||||
import java.sql.PreparedStatement
|
||||
import java.sql.ResultSet
|
||||
import javax.sql.DataSource
|
||||
import kotlin.reflect.KType
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
/**
 * Reads the SQL table [tableName] into a [DataFrame] using [dbConfig].
 *
 * When [DbConnectionConfig.readOnly] is `true` (the default) the connection is marked
 * read-only via `Connection.setReadOnly(true)`, used with `autoCommit = false`, and
 * rolled back after reading, so the database is never modified. Even with
 * `readOnly = false`, only safe `SELECT` operations are permitted internally.
 *
 * @param [dbConfig] database configuration (URL, user, password).
 * @param [tableName] name of the table to read.
 * @param [limit] maximum number of rows to fetch: `null` (the default) fetches all rows,
 * a positive value (e.g. `100`) fetches at most that many.
 * @param [inferNullability] whether column nullability should be inferred from the data.
 * @param [dbType] explicit database type; detected from [dbConfig] when `null` (the default).
 * @param [strictValidation] when `true` (the default), validates that [tableName] has a valid format.
 * @param [configureStatement] optional hook to tune the [PreparedStatement] (fetch size,
 * query timeout, other JDBC parameters) before execution.
 * @return the table contents as a [DataFrame].
 */
public fun DataFrame.Companion.readSqlTable(
    dbConfig: DbConnectionConfig,
    tableName: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    return withReadOnlyConnection(dbConfig, dbType) { connection ->
        readSqlTable(
            connection,
            tableName,
            limit,
            inferNullability,
            dbType,
            strictValidation,
            configureStatement,
        )
    }
}
|
||||
|
||||
/**
 * Reads the SQL table [tableName] into a [DataFrame] using a connection borrowed
 * from [dataSource]; the connection is closed after use.
 *
 * @param [dataSource] the [DataSource] supplying the connection.
 * @param [tableName] name of the table to read.
 * @param [limit] maximum number of rows to fetch: `null` (the default) fetches all rows,
 * a positive value (e.g. `100`) fetches at most that many.
 * @param [inferNullability] whether column nullability should be inferred from the data.
 * @param [dbType] explicit database type; detected from [dataSource] when `null` (the default).
 * @param [strictValidation] when `true` (the default), validates that [tableName] has a valid format.
 * @param [configureStatement] optional hook to tune the [PreparedStatement] (fetch size,
 * query timeout, other JDBC parameters) before execution.
 * @return the table contents as a [DataFrame].
 * @see [DataSource.getConnection]
 */
public fun DataFrame.Companion.readSqlTable(
    dataSource: DataSource,
    tableName: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    return dataSource.connection.use { connection ->
        readSqlTable(
            connection,
            tableName,
            limit,
            inferNullability,
            dbType,
            strictValidation,
            configureStatement,
        )
    }
}
|
||||
|
||||
/**
 * Reads data from an SQL table and converts it into a DataFrame.
 *
 * @param [connection] the database connection to read tables from.
 * @param [tableName] the name of the table to read data from.
 * @param [limit] the maximum number of rows to retrieve from the table;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [connection].
 * @param [strictValidation] if `true` (default), validates that the provided table name is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the data from the SQL table.
 *
 * @see [DriverManager.getConnection]
 */
public fun DataFrame.Companion.readSqlTable(
    connection: Connection,
    tableName: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    if (!strictValidation) {
        logger.warn { "Strict validation is disabled. Make sure the table name '$tableName' is correct." }
    } else {
        require(isValidTableName(tableName)) {
            "The provided table name '$tableName' is invalid. Please ensure it matches a valid table name in the database schema."
        }
    }

    // Resolve the dialect first: it decides how the SELECT (and optional LIMIT) is rendered.
    val resolvedDbType = dbType ?: extractDBTypeFromConnection(connection)
    val selectQuery = resolvedDbType.buildSelectTableQueryWithLimit(tableName, limit)

    return executeQueryAndBuildDataFrame(
        connection,
        selectQuery,
        resolvedDbType,
        configureStatement,
        limit,
        inferNullability,
    )
}

/**
 * Executes [sqlQuery] on [connection] and materializes the result set as a DataFrame.
 *
 * @param [connection] the database connection used to execute the query.
 * @param [sqlQuery] the SQL query string to be executed.
 * @param [determinedDbType] the resolved database type driving dialect-specific configuration.
 * @param [configureStatement] lambda invoked on the prepared statement before execution.
 * @param [limit] the maximum number of rows to retrieve; `null` (default) means no limit.
 * @param [inferNullability] whether to infer nullability for result set fields.
 * @return the DataFrame constructed from the database query results.
 * @throws [IllegalStateException] if an error occurs while reading from the database or processing the data.
 */
private fun executeQueryAndBuildDataFrame(
    connection: Connection,
    sqlQuery: String,
    determinedDbType: DbType,
    configureStatement: (PreparedStatement) -> Unit,
    limit: Int?,
    inferNullability: Boolean,
): AnyFrame {
    return try {
        connection.prepareStatement(sqlQuery).use { statement ->
            logger.debug { "Connection established successfully (${connection.metaData.databaseProductName})" }
            // Dialect tuning first, then the caller's customization may override it.
            determinedDbType.configureReadStatement(statement)
            configureStatement(statement)
            logger.debug { "Executing query: $sqlQuery" }
            statement.executeQuery().use { resultSet ->
                fetchAndConvertDataFromResultSet(
                    getTableColumnsMetadata(resultSet, determinedDbType),
                    resultSet,
                    determinedDbType,
                    limit,
                    inferNullability,
                )
            }
        }
    } catch (e: java.sql.SQLException) {
        // Normalize every JDBC SQLException into a single type, enriched with context.
        logger.error(e) { "Database operation failed: $sqlQuery" }
        throw IllegalStateException(
            "Failed to read from database. Query: $sqlQuery, Database: ${determinedDbType.dbTypeInJdbcUrl}",
            e,
        )
    } catch (e: Exception) {
        // Normalize any other unexpected JDBC failure the same way.
        logger.error(e) { "Unexpected error: ${e.message}" }
        throw IllegalStateException("Unexpected error while reading from database", e)
    }
}

/**
 * Converts the result of an SQL query to the DataFrame.
 *
 * __NOTE:__ SQL query should start from SELECT and contain one query for reading data without any manipulation.
 * It should not contain `;` symbol.
 *
 * ### Default Behavior:
 * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
 * - explicitly set as read-only via `Connection.setReadOnly(true)`
 * - used with `autoCommit = false`
 * - automatically rolled back after reading, ensuring no changes to the database
 *
 * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
 * and only permits safe `SELECT` operations internally.
 *
 * @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
 * @param [sqlQuery] the SQL query to execute.
 * @param [limit] the maximum number of rows to retrieve from the query result;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [dbConfig].
 * @param [strictValidation] if `true` (default), validates that the provided query is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the result of the SQL query.
 */
public fun DataFrame.Companion.readSqlQuery(
    dbConfig: DbConnectionConfig,
    sqlQuery: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    // Delegate to the Connection-based overload inside a managed read-only connection.
    return withReadOnlyConnection(dbConfig, dbType) { connection ->
        readSqlQuery(
            connection = connection,
            sqlQuery = sqlQuery,
            limit = limit,
            inferNullability = inferNullability,
            dbType = dbType,
            strictValidation = strictValidation,
            configureStatement = configureStatement,
        )
    }
}

/**
 * Converts the result of an SQL query to the DataFrame.
 *
 * __NOTE:__ SQL query should start from SELECT and contain one query for reading data without any manipulation.
 * It should not contain `;` symbol.
 *
 * A connection is borrowed from [dataSource] and closed after the read completes.
 *
 * @param [dataSource] the [DataSource] to obtain a database connection from.
 * @param [sqlQuery] the SQL query to execute.
 * @param [limit] the maximum number of rows to retrieve from the query result;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [dataSource].
 * @param [strictValidation] if `true` (default), validates that the provided query is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the result of the SQL query.
 *
 * @see [DataSource.getConnection]
 */
public fun DataFrame.Companion.readSqlQuery(
    dataSource: DataSource,
    sqlQuery: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    // `use` guarantees the borrowed connection is closed even if reading fails.
    return dataSource.connection.use { connection ->
        readSqlQuery(connection, sqlQuery, limit, inferNullability, dbType, strictValidation, configureStatement)
    }
}

/**
 * Converts the result of an SQL query to the DataFrame.
 *
 * __NOTE:__ SQL query should start from SELECT and contain one query for reading data without any manipulation.
 * It should not contain `;` symbol.
 *
 * @param [connection] the database connection to execute the SQL query.
 * @param [sqlQuery] the SQL query to execute.
 * @param [limit] the maximum number of rows to retrieve from the query result;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [connection].
 * @param [strictValidation] if `true` (default), validates that the provided query is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the result of the SQL query.
 *
 * @see [DriverManager.getConnection]
 */
public fun DataFrame.Companion.readSqlQuery(
    connection: Connection,
    sqlQuery: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    if (!strictValidation) {
        logger.warn { "Strict validation is disabled. Ensure the SQL query '$sqlQuery' is correct and safe." }
    } else {
        require(isValidSqlQuery(sqlQuery)) {
            "SQL query should start from SELECT and contain one query for reading data without any manipulation. " +
                "Also it should not contain any separators like `;`."
        }
    }

    val resolvedDbType = dbType ?: extractDBTypeFromConnection(connection)
    // Push the requested row limit into the SQL text itself when one was given.
    val effectiveQuery = if (limit == null) sqlQuery else resolvedDbType.buildSqlQueryWithLimit(sqlQuery, limit)

    return executeQueryAndBuildDataFrame(
        connection,
        effectiveQuery,
        resolvedDbType,
        configureStatement,
        limit,
        inferNullability,
    )
}

/**
 * Converts the result of an SQL query or SQL table (by name) to the DataFrame.
 *
 * ### Default Behavior:
 * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
 * - explicitly set as read-only via `Connection.setReadOnly(true)`
 * - used with `autoCommit = false`
 * - automatically rolled back after reading, ensuring no changes to the database
 *
 * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
 * and only permits safe `SELECT` operations internally.
 *
 * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table.
 * It should be a name of one of the existing SQL tables,
 * or the SQL query should start from SELECT and contain one query for reading data without any manipulation.
 * It should not contain `;` symbol.
 * @param [limit] the maximum number of rows to retrieve;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [DbConnectionConfig].
 * @param [strictValidation] if `true` (default), validates that the provided query or table name is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the result of the SQL query.
 */
public fun DbConnectionConfig.readDataFrame(
    sqlQueryOrTableName: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    // Dispatch on the shape of the argument: a full SELECT query or a bare table name.
    return if (isSqlQuery(sqlQueryOrTableName)) {
        DataFrame.readSqlQuery(this, sqlQueryOrTableName, limit, inferNullability, dbType, strictValidation, configureStatement)
    } else if (isSqlTableName(sqlQueryOrTableName)) {
        DataFrame.readSqlTable(this, sqlQueryOrTableName, limit, inferNullability, dbType, strictValidation, configureStatement)
    } else {
        throw IllegalArgumentException("$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!")
    }
}

/**
 * Converts the result of an SQL query or SQL table (by name) to the DataFrame.
 *
 * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table.
 * It should be a name of one of the existing SQL tables,
 * or the SQL query should start from SELECT and contain one query for reading data without any manipulation.
 * It should not contain `;` symbol.
 * @param [limit] the maximum number of rows to retrieve;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [Connection].
 * @param [strictValidation] if `true` (default), validates that the provided query or table name is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the result of the SQL query.
 */
public fun Connection.readDataFrame(
    sqlQueryOrTableName: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    // Dispatch on the shape of the argument: a full SELECT query or a bare table name.
    return when {
        isSqlQuery(sqlQueryOrTableName) ->
            DataFrame.readSqlQuery(
                connection = this,
                sqlQuery = sqlQueryOrTableName,
                limit = limit,
                inferNullability = inferNullability,
                dbType = dbType,
                strictValidation = strictValidation,
                configureStatement = configureStatement,
            )

        isSqlTableName(sqlQueryOrTableName) ->
            DataFrame.readSqlTable(
                connection = this,
                tableName = sqlQueryOrTableName,
                limit = limit,
                inferNullability = inferNullability,
                dbType = dbType,
                strictValidation = strictValidation,
                configureStatement = configureStatement,
            )

        else -> throw IllegalArgumentException(
            "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
        )
    }
}

/**
 * Converts the result of an SQL query or SQL table (by name) to the DataFrame.
 *
 * A connection is borrowed from the receiver [DataSource] and closed after the read completes.
 *
 * ### Example with HikariCP:
 * ```kotlin
 * import com.zaxxer.hikari.HikariConfig
 * import com.zaxxer.hikari.HikariDataSource
 *
 * val config = HikariConfig().apply {
 *     jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
 *     username = "user"
 *     password = "password"
 * }
 * val dataSource = HikariDataSource(config)
 *
 * // Read from a table
 * val customersDF = dataSource.readDataFrame("customers", limit = 100)
 *
 * // Or execute a query
 * val queryDF = dataSource.readDataFrame("SELECT * FROM orders WHERE amount > 100")
 * ```
 *
 * @param [sqlQueryOrTableName] the SQL query to execute or name of the SQL table.
 * It should be a name of one of the existing SQL tables,
 * or the SQL query should start from SELECT and contain one query for reading data without any manipulation.
 * It should not contain `;` symbol.
 * @param [limit] the maximum number of rows to retrieve;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [DataSource].
 * @param [strictValidation] if `true` (default), validates that the provided query or table name is in a valid format.
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return the DataFrame containing the result of the SQL query.
 *
 * @see [DataSource.getConnection]
 */
public fun DataSource.readDataFrame(
    sqlQueryOrTableName: String,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    strictValidation: Boolean = true,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    validateLimit(limit)
    // `use` guarantees the borrowed connection is closed even if reading fails.
    return connection.use { conn ->
        when {
            isSqlQuery(sqlQueryOrTableName) ->
                DataFrame.readSqlQuery(conn, sqlQueryOrTableName, limit, inferNullability, dbType, strictValidation, configureStatement)

            isSqlTableName(sqlQueryOrTableName) ->
                DataFrame.readSqlTable(conn, sqlQueryOrTableName, limit, inferNullability, dbType, strictValidation, configureStatement)

            else -> throw IllegalArgumentException(
                "$sqlQueryOrTableName should be SQL query or name of one of the existing SQL tables!",
            )
        }
    }
}

/**
 * Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
 *
 * A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
 * By default, a ResultSet object is not updatable and has a cursor that can only move forward,
 * so it can be iterated only once, from the first row to the last row.
 *
 * NOTE: Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
 *
 * @param [resultSet] the [ResultSet][java.sql.ResultSet] containing the data to read;
 * its state may be altered after the read operation.
 * @param [dbType] the type of database that the [ResultSet] belongs to.
 * @param [limit] the maximum number of rows to read;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
 *
 * @see [java.sql.ResultSet]
 */
public fun DataFrame.Companion.readResultSet(
    resultSet: ResultSet,
    dbType: DbType,
    limit: Int? = null,
    inferNullability: Boolean = true,
): AnyFrame {
    validateLimit(limit)
    val columnsMetadata = getTableColumnsMetadata(resultSet, dbType)
    return fetchAndConvertDataFromResultSet(columnsMetadata, resultSet, dbType, limit, inferNullability)
}

/**
 * Reads the data from this [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
 *
 * A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
 * By default, a ResultSet object is not updatable and has a cursor that can only move forward,
 * so it can be iterated only once, from the first row to the last row.
 *
 * NOTE: Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
 *
 * @param [dbType] the type of database that the [ResultSet] belongs to.
 * @param [limit] the maximum number of rows to read;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
 *
 * @see [java.sql.ResultSet]
 */
public fun ResultSet.readDataFrame(dbType: DbType, limit: Int? = null, inferNullability: Boolean = true): AnyFrame {
    validateLimit(limit)
    return DataFrame.readResultSet(
        resultSet = this,
        dbType = dbType,
        limit = limit,
        inferNullability = inferNullability,
    )
}

/**
 * Reads the data from a [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
 *
 * A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
 * By default, a ResultSet object is not updatable and has a cursor that can only move forward,
 * so it can be iterated only once, from the first row to the last row.
 *
 * __NOTE:__ Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
 *
 * @param [resultSet] the [ResultSet][java.sql.ResultSet] containing the data to read;
 * its state may be altered after the read operation.
 * @param [connection] the connection to the database (it's required to extract the database type)
 * that the [ResultSet] belongs to.
 * @param [limit] the maximum number of rows to read;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [connection].
 * @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
 *
 * @see [java.sql.ResultSet]
 */
public fun DataFrame.Companion.readResultSet(
    resultSet: ResultSet,
    connection: Connection,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
): AnyFrame {
    validateLimit(limit)
    // Detect the dialect from the connection unless the caller supplied one explicitly.
    return readResultSet(resultSet, dbType ?: extractDBTypeFromConnection(connection), limit, inferNullability)
}

/**
 * Reads the data from this [ResultSet][java.sql.ResultSet] and converts it into a DataFrame.
 *
 * A [ResultSet][java.sql.ResultSet] object maintains a cursor pointing to its current row of data.
 * By default, a ResultSet object is not updatable and has a cursor that can only move forward,
 * so it can be iterated only once, from the first row to the last row.
 *
 * __NOTE:__ Reading from the [ResultSet][java.sql.ResultSet] could potentially change its state.
 *
 * @param [connection] the connection to the database (it's required to extract the database type)
 * that the [ResultSet] belongs to.
 * @param [limit] the maximum number of rows to read;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [connection].
 * @return the DataFrame generated from the [ResultSet][java.sql.ResultSet] data.
 *
 * @see [java.sql.ResultSet]
 */
public fun ResultSet.readDataFrame(
    connection: Connection,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
): AnyFrame {
    validateLimit(limit)
    return DataFrame.readResultSet(
        resultSet = this,
        connection = connection,
        limit = limit,
        inferNullability = inferNullability,
        dbType = dbType,
    )
}

/**
 * Reads all non-system tables from a database and returns them
 * as a map of SQL table names to corresponding dataframes.
 *
 * ### Default Behavior:
 * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
 * - explicitly set as read-only via `Connection.setReadOnly(true)`
 * - used with `autoCommit = false`
 * - automatically rolled back after reading, ensuring no changes to the database
 *
 * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
 * and only permits safe `SELECT` operations internally.
 *
 * @param [dbConfig] the database configuration to connect to the database, including URL, user, and password.
 * @param [catalogue] a name of the catalog from which tables will be retrieved.
 * A null value retrieves tables from all catalogs.
 * @param [limit] the maximum number of rows to read from each table;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [dbConfig].
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database.
 */
public fun DataFrame.Companion.readAllSqlTables(
    dbConfig: DbConnectionConfig,
    catalogue: String? = null,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    configureStatement: (PreparedStatement) -> Unit = {},
): Map<String, AnyFrame> {
    validateLimit(limit)
    // Delegate to the Connection-based overload inside a managed read-only connection.
    return withReadOnlyConnection(dbConfig, dbType) { conn ->
        readAllSqlTables(conn, catalogue, limit, inferNullability, dbType, configureStatement)
    }
}

/**
 * Reads all non-system tables from a database and returns them
 * as a map of SQL table names to corresponding dataframes.
 *
 * A connection is borrowed from [dataSource] and closed after the read completes.
 *
 * ### Example with HikariCP:
 * ```kotlin
 * import com.zaxxer.hikari.HikariConfig
 * import com.zaxxer.hikari.HikariDataSource
 *
 * val config = HikariConfig().apply {
 *     jdbcUrl = "jdbc:postgresql://localhost:5432/mydb"
 *     username = "user"
 *     password = "password"
 * }
 * val dataSource = HikariDataSource(config)
 *
 * // Read all tables from the database
 * val allTables = DataFrame.readAllSqlTables(dataSource, limit = 100)
 *
 * // Access individual tables
 * val customersDF = allTables["customers"]
 * val ordersDF = allTables["orders"]
 * ```
 *
 * @param [dataSource] the [DataSource] to get a database connection from.
 * @param [catalogue] a name of the catalog from which tables will be retrieved.
 * A null value retrieves tables from all catalogs.
 * @param [limit] the maximum number of rows to read from each table;
 * `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`,
 * in that case the [dbType] will be recognized from the [dataSource].
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution,
 * e.g. to tune fetch size, query timeout, and other JDBC parameters.
 * @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database.
 *
 * @see [DataSource.getConnection]
 */
public fun DataFrame.Companion.readAllSqlTables(
    dataSource: DataSource,
    catalogue: String? = null,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    configureStatement: (PreparedStatement) -> Unit = {},
): Map<String, AnyFrame> {
    validateLimit(limit)
    // `use` guarantees the borrowed connection is closed even if reading fails.
    return dataSource.connection.use { conn ->
        readAllSqlTables(conn, catalogue, limit, inferNullability, dbType, configureStatement)
    }
}

/**
 * Reads all non-system tables from a database and returns them
 * as a map of SQL table names to the corresponding dataframes.
 *
 * @param [connection] the database connection to read tables from.
 * @param [catalogue] a name of the catalog from which tables will be retrieved. A null value retrieves tables from all catalogs.
 * @param [limit] the maximum number of rows to read from each table;
 *   `null` (default) means no limit, a positive integer (e.g., `100`) fetches at most that many rows.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @param [dbType] the type of database, could be a custom object provided by the user; optional, default is `null`,
 *   in which case the [dbType] will be recognized from the [connection].
 * @param [configureStatement] optional lambda to configure the [PreparedStatement] before execution.
 *   This allows for custom tuning of fetch size, query timeout, and other JDBC parameters.
 * @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database.
 *
 * @see [DriverManager.getConnection]
 */
public fun DataFrame.Companion.readAllSqlTables(
    connection: Connection,
    catalogue: String? = null,
    limit: Int? = null,
    inferNullability: Boolean = true,
    dbType: DbType? = null,
    configureStatement: (PreparedStatement) -> Unit = {},
): Map<String, AnyFrame> {
    validateLimit(limit)
    val determinedDbType = dbType ?: extractDBTypeFromConnection(connection)
    val metaData = connection.metaData

    // FIX: close the metadata ResultSet when iteration is done — the previous version never closed it.
    return retrieveTableMetadata(metaData, catalogue, determinedDbType).use { tablesResultSet ->
        buildMap {
            while (tablesResultSet.next()) {
                val tableMetadata = determinedDbType.buildTableMetadata(tablesResultSet)

                // We filter here a second time because of specific logic with SQLite and possible issues with future databases
                if (determinedDbType.isSystemTable(tableMetadata)) {
                    continue
                }

                val fullTableName = buildFullTableName(catalogue, tableMetadata.schemaName, tableMetadata.name)

                // FIX: pass the already-resolved db type instead of the raw (possibly null) parameter,
                // so the db type is not re-detected from the connection once per table.
                val dataFrame = readTableAsDataFrame(
                    connection,
                    fullTableName,
                    limit,
                    inferNullability,
                    determinedDbType,
                    configureStatement,
                )

                put(fullTableName, dataFrame)
            }
        }
    }
}
|
||||
|
||||
/**
 * Queries JDBC [DatabaseMetaData] for the tables of [catalogue], restricted to the
 * table types declared by [dbType] (when non-null) so that system- and data-less
 * tables are excluded up front (that filtering is supported badly by many databases).
 * The caller owns — and must close — the returned [ResultSet].
 */
private fun retrieveTableMetadata(metaData: DatabaseMetaData, catalogue: String?, dbType: DbType): ResultSet =
    metaData.getTables(catalogue, null, null, dbType.tableTypes?.toTypedArray())
|
||||
|
||||
/**
 * Joins catalogue, schema, and table name with dots into a fully qualified name.
 *
 * TODO: both cases is schema specified or not in URL
 * in h2 database name is recognized as a schema name https://www.h2database.com/html/features.html#database_url
 * https://stackoverflow.com/questions/20896935/spring-hibernate-h2-database-schema-not-found
 * could be Dialect/Database specific
 */
private fun buildFullTableName(catalogue: String?, schemaName: String?, tableName: String): String =
    if (catalogue == null) {
        // NOTE(review): when no catalogue is given, schemaName is intentionally ignored
        // (matching the original behavior) — confirm whether "schema.table" should be produced here.
        tableName
    } else {
        listOfNotNull(catalogue, schemaName, tableName).joinToString(".")
    }
|
||||
|
||||
/**
 * Reads a single table into a dataframe via [DataFrame.readSqlTable],
 * logging before and after the read for traceability.
 *
 * @param connection open JDBC connection to read from.
 * @param tableName fully qualified table name.
 * @param limit maximum rows to fetch, or `null` for all rows.
 * @param inferNullability whether column nullability is inferred from the data.
 * @param dbType database dialect, or `null` to let the reader detect it.
 * @param configureStatement hook applied to the [PreparedStatement] before execution.
 */
private fun readTableAsDataFrame(
    connection: Connection,
    tableName: String,
    limit: Int?,
    inferNullability: Boolean,
    dbType: DbType?,
    configureStatement: (PreparedStatement) -> Unit = {},
): AnyFrame {
    logger.debug { "Reading table: $tableName" }

    return DataFrame.readSqlTable(
        connection,
        tableName,
        limit,
        inferNullability,
        dbType,
        true,
        configureStatement,
    ).also {
        logger.debug { "Finished reading table: $tableName" }
    }
}
|
||||
|
||||
/** Extracts per-column metadata for the current [ResultSet] via the dialect and returns a mutable copy. */
internal fun getTableColumnsMetadata(resultSet: ResultSet, dbType: DbType): MutableList<TableColumnMetadata> {
    val columns = dbType.getTableColumnsMetadata(resultSet)
    return columns.toMutableList()
}
|
||||
|
||||
/**
 * Fetches and converts data from a ResultSet into a dataframe.
 *
 * @param [tableColumns] a list containing the column metadata for the table.
 * @param [rs] the ResultSet object containing the data to be fetched and converted.
 * @param [dbType] the type of the database.
 * @param [limit] the maximum number of rows to retrieve from the table;
 *   `null` (default) means no limit — all available rows will be fetched.
 * @param [inferNullability] indicates how the column nullability should be inferred.
 * @return the resulting [AnyFrame].
 */
internal fun fetchAndConvertDataFromResultSet(
    tableColumns: MutableList<TableColumnMetadata>,
    rs: ResultSet,
    dbType: DbType,
    limit: Int?,
    inferNullability: Boolean,
): AnyFrame {
    // Resolve each column's Kotlin type once, read all rows column-wise, then assemble the frame.
    val kTypes = buildColumnKTypes(tableColumns, dbType)
    val rows = readAllRowsFromResultSet(rs, tableColumns, kTypes, dbType, limit)

    return buildDataFrameFromColumnData(rows, tableColumns, kTypes, dbType, inferNullability).also { df ->
        logger.debug {
            "DataFrame with ${df.rowsCount()} rows and ${df.columnsCount()} columns created as a result of SQL query."
        }
    }
}
|
||||
|
||||
/**
 * Builds a map from column index to the column's Kotlin type,
 * as resolved by [generateKType] for the given dialect.
 */
private fun buildColumnKTypes(tableColumns: List<TableColumnMetadata>, dbType: DbType): Map<Int, KType> =
    buildMap {
        tableColumns.forEachIndexed { index, column ->
            put(index, generateKType(dbType, column))
        }
    }
|
||||
|
||||
/**
 * Reads all rows from ResultSet and returns a column-oriented data structure.
 * Returns mutable lists to allow efficient post-processing without copying.
 */
private fun readAllRowsFromResultSet(
    rs: ResultSet,
    tableColumns: List<TableColumnMetadata>,
    columnKTypes: Map<Int, KType>,
    dbType: DbType,
    limit: Int?,
): List<MutableList<Any?>> {
    val columnData = List(tableColumns.size) { mutableListOf<Any?>() }
    var rowsRead = 0

    while (true) {
        // rs.next() is deliberately called BEFORE the limit check, matching the
        // original short-circuit order and therefore the same cursor position on exit.
        if (!rs.next()) break
        if (limit != null && rowsRead >= limit) break

        for (columnIndex in tableColumns.indices) {
            val value = dbType.extractValueFromResultSet(
                rs = rs,
                columnIndex = columnIndex,
                columnMetadata = tableColumns[columnIndex],
                kType = columnKTypes.getValue(columnIndex),
            )
            columnData[columnIndex].add(value)
        }
        rowsRead++
        // if (rowsRead % 1000 == 0) logger.debug { "Loaded $rowsRead rows." } // TODO: https://github.com/Kotlin/dataframe/issues/455
    }

    return columnData
}
|
||||
|
||||
/**
 * Builds DataFrame from column-oriented data.
 * Accepts mutable lists to enable efficient in-place transformations.
 */
private fun buildDataFrameFromColumnData(
    columnData: List<MutableList<Any?>>,
    tableColumns: List<TableColumnMetadata>,
    columnKTypes: Map<Int, KType>,
    dbType: DbType,
    inferNullability: Boolean,
): AnyFrame {
    val columns = columnData.indices.map { index ->
        dbType.buildDataColumn(
            name = tableColumns[index].name,
            values = columnData[index],
            kType = columnKTypes.getValue(index),
            inferNullability = inferNullability,
        )
    }
    return columns.toDataFrame()
}
|
||||
|
||||
/**
 * Generates a KType based on the given database type and table column metadata.
 * Dialect-specific mapping takes precedence; the common SQL-to-Kotlin mapping is the fallback.
 *
 * @param dbType The database type.
 * @param tableColumnMetadata The table column metadata.
 * @return The generated KType.
 */
internal fun generateKType(dbType: DbType, tableColumnMetadata: TableColumnMetadata): KType {
    val dialectSpecific = dbType.convertSqlTypeToKType(tableColumnMetadata)
    return dialectSpecific ?: dbType.makeCommonSqlToKTypeMapping(tableColumnMetadata)
}
|
||||
Vendored
+250
@@ -0,0 +1,250 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.DbType
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.extractDBTypeFromUrl
|
||||
import java.sql.Connection
|
||||
import java.sql.SQLException
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
/**
 * Validates the provided limit to ensure it is either null or a positive integer.
 * Throws an [IllegalArgumentException] if the limit is negative or zero.
 *
 * @param limit The maximum allowed number of rows. Use `null` for unlimited rows.
 */
internal fun validateLimit(limit: Int?) {
    if (limit != null && limit <= 0) {
        throw IllegalArgumentException(
            "Parameter 'limit' must be positive, but was: $limit. Use null for unlimited rows.",
        )
    }
}
|
||||
|
||||
/**
 * Executes the given block with a managed JDBC connection created from [DbConnectionConfig].
 *
 * If [DbConnectionConfig.readOnly] is `true`, the connection is:
 * - used with auto-commit disabled
 * - rolled back after execution to prevent unintended modifications
 *
 * NOTE(review): despite the name, the connection is NOT marked read-only via
 * `Connection.setReadOnly(true)` — only auto-commit + rollback are used. Confirm
 * whether an explicit read-only hint to the driver is intended.
 *
 * This utility guarantees proper closing of the connection and safe rollback in read-only mode.
 * It should be used when the user does not manually manage JDBC connections.
 *
 * @param [dbConfig] The configuration used to create the connection.
 * @param [dbType] Optional database type; when `null`, it is derived from [DbConnectionConfig.url].
 * @param [block] A lambda that runs with an open and managed [java.sql.Connection].
 * @return The result of the [block] execution.
 */
internal inline fun <T> withReadOnlyConnection(
    dbConfig: DbConnectionConfig,
    dbType: DbType? = null,
    block: (Connection) -> T,
): T {
    val actualDbType = dbType ?: extractDBTypeFromUrl(dbConfig.url)
    val connection = actualDbType.createConnection(dbConfig)

    // `use` closes the connection even if block() or rollback() throws.
    return connection.use { conn ->
        try {
            if (dbConfig.readOnly) {
                conn.autoCommit = false
            }

            block(conn)
        } finally {
            if (dbConfig.readOnly) {
                try {
                    conn.rollback()
                } catch (e: SQLException) {
                    // Best effort: a failed rollback must not mask the block's result/exception.
                    logger.warn(e) {
                        "Failed to rollback read-only transaction (url=${dbConfig.url})"
                    }
                }
            }
        }
    }
}
|
||||
|
||||
/**
 * A regular expression defining the valid pattern for SQL table names.
 *
 * This pattern enforces that table names must:
 * - Contain only Unicode letters (`\p{L}`), Unicode digits (`\p{N}`), or underscores.
 * - Optionally be segmented by dots to indicate schema and table separation.
 *
 * It ensures compatibility with most SQL database naming conventions, thus minimizing risks of invalid names
 * or injection vulnerabilities.
 *
 * Example of valid table names:
 * - `my_table`
 * - `schema1.table2`
 *
 * Example of invalid table names:
 * - `my-table` (contains a dash)
 * - `table!name` (contains special characters)
 * - `.startWithDot` (cannot start or end with a dot, nor contain empty segments)
 */
internal const val TABLE_NAME_VALID_PATTERN = "^[\\p{L}\\p{N}_]+(\\.[\\p{L}\\p{N}_]+)*$"
|
||||
|
||||
/** Matches the standalone keyword `SELECT`, case-insensitively. Compiled once to avoid per-call regex compilation. */
private val SQL_QUERY_KEYWORD_PATTERN = Regex("(?i)\\b(SELECT)\\b")

/**
 * Returns `true` if [sqlQueryOrTableName] looks like an SQL query, i.e. contains
 * `SELECT` as a whole word anywhere in the (trimmed) input.
 */
internal fun isSqlQuery(sqlQueryOrTableName: String): Boolean =
    SQL_QUERY_KEYWORD_PATTERN.containsMatchIn(sqlQueryOrTableName.trim())
|
||||
|
||||
/**
 * SQL table name pattern matching: __catalog.schema.table__
 * Allows alphanumeric characters and underscores, must start with letter or underscore.
 * Compiled once at load time.
 */
private val SQL_TABLE_NAME_PATTERN = Regex("^[a-zA-Z_][a-zA-Z0-9_]*(\\.[a-zA-Z_][a-zA-Z0-9_]*){0,2}$")

/**
 * Returns `true` when the (trimmed) input is a syntactically valid table name,
 * optionally qualified by schema and catalog (e.g., `catalog.schema.table`).
 */
internal fun isSqlTableName(sqlQueryOrTableName: String): Boolean =
    SQL_TABLE_NAME_PATTERN.matches(sqlQueryOrTableName.trim())
|
||||
|
||||
/**
 * Case-insensitive regexes for SQL fragments that are rejected during validation:
 * statement separators, comments, and standalone data/schema-modifying commands.
 * Used by [hasForbiddenPatterns]; word-boundary anchors (`\b`) keep identifiers
 * such as `HELLO_ALTER` from matching.
 */
internal val FORBIDDEN_PATTERNS_REGEX = listOf(
    ";", // Separator for SQL statements
    "--", // Single-line comments
    "/\\*", // Start of multi-line comments
    "\\*/", // End of multi-line comments
    "\\bDROP\\b", // DROP as a full word
    "\\bDELETE\\b", // DELETE as a full word
    "\\bINSERT\\b", // INSERT as a full word
    "\\bUPDATE\\b", // UPDATE as a full word
    "\\bEXEC\\b", // EXEC as a full word
    "\\bEXECUTE\\b", // EXECUTE as a full word
    "\\bCREATE\\b", // CREATE as a full word
    "\\bALTER\\b", // ALTER as a full word
    "\\bGRANT\\b", // GRANT as a full word
    "\\bREVOKE\\b", // REVOKE as a full word
    "\\bMERGE\\b", // MERGE as a full word
).map { Regex(it, RegexOption.IGNORE_CASE) }
|
||||
|
||||
/**
 * Checks if a given string contains forbidden patterns or keywords.
 * Logs a clear and friendly message if any forbidden pattern is found.
 *
 * ### Forbidden SQL Examples:
 * 1. **Single-line comment** (using `--`):
 *    - `SELECT * FROM Sale WHERE amount = 100.0 -- AND id = 5`
 * 2. **Multi-line comment** (using `/* */`):
 *    - `SELECT * FROM Customer /* Possible malicious comment */ WHERE id = 1`
 * 3. **Multiple statements separated by semicolon (`;`)**:
 *    - `SELECT * FROM Sale WHERE amount = 500.0; DROP TABLE Customer`
 * 4. **Usage of dangerous commands like `DROP`, `DELETE`, `ALTER`, etc.**:
 *    - `DROP TABLE Customer; SELECT * FROM Sale`
 *
 * ### Allowed SQL Examples:
 * 1. Reserved words as parts of identifiers: `SELECT last_update FROM HELLO_ALTER`
 * 2. Fully valid syntax: `SELECT id, name FROM Customers WHERE age > 25`
 * 3. Identifiers resembling commands: `SELECT id, amount FROM TRANSACTION_DROP`
 * 4. Case-insensitive identifiers: `select Id, Name from Hello_Table`
 *
 * ### Key Notes:
 * - Reserved keywords like `DROP`, `DELETE`, `ALTER`, etc., are forbidden **only when they appear as standalone commands**.
 * - Reserved words as parts of table or column names (e.g., `HELLO_ALTER`, `myDropTable`) **are allowed**.
 * - Inline or multi-line comments (`--` or `/* */`) are restricted to prevent potential SQL injection attacks.
 * - Multiple SQL statements separated by semicolons (`;`) are not allowed to prevent the execution of unintended commands.
 */
internal fun hasForbiddenPatterns(input: String): Boolean {
    // Like the original loop, only the FIRST matching pattern is reported.
    val violation = FORBIDDEN_PATTERNS_REGEX.firstOrNull { it.containsMatchIn(input) } ?: return false
    logger.error {
        "Validation failed: The input contains a forbidden element matching '${violation.pattern}'. Please review the input: '$input'."
    }
    return true
}
|
||||
|
||||
/**
 * Allowed list of SQL operators a validated query may start with.
 * NOTE(review): [isValidSqlQuery] matches these by `startsWith` on the trimmed,
 * uppercased query, so a prefix like `SELECTION...` would also pass — confirm
 * whether word-boundary matching is intended.
 */
internal val ALLOWED_SQL_OPERATORS = listOf("SELECT", "WITH", "VALUES", "TABLE")
|
||||
|
||||
/**
 * Validates if the SQL query is safe and starts with SELECT.
 * Ensures a proper syntax structure, checks for balanced quotes, and disallows dangerous commands or patterns.
 */
internal fun isValidSqlQuery(sqlQuery: String): Boolean {
    val normalizedSqlQuery = sqlQuery.trim().uppercase()

    logger.debug { "Validating SQL query: '$sqlQuery'" }

    // 1. The query must begin with one of the allowed SQL operators (prefix match on the normalized query).
    val startsWithAllowedOperator = ALLOWED_SQL_OPERATORS.any { normalizedSqlQuery.startsWith(it) }
    if (!startsWithAllowedOperator) {
        logger.error {
            "Validation failed: The SQL query must start with one of: $ALLOWED_SQL_OPERATORS. Given query: '$sqlQuery'."
        }
        return false
    }

    // 2. No statement separators, comments, or data/schema-modifying commands.
    if (hasForbiddenPatterns(normalizedSqlQuery)) {
        return false
    }

    // 3. Both single and double quotes must come in pairs (counted over the raw query).
    fun quotesBalanced(quote: Char): Boolean = sqlQuery.count { it == quote } % 2 == 0

    if (!quotesBalanced('\'')) {
        logger.error {
            "Validation failed: Unbalanced single quotes in the SQL query. Please correct the query: '$sqlQuery'."
        }
        return false
    }
    if (!quotesBalanced('"')) {
        logger.error {
            "Validation failed: Unbalanced double quotes in the SQL query. Please correct the query: '$sqlQuery'."
        }
        return false
    }

    logger.debug { "SQL query validation succeeded for query: '$sqlQuery'." }
    return true
}
|
||||
|
||||
/** Compiled once from [TABLE_NAME_VALID_PATTERN] to avoid per-call regex compilation. */
private val TABLE_NAME_REGEX = Regex(TABLE_NAME_VALID_PATTERN)

/**
 * Validates if the given SQL table name is safe and logs any validation violations.
 *
 * The name is trimmed and uppercased, checked against the forbidden patterns
 * ([hasForbiddenPatterns]), then matched against [TABLE_NAME_VALID_PATTERN]
 * (letters, numbers, underscores, dot-separated segments).
 */
internal fun isValidTableName(tableName: String): Boolean {
    val normalizedTableName = tableName.trim().uppercase()

    logger.debug { "Validating SQL table name: '$tableName'" }

    // Validate against forbidden patterns
    if (hasForbiddenPatterns(normalizedTableName)) {
        return false
    }

    // Validate the table name structure: letters, numbers, underscores, and dots are allowed
    if (!TABLE_NAME_REGEX.matches(normalizedTableName)) {
        logger.error {
            "Validation failed: The table name contains invalid characters. " +
                "Only letters, numbers, underscores, and dots are allowed. Provided name: '$tableName'."
        }
        return false
    }

    logger.debug { "Table name validation passed for table: '$tableName'." }
    return true
}
|
||||
+1
@@ -0,0 +1 @@
|
||||
org.jetbrains.kotlinx.dataframe.io.Jdbc
|
||||
Vendored
+160
@@ -0,0 +1,160 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.kotest.assertions.withClue
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.inferType
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.MsSql
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import java.sql.Connection
|
||||
import java.sql.ResultSet
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
// Table name used by this shared test; created and dropped within inferNullability().
private const val TEST_TABLE_NAME = "testtable123"

/**
 * Shared test routine: creates a small table with nullable and NOT NULL columns,
 * then verifies that `readSqlTable`, `readSqlQuery`, and `readResultSet`
 * (plus their schema counterparts) infer column nullability correctly with
 * `inferNullability = true` (default) and `= false`.
 */
internal fun inferNullability(connection: Connection) {
    connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") }

    // prepare tables and data
    @Language("SQL")
    val createTestTable1Query = """
        CREATE TABLE $TEST_TABLE_NAME (
            id INT PRIMARY KEY,
            name VARCHAR(50),
            surname VARCHAR(50),
            age INT NOT NULL
        )
        """

    connection.createStatement().use { st -> st.execute(createTestTable1Query) }

    // Row 4 has a NULL surname — the only column whose data actually contains nulls.
    connection.createStatement()
        .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (1, 'John', 'Crawford', 40)")
    connection.createStatement()
        .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (2, 'Alice', 'Smith', 25)")
    connection.createStatement()
        .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (3, 'Bob', 'Johnson', 47)")
    connection.createStatement()
        .execute("INSERT INTO $TEST_TABLE_NAME (id, name, surname, age) VALUES (4, 'Sam', NULL, 15)")

    // start testing `readSqlTable` method

    // with default inferNullability: Boolean = true
    val df = DataFrame.readSqlTable(connection, TEST_TABLE_NAME)
    df.schema().columns["id"]!!.type shouldBe typeOf<Int>()
    df.schema().columns["name"]!!.type shouldBe typeOf<String>()
    df.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
    df.schema().columns["age"]!!.type shouldBe typeOf<Int>()

    val dataSchema = DataFrameSchema.readSqlTable(connection, TEST_TABLE_NAME)
    dataSchema.columns.size shouldBe 4
    dataSchema.columns["id"]!!.type shouldBe typeOf<Int>()
    dataSchema.columns["name"]!!.type shouldBe typeOf<String?>()
    dataSchema.columns["surname"]!!.type shouldBe typeOf<String?>()
    dataSchema.columns["age"]!!.type shouldBe typeOf<Int>()

    // with inferNullability: Boolean = false
    val df1 = DataFrame.readSqlTable(connection, TEST_TABLE_NAME, inferNullability = false)
    df1.schema().columns["id"]!!.type shouldBe typeOf<Int>()

    // this column changed a type because it doesn't contain nulls
    df1.schema().columns["name"]!!.type shouldBe typeOf<String?>()
    df1.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
    df1.schema().columns["age"]!!.type shouldBe typeOf<Int>()

    // end testing `readSqlTable` method

    // start testing `readSQLQuery` method

    // with default inferNullability: Boolean = true
    @Language("SQL")
    val sqlQuery =
        """
        SELECT name, surname, age FROM $TEST_TABLE_NAME
        """.trimIndent()

    val df2 = DataFrame.readSqlQuery(connection, sqlQuery)
    df2.schema().columns["name"]!!.type shouldBe typeOf<String>()
    df2.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
    df2.schema().columns["age"]!!.type shouldBe typeOf<Int>()

    val dataSchema2 = DataFrameSchema.readSqlQuery(connection, sqlQuery)
    dataSchema2.columns.size shouldBe 3
    dataSchema2.columns["name"]!!.type shouldBe typeOf<String?>()
    dataSchema2.columns["surname"]!!.type shouldBe typeOf<String?>()
    dataSchema2.columns["age"]!!.type shouldBe typeOf<Int>()

    // with inferNullability: Boolean = false
    val df3 = DataFrame.readSqlQuery(connection, sqlQuery, inferNullability = false)
    // this column changed a type because it doesn't contain nulls
    df3.schema().columns["name"]!!.type shouldBe typeOf<String?>()
    df3.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
    df3.schema().columns["age"]!!.type shouldBe typeOf<Int>()

    // end testing `readSQLQuery` method

    // start testing `readResultSet` method

    // A scrollable statement is needed so the cursor can be rewound with beforeFirst().
    connection.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_UPDATABLE).use { st ->
        @Language("SQL")
        val selectStatement = "SELECT * FROM $TEST_TABLE_NAME"

        st.executeQuery(selectStatement).use { rs ->
            // with default inferNullability: Boolean = true
            val df4 = DataFrame.readResultSet(rs, MsSql)
            df4.schema().columns["id"]!!.type shouldBe typeOf<Int>()
            df4.schema().columns["name"]!!.type shouldBe typeOf<String>()
            df4.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
            df4.schema().columns["age"]!!.type shouldBe typeOf<Int>()

            rs.beforeFirst()

            val dataSchema3 = DataFrameSchema.readResultSet(rs, MsSql)
            dataSchema3.columns.size shouldBe 4
            dataSchema3.columns["id"]!!.type shouldBe typeOf<Int>()
            dataSchema3.columns["name"]!!.type shouldBe typeOf<String?>()
            dataSchema3.columns["surname"]!!.type shouldBe typeOf<String?>()
            dataSchema3.columns["age"]!!.type shouldBe typeOf<Int>()

            // with inferNullability: Boolean = false
            rs.beforeFirst()

            val df5 = DataFrame.readResultSet(rs, MsSql, inferNullability = false)
            df5.schema().columns["id"]!!.type shouldBe typeOf<Int>()

            // this column changed a type because it doesn't contain nulls
            df5.schema().columns["name"]!!.type shouldBe typeOf<String?>()
            df5.schema().columns["surname"]!!.type shouldBe typeOf<String?>()
            df5.schema().columns["age"]!!.type shouldBe typeOf<Int>()
        }
    }
    // end testing `readResultSet` method

    connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $TEST_TABLE_NAME") }
}
|
||||
|
||||
/**
 * Helper to check whether the provided schema matches the inferred schema.
 *
 * It must hold that all types in the provided schema are equal or super to
 * the corresponding types in the inferred schema. On failure, the clue prints
 * both schemas side by side for easy comparison.
 */
@Suppress("INVISIBLE_REFERENCE")
fun AnyFrame.assertInferredTypesMatchSchema() {
    withClue({
        """
        |Inferred schema must be <: Provided schema
        |
        |Inferred Schema:
        |${inferType().schema().toString().lines().joinToString("\n|")}
        |
        |Provided Schema:
        |${schema().toString().lines().joinToString("\n|")}
        """.trimMargin()
    }) {
        schema().compare(inferType().schema()).isSuperOrMatches() shouldBe true
    }
}
|
||||
+1381
File diff suppressed because it is too large
Load Diff
Vendored
+425
@@ -0,0 +1,425 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.h2
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.add
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.api.select
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Blob
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.Date
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
private const val URL = "jdbc:h2:mem:test1;DB_CLOSE_DELAY=-1;MODE=MariaDB;DATABASE_TO_LOWER=TRUE"
|
||||
|
||||
// Schema for `table1` (all columns NOT NULL except smallintCol); property names are
// lowercase for several columns — presumably due to DATABASE_TO_LOWER=TRUE in the
// H2 URL above, TODO confirm.
@DataSchema
interface Table1MariaDb {
    val id: Int
    val bitCol: Boolean
    val tinyintcol: Int
    val smallintcol: Short?
    val mediumintcol: Int
    val mediumintunsignedcol: Int
    val integercol: Int
    val intCol: Int
    val integerunsignedcol: Long
    val bigintcol: Long
    val floatcol: Float
    val doublecol: Double
    val decimalcol: BigDecimal
    val dateCol: String
    val datetimeCol: String
    val timestampCol: String
    val timeCol: String
    val yearCol: String
    val varcharCol: String
    val charCol: String
    val binaryCol: ByteArray
    val varbinaryCol: ByteArray
    val tinyblobCol: ByteArray
    val blobCol: ByteArray
    val mediumblobCol: ByteArray
    val longblobCol: ByteArray
    val textCol: String
    val mediumtextCol: String
    val longtextCol: String
    val enumCol: String
    val jsonCol: String
}
|
||||
|
||||
// Schema for `table2` — same column set as Table1MariaDb but every column (except id)
// is declared without NOT NULL, hence all nullable property types.
@DataSchema
interface Table2MariaDb {
    val id: Int
    val bitCol: Boolean?
    val tinyintCol: Int?
    val smallintCol: Int?
    val mediumintCol: Int?
    val mediumintUnsignedCol: Int?
    val integercol: Int?
    val intCol: Int?
    val integerUnsignedCol: Long?
    val bigintCol: Long?
    val floatCol: Float?
    val doubleCol: Double?
    val decimalCol: Double?
    val dateCol: String?
    val datetimeCol: String?
    val timestampCol: String?
    val timeCol: String?
    val yearCol: String?
    val varcharCol: String?
    val charCol: String?
    val binaryCol: ByteArray?
    val varbinaryCol: ByteArray?
    val tinyblobCol: ByteArray?
    val blobCol: ByteArray?
    val mediumblobCol: ByteArray?
    val longblobCol: ByteArray?
    val textCol: String?
    val mediumtextCol: String?
    val longtextCol: String?
    val enumCol: String?
    val jsonCol: String?
}
|
||||
|
||||
// Minimal schema used for ENUM/SET column tests.
@DataSchema
interface Table3MariaDb {
    val id: Int
    val enumCol: String
    val setCol: Char?
}
|
||||
|
||||
// Sample JSON document inserted into the JSON column in the tests below.
private const val JSON_STRING =
    "{\"details\": {\"foodType\": \"Pizza\", \"menu\": \"https://www.loumalnatis.com/our-menu\"}, \n" +
        " \t\"favorites\": [{\"description\": \"Pepperoni deep dish\", \"price\": 18.75}, \n" +
        "{\"description\": \"The Lou\", \"price\": 24.75}]}"
|
||||
|
||||
/**
 * Tests reading MariaDB-flavored SQL types and values through an in-memory H2
 * database (the file-level `URL` constant selects H2's MariaDB compatibility mode).
 *
 * `table1` declares (almost) all columns NOT NULL; `table2` declares them nullable,
 * so the two tables together exercise both sides of nullability inference.
 */
class MariadbH2Test {
    companion object {
        private lateinit var connection: Connection

        /**
         * Opens the shared connection, creates `table1`/`table2`, and inserts
         * three rows into each. Runs once before all tests in this class.
         */
        @BeforeClass
        @JvmStatic
        fun setUpClass() {
            connection = DriverManager.getConnection(URL)

            @Language("SQL")
            val createTableQuery = """
            CREATE TABLE IF NOT EXISTS table1 (
                id INT AUTO_INCREMENT PRIMARY KEY,
                bitCol BIT NOT NULL,
                tinyintCol TINYINT NOT NULL,
                smallintCol SMALLINT,
                mediumintCol MEDIUMINT NOT NULL,
                mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
                integerCol INTEGER NOT NULL,
                intCol INT NOT NULL,
                integerUnsignedCol INTEGER UNSIGNED NOT NULL,
                bigintCol BIGINT NOT NULL,
                floatCol FLOAT NOT NULL,
                doubleCol DOUBLE NOT NULL,
                decimalCol DECIMAL NOT NULL,
                dateCol DATE NOT NULL,
                datetimeCol DATETIME NOT NULL,
                timestampCol TIMESTAMP NOT NULL,
                timeCol TIME NOT NULL,
                yearCol YEAR NOT NULL,
                varcharCol VARCHAR(255) NOT NULL,
                charCol CHAR(10) NOT NULL,
                binaryCol BINARY(64) NOT NULL,
                varbinaryCol VARBINARY(128) NOT NULL,
                tinyblobCol TINYBLOB NOT NULL,
                blobCol BLOB NOT NULL,
                mediumblobCol MEDIUMBLOB NOT NULL ,
                longblobCol LONGBLOB NOT NULL,
                textCol TEXT NOT NULL,
                mediumtextCol MEDIUMTEXT NOT NULL,
                longtextCol LONGTEXT NOT NULL,
                enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
                jsonCol JSON NOT NULL
            )
            """
            connection.createStatement().execute(createTableQuery.trimIndent())

            // Same columns as table1 but all nullable and without the JSON column.
            @Language("SQL")
            val createTableQuery2 = """
            CREATE TABLE IF NOT EXISTS table2 (
                id INT AUTO_INCREMENT PRIMARY KEY,
                bitCol BIT,
                tinyintCol TINYINT,
                smallintCol SMALLINT,
                mediumintCol MEDIUMINT,
                mediumintUnsignedCol MEDIUMINT UNSIGNED,
                integerCol INTEGER,
                intCol INT,
                integerUnsignedCol INTEGER UNSIGNED,
                bigintCol BIGINT,
                floatCol FLOAT,
                doubleCol DOUBLE,
                decimalCol DECIMAL,
                dateCol DATE,
                datetimeCol DATETIME,
                timestampCol TIMESTAMP,
                timeCol TIME,
                yearCol YEAR,
                varcharCol VARCHAR(255),
                charCol CHAR(10),
                binaryCol BINARY(64),
                varbinaryCol VARBINARY(128),
                tinyblobCol TINYBLOB,
                blobCol BLOB,
                mediumblobCol MEDIUMBLOB,
                longblobCol LONGBLOB,
                textCol TEXT,
                mediumtextCol MEDIUMTEXT,
                longtextCol LONGTEXT,
                enumCol ENUM('Value1', 'Value2', 'Value3')
            )
            """
            connection.createStatement().execute(createTableQuery2.trimIndent())

            @Language("SQL")
            val insertData1 =
                """
                INSERT INTO table1 (
                    bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
                    integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
                    timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
                    mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, jsonCol
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """.trimIndent()

            @Language("SQL")
            val insertData2 =
                """
                INSERT INTO table2 (
                    bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
                    integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
                    timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
                    mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """.trimIndent()

            connection.prepareStatement(insertData1).use { st ->
                // Insert data into table1; parameter indices must match the
                // column order in insertData1 exactly.
                for (i in 1..3) {
                    st.setBoolean(1, true)
                    st.setByte(2, i.toByte())
                    st.setShort(3, (i * 10).toShort())
                    st.setInt(4, i * 100)
                    st.setInt(5, i * 100)
                    st.setInt(6, i * 100)
                    st.setInt(7, i * 100)
                    st.setInt(8, i * 100)
                    st.setInt(9, i * 100)
                    st.setFloat(10, i * 10.0f)
                    st.setDouble(11, i * 10.0)
                    st.setBigDecimal(12, BigDecimal(i * 10))
                    st.setDate(13, java.sql.Date(System.currentTimeMillis()))
                    st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
                    st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
                    st.setTime(16, java.sql.Time(System.currentTimeMillis()))
                    st.setInt(17, 2023)
                    st.setString(18, "varcharValue$i")
                    st.setString(19, "charValue$i")
                    st.setBytes(20, "binaryValue".toByteArray())
                    st.setBytes(21, "varbinaryValue".toByteArray())
                    st.setBytes(22, "tinyblobValue".toByteArray())
                    st.setBytes(23, "blobValue".toByteArray())
                    st.setBytes(24, "mediumblobValue".toByteArray())
                    st.setBytes(25, "longblobValue".toByteArray())
                    st.setString(26, "textValue$i")
                    st.setString(27, "mediumtextValue$i")
                    st.setString(28, "longtextValue$i")
                    st.setString(29, "Value$i")
                    st.setString(30, JSON_STRING)

                    st.executeUpdate()
                }
            }

            connection.prepareStatement(insertData2).use { st ->
                // Insert data into table2; text/mediumtext deliberately NULL (rows 26/27)
                // so nullability inference has real NULLs to observe.
                for (i in 1..3) {
                    st.setBoolean(1, false)
                    st.setByte(2, (i * 2).toByte())
                    st.setShort(3, (i * 20).toShort())
                    st.setInt(4, i * 200)
                    st.setInt(5, i * 200)
                    st.setInt(6, i * 200)
                    st.setInt(7, i * 200)
                    st.setInt(8, i * 200)
                    st.setInt(9, i * 200)
                    st.setFloat(10, i * 20.0f)
                    st.setDouble(11, i * 20.0)
                    st.setBigDecimal(12, BigDecimal(i * 20))
                    st.setDate(13, java.sql.Date(System.currentTimeMillis()))
                    st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
                    st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
                    st.setTime(16, java.sql.Time(System.currentTimeMillis()))
                    st.setInt(17, 2023)
                    st.setString(18, "varcharValue$i")
                    st.setString(19, "charValue$i")
                    st.setBytes(20, "binaryValue".toByteArray())
                    st.setBytes(21, "varbinaryValue".toByteArray())
                    st.setBytes(22, "tinyblobValue".toByteArray())
                    st.setBytes(23, "blobValue".toByteArray())
                    st.setBytes(24, "mediumblobValue".toByteArray())
                    st.setBytes(25, "longblobValue".toByteArray())
                    st.setString(26, null)
                    st.setString(27, null)
                    st.setString(28, "longtextValue$i")
                    st.setString(29, "Value$i")
                    st.executeUpdate()
                }
            }
        }

        /** Closes the shared connection after all tests; close failures are logged only. */
        @AfterClass
        @JvmStatic
        fun tearDownClass() {
            try {
                connection.close()
            } catch (e: SQLException) {
                e.printStackTrace()
            }
        }
    }

    @Test
    fun `basic test for reading sql tables`() {
        val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
        val result = df1.filter { it[Table1MariaDb::id] == 1 }
        result[0][26] shouldBe "textValue1"
        val byteArray = "tinyblobValue".toByteArray()
        // FIX: the contentEquals result was previously computed and silently
        // discarded, so the blob comparison could never fail; assert it.
        ((result[0][22] as Blob).getBytes(1, byteArray.size) contentEquals byteArray) shouldBe true

        val schema = DataFrameSchema.readSqlTable(connection, "table1")
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["textcol"]!!.type shouldBe typeOf<String>()
        schema.columns["varbinarycol"]!!.type shouldBe typeOf<ByteArray>()
        schema.columns["binarycol"]!!.type shouldBe typeOf<ByteArray>()
        schema.columns["longblobcol"]!!.type shouldBe typeOf<Blob>()
        schema.columns["tinyblobcol"]!!.type shouldBe typeOf<Blob>()
        schema.columns["datecol"]!!.type shouldBe typeOf<Date>()
        schema.columns["datetimecol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
        schema.columns["timestampcol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
        schema.columns["timecol"]!!.type shouldBe typeOf<java.sql.Time>()
        schema.columns["yearcol"]!!.type shouldBe typeOf<Int>()

        val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MariaDb>()
        val result2 = df2.filter { it[Table2MariaDb::id] == 1 }
        result2[0][26] shouldBe null

        val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
        schema2.columns["id"]!!.type shouldBe typeOf<Int>()
        schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
    }

    @Test
    fun `read from sql query`() {
        @Language("SQL")
        val sqlQuery =
            """
            SELECT
               t1.id,
               t1.enumCol
            FROM table1 t1
            JOIN table2 t2 ON t1.id = t2.id
            """.trimIndent()

        val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MariaDb>()
        val result = df.filter { it[Table3MariaDb::id] == 1 }
        result[0][1] shouldBe "Value1"

        val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["enumcol"]!!.type shouldBe typeOf<Any>()
    }

    @Test
    fun `read from all tables`() {
        val dataframes = DataFrame.readAllSqlTables(connection, limit = 1000).values.toList()

        val table1Df = dataframes[0].cast<Table1MariaDb>()

        table1Df.rowsCount() shouldBe 3
        table1Df.filter { it[Table1MariaDb::integercol] > 100 }.rowsCount() shouldBe 2
        table1Df[0][11] shouldBe 10.0
        table1Df[0][26] shouldBe "textValue1"

        val table2Df = dataframes[1].cast<Table2MariaDb>()

        table2Df.rowsCount() shouldBe 3
        table2Df.filter {
            it[Table2MariaDb::integercol] != null && it[Table2MariaDb::integercol]!! > 400
        }.rowsCount() shouldBe 1
        table2Df[0][11] shouldBe 20.0
        table2Df[0][26] shouldBe null
    }

    @Test
    fun `reading numeric types`() {
        val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()

        // Each block re-reads a numeric column via the typed accessor and checks
        // that the typed value matches the expected first-row value.
        val result = df1.select("tinyintcol")
            .add("tinyintcol2") { it[Table1MariaDb::tinyintcol] }

        result[0][1] shouldBe 1

        val result2 = df1.select("mediumintcol")
            .add("mediumintcol2") { it[Table1MariaDb::mediumintcol] }

        result2[0][1] shouldBe 100

        val result3 = df1.select("mediumintunsignedcol")
            .add("mediumintunsignedcol2") { it[Table1MariaDb::mediumintunsignedcol] }

        result3[0][1] shouldBe 100

        val result5 = df1.select("bigintcol")
            .add("bigintcol2") { it[Table1MariaDb::bigintcol] }

        result5[0][1] shouldBe 100

        val result7 = df1.select("doublecol")
            .add("doublecol2") { it[Table1MariaDb::doublecol] }

        result7[0][1] shouldBe 10.0

        val result8 = df1.select("decimalcol")
            .add("decimalcol2") { it[Table1MariaDb::decimalcol] }

        result8[0][1] shouldBe BigDecimal("10")

        val schema = DataFrameSchema.readSqlTable(connection, "table1")

        schema.columns["tinyintcol"]!!.type shouldBe typeOf<Int>()
        schema.columns["smallintcol"]!!.type shouldBe typeOf<Int?>()
        schema.columns["mediumintcol"]!!.type shouldBe typeOf<Int>()
        schema.columns["mediumintunsignedcol"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
        schema.columns["floatcol"]!!.type shouldBe typeOf<Double>()
        schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
        schema.columns["decimalcol"]!!.type shouldBe typeOf<BigDecimal>()
    }

    @Test
    fun `infer nullability`() {
        inferNullability(connection)
    }
}
|
||||
Vendored
+250
@@ -0,0 +1,250 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.h2
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.Date
|
||||
import java.util.UUID
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
// In-memory H2 database emulating SQL Server (MODE=MSSQLServer).
// DB_CLOSE_DELAY=-1 keeps the database alive for the whole JVM so the
// @BeforeClass setup and all tests share one instance.
private const val URL =
    "jdbc:h2:mem:testmssql;DB_CLOSE_DELAY=-1;MODE=MSSQLServer;DATABASE_TO_UPPER=FALSE;CASE_INSENSITIVE_IDENTIFIERS=TRUE"
|
||||
|
||||
/**
 * Expected schema for `Table1` in the MSSQL-mode H2 tests.
 *
 * Property names mirror the columns created in [MSSQLH2Test].
 * NOTE(review): the DDL also creates a `textColumn TEXT` column that is not
 * declared here — confirm whether it should be part of this schema.
 */
@DataSchema
interface Table1MSSSQL {
    val id: Int
    val bigintColumn: Long
    val binaryColumn: ByteArray
    val bitColumn: Boolean
    val charColumn: Char
    val dateColumn: Date
    val datetime3Column: java.sql.Timestamp
    val datetime2Column: java.sql.Timestamp
    val decimalColumn: BigDecimal
    // FLOAT in SQL Server is a double-precision type, hence Kotlin Double.
    val floatColumn: Double
    // Only column inserted as NULL in the fixture, hence nullable.
    val imageColumn: ByteArray?
    val intColumn: Int
    val moneyColumn: BigDecimal
    val ncharColumn: Char
    val ntextColumn: String
    val numericColumn: BigDecimal
    val nvarcharColumn: String
    val nvarcharMaxColumn: String
    val realColumn: Float
    val smalldatetimeColumn: java.sql.Timestamp
    val smallintColumn: Int
    val smallmoneyColumn: BigDecimal
    val timeColumn: java.sql.Time
    val timestampColumn: java.sql.Timestamp
    val tinyintColumn: Int
    val uniqueidentifierColumn: Char
    val varbinaryColumn: ByteArray
    val varbinaryMaxColumn: ByteArray
    val varcharColumn: String
    val varcharMaxColumn: String
    // NOTE(review): geometry/geography columns are not present in the CREATE TABLE
    // statement in this file — verify these two properties are still needed.
    val geometryColumn: String
    val geographyColumn: String
}
|
||||
|
||||
/**
 * Tests reading SQL Server data types through an in-memory H2 database running
 * in MSSQLServer compatibility mode (see the file-level `URL` constant).
 */
class MSSQLH2Test {
    companion object {
        private lateinit var connection: Connection

        /** Opens the shared connection, creates `Table1`, and inserts five rows. */
        @BeforeClass
        @JvmStatic
        fun setUpClass() {
            connection = DriverManager.getConnection(URL)

            @Language("SQL")
            val createTableQuery = """
            CREATE TABLE Table1 (
                id INT NOT NULL IDENTITY PRIMARY KEY,
                bigintColumn BIGINT,
                binaryColumn BINARY(50),
                bitColumn BIT,
                charColumn CHAR(10),
                dateColumn DATE,
                datetime3Column DATETIME2(3),
                datetime2Column DATETIME2,
                decimalColumn DECIMAL(10,2),
                floatColumn FLOAT,
                imageColumn IMAGE,
                intColumn INT,
                moneyColumn MONEY,
                ncharColumn NCHAR(10),
                ntextColumn NTEXT,
                numericColumn NUMERIC(10,2),
                nvarcharColumn NVARCHAR(50),
                nvarcharMaxColumn NVARCHAR(MAX),
                realColumn REAL,
                smalldatetimeColumn SMALLDATETIME,
                smallintColumn SMALLINT,
                smallmoneyColumn SMALLMONEY,
                textColumn TEXT,
                timeColumn TIME,
                timestampColumn DATETIME2,
                tinyintColumn TINYINT,
                uniqueidentifierColumn UNIQUEIDENTIFIER,
                varbinaryColumn VARBINARY(50),
                varbinaryMaxColumn VARBINARY(MAX),
                varcharColumn VARCHAR(50),
                varcharMaxColumn VARCHAR(MAX)
            );
            """

            connection.createStatement().execute(createTableQuery.trimIndent())

            @Language("SQL")
            val insertData1 =
                """
                INSERT INTO Table1 (
                    bigintColumn, binaryColumn, bitColumn, charColumn, dateColumn, datetime3Column, datetime2Column,
                    decimalColumn, floatColumn, imageColumn, intColumn, moneyColumn, ncharColumn,
                    ntextColumn, numericColumn, nvarcharColumn, nvarcharMaxColumn, realColumn, smalldatetimeColumn,
                    smallintColumn, smallmoneyColumn, textColumn, timeColumn, timestampColumn, tinyintColumn,
                    uniqueidentifierColumn, varbinaryColumn, varbinaryMaxColumn, varcharColumn, varcharMaxColumn
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """.trimIndent()

            connection.prepareStatement(insertData1).use { st ->
                // Parameter indices must track the column order in insertData1 exactly.
                for (i in 1..5) {
                    st.setLong(1, 123456789012345L) // bigintColumn
                    st.setBytes(2, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // binaryColumn
                    st.setBoolean(3, true) // bitColumn
                    st.setString(4, "Sample") // charColumn
                    st.setDate(5, java.sql.Date(System.currentTimeMillis())) // dateColumn
                    st.setTimestamp(6, java.sql.Timestamp(System.currentTimeMillis())) // datetime3Column
                    st.setTimestamp(7, java.sql.Timestamp(System.currentTimeMillis())) // datetime2Column
                    st.setBigDecimal(8, BigDecimal("12345.67")) // decimalColumn
                    st.setFloat(9, 123.45f) // floatColumn
                    st.setNull(10, java.sql.Types.NULL) // imageColumn (assuming nullable)
                    st.setInt(11, 123456) // intColumn
                    st.setBigDecimal(12, BigDecimal("123.45")) // moneyColumn
                    st.setString(13, "Sample") // ncharColumn
                    st.setString(14, "Sample$i text") // ntextColumn
                    st.setBigDecimal(15, BigDecimal("1234.56")) // numericColumn
                    st.setString(16, "Sample") // nvarcharColumn
                    st.setString(17, "Sample$i text") // nvarcharMaxColumn
                    st.setFloat(18, 123.45f) // realColumn
                    st.setTimestamp(19, java.sql.Timestamp(System.currentTimeMillis())) // smalldatetimeColumn
                    st.setInt(20, 123) // smallintColumn
                    st.setBigDecimal(21, BigDecimal("123.45")) // smallmoneyColumn
                    st.setString(22, "Sample$i text") // textColumn
                    st.setTime(23, java.sql.Time(System.currentTimeMillis())) // timeColumn
                    st.setTimestamp(24, java.sql.Timestamp(System.currentTimeMillis())) // timestampColumn
                    st.setInt(25, 123) // tinyintColumn
                    // st.setObject(27, null) // udtColumn (assuming nullable)
                    st.setObject(26, UUID.randomUUID()) // uniqueidentifierColumn
                    st.setBytes(27, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryColumn
                    st.setBytes(28, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryMaxColumn
                    st.setString(29, "Sample$i") // varcharColumn
                    st.setString(30, "Sample$i text") // varcharMaxColumn
                    st.executeUpdate()
                }
            }
        }

        /** Closes the shared connection after all tests; close failures are logged only. */
        @AfterClass
        @JvmStatic
        fun tearDownClass() {
            try {
                connection.close()
            } catch (e: SQLException) {
                e.printStackTrace()
            }
        }
    }

    @Test
    fun `basic test for reading sql tables`() {
        val df1 = DataFrame.readSqlTable(connection, "table1", limit = 5).cast<Table1MSSSQL>()

        val result = df1.filter { it[Table1MSSSQL::id] == 1 }
        // Index 30 is varcharMaxColumn (column 31 of 31 in insert order).
        result[0][30] shouldBe "Sample1 text"
        result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
        result[0][Table1MSSSQL::bitColumn] shouldBe true
        result[0][Table1MSSSQL::intColumn] shouldBe 123456
        result[0][Table1MSSSQL::ntextColumn] shouldBe "Sample1 text"

        // All nullable columns map to nullable Kotlin types; only id (IDENTITY
        // PRIMARY KEY) is non-null.
        val schema = DataFrameSchema.readSqlTable(connection, "table1")
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
        schema.columns["binaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["bitColumn"]!!.type shouldBe typeOf<Boolean?>()
        schema.columns["charColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["dateColumn"]!!.type shouldBe typeOf<Date?>()
        schema.columns["datetime3Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["datetime2Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["decimalColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["floatColumn"]!!.type shouldBe typeOf<Double?>()
        schema.columns["intColumn"]!!.type shouldBe typeOf<Int?>()
        schema.columns["moneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["ncharColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["ntextColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["numericColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["nvarcharColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["nvarcharMaxColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["realColumn"]!!.type shouldBe typeOf<Float?>()
        schema.columns["smalldatetimeColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["smallintColumn"]!!.type shouldBe typeOf<Int?>()
        schema.columns["smallmoneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["timeColumn"]!!.type shouldBe typeOf<java.sql.Time?>()
        schema.columns["timestampColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["tinyintColumn"]!!.type shouldBe typeOf<Int?>()
        schema.columns["varbinaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["varbinaryMaxColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["varcharColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["varcharMaxColumn"]!!.type shouldBe typeOf<String?>()
    }

    @Test
    fun `read from sql query`() {
        @Language("SQL")
        val sqlQuery =
            """
            SELECT
               Table1.id,
               Table1.bigintColumn
            FROM Table1
            """.trimIndent()

        val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery, limit = 3).cast<Table1MSSSQL>()
        val result = df.filter { it[Table1MSSSQL::id] == 1 }
        result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L

        val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
    }

    @Test
    fun `read from all tables`() {
        // limit = 4 caps each table's row count below the 5 inserted rows.
        val dataframes = DataFrame.readAllSqlTables(connection, limit = 4).values.toList()

        val table1Df = dataframes[0].cast<Table1MSSSQL>()

        table1Df.rowsCount() shouldBe 4
        table1Df.filter { it[Table1MSSSQL::id] > 2 }.rowsCount() shouldBe 2
        table1Df[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
    }

    @Test
    fun `infer nullability`() {
        inferNullability(connection)
    }
}
|
||||
Vendored
+425
@@ -0,0 +1,425 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.h2
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.add
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.api.select
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.Date
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
// NOTE: the names of testing databases should be different to avoid collisions and should not contain the system names itself
// In-memory H2 database emulating MySQL; DATABASE_TO_LOWER=TRUE lower-cases
// identifiers, which is why the schema interfaces below use lowercase names.
private const val URL = "jdbc:h2:mem:test2;DB_CLOSE_DELAY=-1;MODE=MySQL;DATABASE_TO_LOWER=TRUE"
|
||||
|
||||
/**
 * Expected schema for the non-nullable `table1` in the MySQL-mode H2 tests.
 *
 * Property names are lowercase because the database URL sets DATABASE_TO_LOWER=TRUE.
 * NOTE(review): `bitCol` is the only mixed-case property here — confirm whether it
 * should be `bitcol` for consistency with the rest of this interface.
 */
@DataSchema
interface Table1MySql {
    val id: Int
    val bitCol: Boolean
    val tinyintcol: Int
    val smallintcol: Int
    val mediumintcol: Int
    val mediumintunsignedcol: Int
    val integercol: Int
    val intcol: Int
    // Unsigned 32-bit values do not fit in Int, hence Long.
    val integerunsignedcol: Long
    val bigintcol: Long
    val floatcol: Float
    val doublecol: Double
    val decimalcol: BigDecimal
    // Date/time columns are modeled as String here — presumably the string
    // rendering of the JDBC value; verify against the reader's mapping.
    val datecol: String
    val datetimecol: String
    val timestampcol: String
    val timecol: String
    val yearcol: String
    val varcharcol: String
    val charcol: String
    val binarycol: ByteArray
    val varbinarycol: ByteArray
    val tinyblobcol: ByteArray
    val blobcol: ByteArray
    val mediumblobcol: ByteArray
    val longblobcol: ByteArray
    val textcol: String
    val mediumtextcol: String
    val longtextcol: String
    val enumcol: String
    val setcol: Char
}
|
||||
|
||||
/**
 * Expected schema for the all-nullable `table2` in the MySQL-mode H2 tests.
 *
 * Mirrors [Table1MySql] with nullable types (every column except the primary
 * key is declared without NOT NULL), plus a JSON column.
 * NOTE(review): `mediumintUnsignedcol` / `integerUnsignedcol` use mixed case
 * while the sibling interface is all-lowercase — confirm the intended casing.
 */
@DataSchema
interface Table2MySql {
    val id: Int
    val bitcol: Boolean?
    val tinyintcol: Int?
    val smallintcol: Int?
    val mediumintcol: Int?
    val mediumintUnsignedcol: Int?
    val integercol: Int?
    val intcol: Int?
    val integerUnsignedcol: Long?
    val bigintcol: Long?
    val floatcol: Float?
    // NOTE(review): decimalcol is Double? here but BigDecimal in Table1MySql —
    // verify this asymmetry is intentional.
    val doublecol: Double?
    val decimalcol: Double?
    val datecol: String?
    val datetimecol: String?
    val timestampcol: String?
    val timecol: String?
    val yearcol: String?
    val varcharcol: String?
    val charcol: String?
    val binarycol: ByteArray?
    val varbinarycol: ByteArray?
    val tinyblobcol: ByteArray?
    val blobcol: ByteArray?
    val mediumblobcol: ByteArray?
    val longblobcol: ByteArray?
    val textcol: String?
    val mediumtextcol: String?
    val longtextcol: String?
    val enumcol: String?
    val setcol: Char?
    val jsoncol: String?
}
|
||||
|
||||
/**
 * Minimal projection schema for the `id` + `enumCol` join query in
 * the MySQL-mode `read from sql query` test.
 */
@DataSchema
interface Table3MySql {
    val id: Int
    val enumcol: String
}
|
||||
|
||||
class MySqlH2Test {
|
||||
companion object {
|
||||
private lateinit var connection: Connection
|
||||
|
||||
@BeforeClass
|
||||
@JvmStatic
|
||||
fun setUpClass() {
|
||||
connection = DriverManager.getConnection(URL)
|
||||
|
||||
@Language("SQL")
|
||||
val createTableQuery = """
|
||||
CREATE TABLE IF NOT EXISTS table1 (
|
||||
id INT AUTO_INCREMENT PRIMARY KEY,
|
||||
bitCol BIT NOT NULL,
|
||||
tinyintCol TINYINT NOT NULL,
|
||||
smallintCol SMALLINT NOT NULL,
|
||||
mediumintCol MEDIUMINT NOT NULL,
|
||||
mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
|
||||
integerCol INTEGER NOT NULL,
|
||||
intCol INT NOT NULL,
|
||||
integerUnsignedCol INTEGER UNSIGNED NOT NULL,
|
||||
bigintCol BIGINT NOT NULL,
|
||||
floatCol FLOAT NOT NULL,
|
||||
doubleCol DOUBLE NOT NULL,
|
||||
decimalCol DECIMAL NOT NULL,
|
||||
dateCol DATE NOT NULL,
|
||||
datetimeCol DATETIME NOT NULL,
|
||||
timestampCol TIMESTAMP NOT NULL,
|
||||
timeCol TIME NOT NULL,
|
||||
yearCol YEAR NOT NULL,
|
||||
varcharCol VARCHAR(255) NOT NULL,
|
||||
charCol CHAR(10) NOT NULL,
|
||||
binaryCol BINARY(64) NOT NULL,
|
||||
varbinaryCol VARBINARY(128) NOT NULL,
|
||||
tinyblobCol TINYBLOB NOT NULL,
|
||||
blobCol BLOB NOT NULL,
|
||||
mediumblobCol MEDIUMBLOB NOT NULL ,
|
||||
longblobCol LONGBLOB NOT NULL,
|
||||
textCol TEXT NOT NULL,
|
||||
mediumtextCol MEDIUMTEXT NOT NULL,
|
||||
longtextCol LONGTEXT NOT NULL,
|
||||
enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
|
||||
data JSON
|
||||
)
|
||||
"""
|
||||
|
||||
connection.createStatement().execute(createTableQuery.trimIndent())
|
||||
|
||||
@Language("SQL")
|
||||
val createTableQuery2 = """
|
||||
CREATE TABLE IF NOT EXISTS table2 (
|
||||
id INT AUTO_INCREMENT PRIMARY KEY,
|
||||
bitCol BIT,
|
||||
tinyintCol TINYINT,
|
||||
smallintCol SMALLINT,
|
||||
mediumintCol MEDIUMINT,
|
||||
mediumintUnsignedCol MEDIUMINT UNSIGNED,
|
||||
integerCol INTEGER,
|
||||
intCol INT,
|
||||
integerUnsignedCol INTEGER UNSIGNED,
|
||||
bigintCol BIGINT,
|
||||
floatCol FLOAT,
|
||||
doubleCol DOUBLE,
|
||||
decimalCol DECIMAL,
|
||||
dateCol DATE,
|
||||
datetimeCol DATETIME,
|
||||
timestampCol TIMESTAMP,
|
||||
timeCol TIME,
|
||||
yearCol YEAR,
|
||||
varcharCol VARCHAR(255),
|
||||
charCol CHAR(10),
|
||||
binaryCol BINARY(64),
|
||||
varbinaryCol VARBINARY(128),
|
||||
tinyblobCol TINYBLOB,
|
||||
blobCol BLOB,
|
||||
mediumblobCol MEDIUMBLOB,
|
||||
longblobCol LONGBLOB,
|
||||
textCol TEXT,
|
||||
mediumtextCol MEDIUMTEXT,
|
||||
longtextCol LONGTEXT,
|
||||
enumCol ENUM('Value1', 'Value2', 'Value3'),
|
||||
data JSON
|
||||
)
|
||||
"""
|
||||
|
||||
connection.createStatement().execute(createTableQuery2.trimIndent())
|
||||
|
||||
@Language("SQL")
|
||||
val insertData1 =
|
||||
"""
|
||||
INSERT INTO table1 (
|
||||
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
|
||||
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
|
||||
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
|
||||
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, data
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""".trimIndent()
|
||||
|
||||
@Language("SQL")
|
||||
val insertData2 =
|
||||
"""
|
||||
INSERT INTO table2 (
|
||||
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
|
||||
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
|
||||
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
|
||||
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, data
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""".trimIndent()
|
||||
|
||||
connection.prepareStatement(insertData1).use { st ->
|
||||
// Insert data into table1
|
||||
for (i in 1..3) {
|
||||
st.setBoolean(1, true)
|
||||
st.setByte(2, i.toByte())
|
||||
st.setShort(3, (i * 10).toShort())
|
||||
st.setInt(4, i * 100)
|
||||
st.setInt(5, i * 100)
|
||||
st.setInt(6, i * 100)
|
||||
st.setInt(7, i * 100)
|
||||
st.setInt(8, i * 100)
|
||||
st.setInt(9, i * 100)
|
||||
st.setFloat(10, i * 10.0f)
|
||||
st.setDouble(11, i * 10.0)
|
||||
st.setBigDecimal(12, BigDecimal(i * 10))
|
||||
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
|
||||
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
|
||||
st.setInt(17, 2023)
|
||||
st.setString(18, "varcharValue$i")
|
||||
st.setString(19, "charValue$i")
|
||||
st.setBytes(20, "binaryValue".toByteArray())
|
||||
st.setBytes(21, "varbinaryValue".toByteArray())
|
||||
st.setBytes(22, "tinyblobValue".toByteArray())
|
||||
st.setBytes(23, "blobValue".toByteArray())
|
||||
st.setBytes(24, "mediumblobValue".toByteArray())
|
||||
st.setBytes(25, "longblobValue".toByteArray())
|
||||
st.setString(26, "textValue$i")
|
||||
st.setString(27, "mediumtextValue$i")
|
||||
st.setString(28, "longtextValue$i")
|
||||
st.setString(29, "Value$i")
|
||||
st.setString(30, "{\"key\": \"value\"}")
|
||||
st.executeUpdate()
|
||||
}
|
||||
}
|
||||
|
||||
connection.prepareStatement(insertData2).use { st ->
|
||||
// Insert data into table2
|
||||
for (i in 1..3) {
|
||||
st.setBoolean(1, false)
|
||||
st.setByte(2, (i * 2).toByte())
|
||||
st.setShort(3, (i * 20).toShort())
|
||||
st.setInt(4, i * 200)
|
||||
st.setInt(5, i * 200)
|
||||
st.setInt(6, i * 200)
|
||||
st.setInt(7, i * 200)
|
||||
st.setInt(8, i * 200)
|
||||
st.setInt(9, i * 200)
|
||||
st.setFloat(10, i * 20.0f)
|
||||
st.setDouble(11, i * 20.0)
|
||||
st.setBigDecimal(12, BigDecimal(i * 20))
|
||||
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
|
||||
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
|
||||
st.setInt(17, 2023)
|
||||
st.setString(18, "varcharValue$i")
|
||||
st.setString(19, "charValue$i")
|
||||
st.setBytes(20, "binaryValue".toByteArray())
|
||||
st.setBytes(21, "varbinaryValue".toByteArray())
|
||||
st.setBytes(22, "tinyblobValue".toByteArray())
|
||||
st.setBytes(23, "blobValue".toByteArray())
|
||||
st.setBytes(24, "mediumblobValue".toByteArray())
|
||||
st.setBytes(25, "longblobValue".toByteArray())
|
||||
st.setString(26, null)
|
||||
st.setString(27, null)
|
||||
st.setString(28, "longtextValue$i")
|
||||
st.setString(29, "Value$i")
|
||||
st.setString(30, "{\"key\": \"value\"}")
|
||||
st.executeUpdate()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
@JvmStatic
|
||||
fun tearDownClass() {
|
||||
try {
|
||||
connection.close()
|
||||
} catch (e: SQLException) {
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `basic test for reading sql tables`() {
|
||||
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
|
||||
val result = df1.filter { it[Table1MySql::id] == 1 }
|
||||
result[0][26] shouldBe "textValue1"
|
||||
|
||||
val schema = DataFrameSchema.readSqlTable(connection, "table1")
|
||||
schema.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["textcol"]!!.type shouldBe typeOf<String>()
|
||||
schema.columns["datecol"]!!.type shouldBe typeOf<Date>()
|
||||
schema.columns["datetimecol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
|
||||
schema.columns["timestampcol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
|
||||
schema.columns["timecol"]!!.type shouldBe typeOf<java.sql.Time>()
|
||||
schema.columns["yearcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["varbinarycol"]!!.type shouldBe typeOf<ByteArray>()
|
||||
schema.columns["binarycol"]!!.type shouldBe typeOf<ByteArray>()
|
||||
schema.columns["longblobcol"]!!.type shouldBe typeOf<java.sql.Blob>()
|
||||
schema.columns["tinyblobcol"]!!.type shouldBe typeOf<java.sql.Blob>()
|
||||
|
||||
val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MySql>()
|
||||
val result2 = df2.filter { it[Table2MySql::id] == 1 }
|
||||
result2[0][26] shouldBe null
|
||||
|
||||
val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
|
||||
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from sql query`() {
|
||||
@Language("SQL")
|
||||
val sqlQuery =
|
||||
"""
|
||||
SELECT
|
||||
t1.id,
|
||||
t1.enumCol
|
||||
FROM table1 t1
|
||||
JOIN table2 t2 ON t1.id = t2.id
|
||||
""".trimIndent()
|
||||
|
||||
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MySql>()
|
||||
val result = df.filter { it[Table3MySql::id] == 1 }
|
||||
result[0][1] shouldBe "Value1"
|
||||
|
||||
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
|
||||
schema.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["enumcol"]!!.type shouldBe typeOf<Any>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from all tables`() {
|
||||
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
|
||||
|
||||
val table1Df = dataframes[0].cast<Table1MySql>()
|
||||
|
||||
table1Df.rowsCount() shouldBe 3
|
||||
table1Df.filter { it[Table1MySql::integercol] > 100 }.rowsCount() shouldBe 2
|
||||
table1Df[0][11] shouldBe 10.0
|
||||
table1Df[0][26] shouldBe "textValue1"
|
||||
|
||||
val table2Df = dataframes[1].cast<Table2MySql>()
|
||||
|
||||
table2Df.rowsCount() shouldBe 3
|
||||
table2Df.filter {
|
||||
it[Table2MySql::integercol] != null && it[Table2MySql::integercol]!! > 400
|
||||
}.rowsCount() shouldBe 1
|
||||
table2Df[0][11] shouldBe 20.0
|
||||
table2Df[0][26] shouldBe null
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `reading numeric types`() {
|
||||
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
|
||||
|
||||
val result = df1.select("tinyintcol").add("tinyintcol2") { it[Table1MySql::tinyintcol] }
|
||||
|
||||
result[0][1] shouldBe 1.toByte()
|
||||
|
||||
val result1 = df1.select("smallintcol")
|
||||
.add("smallintcol2") { it[Table1MySql::smallintcol] }
|
||||
|
||||
result1[0][1] shouldBe 10.toShort()
|
||||
|
||||
val result2 = df1.select("mediumintcol")
|
||||
.add("mediumintcol2") { it[Table1MySql::mediumintcol] }
|
||||
|
||||
result2[0][1] shouldBe 100
|
||||
|
||||
val result3 = df1.select("mediumintunsignedcol")
|
||||
.add("mediumintunsignedcol2") { it[Table1MySql::mediumintunsignedcol] }
|
||||
|
||||
result3[0][1] shouldBe 100
|
||||
|
||||
val result5 = df1.select("bigintcol")
|
||||
.add("bigintcol2") { it[Table1MySql::bigintcol] }
|
||||
|
||||
result5[0][1] shouldBe 100
|
||||
|
||||
val result7 = df1.select("doublecol")
|
||||
.add("doublecol2") { it[Table1MySql::doublecol] }
|
||||
|
||||
result7[0][1] shouldBe 10.0
|
||||
|
||||
val result8 = df1.select("decimalcol")
|
||||
.add("decimalcol2") { it[Table1MySql::decimalcol] }
|
||||
|
||||
result8[0][1] shouldBe BigDecimal("10")
|
||||
|
||||
val schema = DataFrameSchema.readSqlTable(connection, "table1")
|
||||
|
||||
schema.columns["tinyintcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["mediumintcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["mediumintunsignedcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
|
||||
schema.columns["floatcol"]!!.type shouldBe typeOf<Double>()
|
||||
schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
|
||||
schema.columns["decimalcol"]!!.type shouldBe typeOf<BigDecimal>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `infer nullability`() {
|
||||
inferNullability(connection)
|
||||
}
|
||||
}
|
||||
Vendored
+388
@@ -0,0 +1,388 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.h2
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.add
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.api.select
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.UUID
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
private const val URL =
|
||||
"jdbc:h2:mem:test3;DB_CLOSE_DELAY=-1;MODE=PostgreSQL;DATABASE_TO_LOWER=TRUE;DEFAULT_NULL_ORDERING=HIGH"
|
||||
|
||||
@DataSchema
|
||||
interface Table1 {
|
||||
val id: Int
|
||||
val bigintcol: Long
|
||||
val smallintcol: Int
|
||||
val bigserialcol: Long
|
||||
val booleancol: Boolean
|
||||
val byteacol: ByteArray
|
||||
val charactercol: String
|
||||
val characterncol: String
|
||||
val charcol: String
|
||||
val datecol: java.sql.Date
|
||||
val doublecol: Double
|
||||
val integercol: Int?
|
||||
val jsoncol: String
|
||||
val jsonbcol: String
|
||||
}
|
||||
|
||||
@DataSchema
|
||||
interface Table2 {
|
||||
val id: Int
|
||||
val moneycol: String
|
||||
val numericcol: BigDecimal
|
||||
val realcol: Float
|
||||
val smallintcol: Int
|
||||
val serialcol: Int
|
||||
val textcol: String?
|
||||
val timecol: String
|
||||
val timewithzonecol: String
|
||||
val timestampcol: String
|
||||
val timestampwithzonecol: String
|
||||
val uuidcol: String
|
||||
}
|
||||
|
||||
@DataSchema
|
||||
interface ViewTable {
|
||||
val id: Int
|
||||
val bigintcol: Long
|
||||
val textCol: String?
|
||||
}
|
||||
|
||||
class PostgresH2Test {
|
||||
companion object {
|
||||
private lateinit var connection: Connection
|
||||
|
||||
@BeforeClass
|
||||
@JvmStatic
|
||||
fun setUpClass() {
|
||||
connection = DriverManager.getConnection(URL)
|
||||
|
||||
@Language("SQL")
|
||||
val createTableStatement =
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS table1 (
|
||||
id serial PRIMARY KEY,
|
||||
bigintCol bigint not null,
|
||||
smallintCol smallint not null,
|
||||
bigserialCol bigserial not null,
|
||||
booleanCol boolean not null,
|
||||
byteaCol bytea not null,
|
||||
characterCol character not null,
|
||||
characterNCol character(10) not null,
|
||||
charCol char not null,
|
||||
dateCol date not null,
|
||||
doubleCol double precision not null,
|
||||
integerCol integer,
|
||||
intArrayCol integer array,
|
||||
doubleArrayCol double precision array,
|
||||
dateArrayCol date array,
|
||||
textArrayCol text array,
|
||||
booleanArrayCol boolean array
|
||||
)
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().execute(createTableStatement.trimIndent())
|
||||
|
||||
@Language("SQL")
|
||||
val createTableQuery =
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS table2 (
|
||||
id serial PRIMARY KEY,
|
||||
moneyCol money not null,
|
||||
numericCol numeric not null,
|
||||
realCol real not null,
|
||||
smallintCol smallint not null,
|
||||
serialCol serial not null,
|
||||
textCol text,
|
||||
timeCol time not null,
|
||||
timeWithZoneCol time with time zone not null,
|
||||
timestampCol timestamp not null,
|
||||
timestampWithZoneCol timestamp with time zone not null,
|
||||
uuidCol uuid not null
|
||||
)
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().execute(createTableQuery.trimIndent())
|
||||
|
||||
@Language("SQL")
|
||||
val insertData1 =
|
||||
"""
|
||||
INSERT INTO table1 (
|
||||
bigintCol, smallintCol, bigserialCol, booleanCol,
|
||||
byteaCol, characterCol, characterNCol, charCol,
|
||||
dateCol, doubleCol,
|
||||
integerCol, intArrayCol,
|
||||
doubleArrayCol, dateArrayCol, textArrayCol, booleanArrayCol
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""".trimIndent()
|
||||
|
||||
@Language("SQL")
|
||||
val insertData2 =
|
||||
"""
|
||||
INSERT INTO table2 (
|
||||
moneyCol, numericCol,
|
||||
realCol, smallintCol,
|
||||
serialCol, textCol, timeCol,
|
||||
timeWithZoneCol, timestampCol, timestampWithZoneCol,
|
||||
uuidCol
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""".trimIndent()
|
||||
|
||||
val intArray = connection.createArrayOf("INTEGER", arrayOf(1, 2, 3))
|
||||
val doubleArray = connection.createArrayOf("DOUBLE", arrayOf(1.1, 2.2, 3.3))
|
||||
val dateArray = connection.createArrayOf(
|
||||
"DATE",
|
||||
arrayOf(java.sql.Date.valueOf("2023-08-01"), java.sql.Date.valueOf("2023-08-02")),
|
||||
)
|
||||
val textArray = connection.createArrayOf("TEXT", arrayOf("Hello", "World"))
|
||||
val booleanArray = connection.createArrayOf("BOOLEAN", arrayOf(true, false, true))
|
||||
|
||||
connection.prepareStatement(insertData1).use { st ->
|
||||
// Insert data into table1
|
||||
for (i in 1..3) {
|
||||
st.setLong(1, i * 1000L)
|
||||
st.setShort(2, 11.toShort())
|
||||
st.setLong(3, 1000000000L + i)
|
||||
st.setBoolean(4, i % 2 == 1)
|
||||
st.setBytes(5, byteArrayOf(1, 2, 3))
|
||||
st.setString(6, "A")
|
||||
st.setString(7, "Hello")
|
||||
st.setString(8, "A")
|
||||
st.setDate(9, java.sql.Date.valueOf("2023-08-01"))
|
||||
st.setDouble(10, 12.34)
|
||||
st.setInt(11, 12345 * i)
|
||||
st.setArray(12, intArray)
|
||||
st.setArray(13, doubleArray)
|
||||
st.setArray(14, dateArray)
|
||||
st.setArray(15, textArray)
|
||||
st.setArray(16, booleanArray)
|
||||
st.executeUpdate()
|
||||
}
|
||||
}
|
||||
|
||||
connection.prepareStatement(insertData2).use { st ->
|
||||
// Insert data into table2
|
||||
for (i in 1..3) {
|
||||
st.setBigDecimal(1, BigDecimal("123.45"))
|
||||
st.setBigDecimal(2, BigDecimal("12.34"))
|
||||
st.setFloat(3, 12.34f)
|
||||
st.setInt(4, 1000 + i)
|
||||
st.setInt(5, 1000000 + i)
|
||||
st.setString(6, null)
|
||||
st.setTime(7, java.sql.Time.valueOf("12:34:56"))
|
||||
st.setTimestamp(8, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTimestamp(9, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTimestamp(10, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setObject(11, UUID.randomUUID(), java.sql.Types.OTHER)
|
||||
st.executeUpdate()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
@JvmStatic
|
||||
fun tearDownClass() {
|
||||
try {
|
||||
connection.close()
|
||||
} catch (e: SQLException) {
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from tables`() {
|
||||
val tableName1 = "table1"
|
||||
val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
|
||||
val result = df1.filter { it[Table1::id] == 1 }
|
||||
|
||||
result[0][0] shouldBe 1
|
||||
result[0][8] shouldBe "A"
|
||||
result[0][12] shouldBe arrayOf(1, 2, 3)
|
||||
result[0][13] shouldBe arrayOf(1.1, 2.2, 3.3)
|
||||
result[0][14] shouldBe arrayOf(java.sql.Date.valueOf("2023-08-01"), java.sql.Date.valueOf("2023-08-02"))
|
||||
result[0][15] shouldBe arrayOf("Hello", "World")
|
||||
result[0][16] shouldBe arrayOf(true, false, true)
|
||||
|
||||
val schema = DataFrameSchema.readSqlTable(connection, tableName1)
|
||||
schema.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["integercol"]!!.type shouldBe typeOf<Int?>()
|
||||
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["intarraycol"]!!.type.classifier shouldBe kotlin.Array::class
|
||||
schema.columns["doublearraycol"]!!.type.classifier shouldBe kotlin.Array::class
|
||||
schema.columns["datearraycol"]!!.type.classifier shouldBe kotlin.Array::class
|
||||
schema.columns["textarraycol"]!!.type.classifier shouldBe kotlin.Array::class
|
||||
schema.columns["booleanarraycol"]!!.type.classifier shouldBe kotlin.Array::class
|
||||
|
||||
val tableName2 = "table2"
|
||||
val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
|
||||
val result2 = df2.filter { it[Table2::id] == 1 }
|
||||
result2[0][4] shouldBe 1001
|
||||
|
||||
val schema2 = DataFrameSchema.readSqlTable(connection, tableName2)
|
||||
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from sql query`() {
|
||||
@Language("SQL")
|
||||
val sqlQuery =
|
||||
"""
|
||||
SELECT
|
||||
t1.id,
|
||||
t1.bigintCol,
|
||||
t2.textCol
|
||||
FROM table1 t1
|
||||
JOIN table2 t2 ON t1.id = t2.id
|
||||
""".trimIndent()
|
||||
|
||||
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<ViewTable>()
|
||||
val result = df.filter { it[ViewTable::id] == 1 }
|
||||
result[0][2] shouldBe null
|
||||
|
||||
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
|
||||
schema.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
|
||||
schema.columns["textcol"]!!.type shouldBe typeOf<String?>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from all tables`() {
|
||||
val dataframes = DataFrame.readAllSqlTables(connection).values.toList()
|
||||
|
||||
val table1Df = dataframes[0].cast<Table1>()
|
||||
|
||||
table1Df.rowsCount() shouldBe 3
|
||||
table1Df.filter { it[Table1::integercol] != null && it[Table1::integercol]!! > 12345 }.rowsCount() shouldBe 2
|
||||
table1Df[0][1] shouldBe 1000L
|
||||
table1Df[0][2] shouldBe 11
|
||||
|
||||
val table2Df = dataframes[1].cast<Table2>()
|
||||
|
||||
table2Df.rowsCount() shouldBe 3
|
||||
table2Df.filter {
|
||||
it[Table2::realcol] == 12.34f
|
||||
}.rowsCount() shouldBe 3
|
||||
table2Df[0][4] shouldBe 1001
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read columns of different types to check type mapping`() {
|
||||
val tableName1 = "table1"
|
||||
val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
|
||||
val result = df1.select("smallintcol")
|
||||
.add("smallintcol2") { it[Table1::smallintcol] }
|
||||
result[0][1] shouldBe 11
|
||||
|
||||
val result1 = df1.select("bigserialcol")
|
||||
.add("bigserialcol2") { it[Table1::bigserialcol] }
|
||||
result1[0][1] shouldBe 1000000001L
|
||||
|
||||
val result2 = df1.select("doublecol")
|
||||
.add("doublecol2") { it[Table1::doublecol] }
|
||||
result2[0][1] shouldBe 12.34
|
||||
|
||||
val tableName2 = "table2"
|
||||
val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
|
||||
|
||||
val result4 = df2.select("numericcol")
|
||||
.add("numericcol2") { it[Table2::numericcol] }
|
||||
result4[0][1] shouldBe BigDecimal("12.34")
|
||||
|
||||
val result5 = df2.select("realcol")
|
||||
.add("realcol2") { it[Table2::realcol] }
|
||||
result5[0][1] shouldBe 12.34f
|
||||
|
||||
val result8 = df2.select("serialcol")
|
||||
.add("serialcol2") { it[Table2::serialcol] }
|
||||
result8[0][1] shouldBe 1000001
|
||||
|
||||
val schema = DataFrameSchema.readSqlTable(connection, tableName1)
|
||||
schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["bigserialcol"]!!.type shouldBe typeOf<Long>()
|
||||
schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()
|
||||
|
||||
val schema1 = DataFrameSchema.readSqlTable(connection, tableName2)
|
||||
schema1.columns["numericcol"]!!.type shouldBe typeOf<BigDecimal>()
|
||||
schema1.columns["realcol"]!!.type shouldBe typeOf<Float>()
|
||||
schema1.columns["serialcol"]!!.type shouldBe typeOf<Int>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `infer nullability`() {
|
||||
inferNullability(connection)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `readSqlQuery should execute a WITH clause and return results`() {
|
||||
try {
|
||||
// Step 1: Create a temporary table
|
||||
@Language("SQL")
|
||||
val createTableQuery =
|
||||
"""
|
||||
CREATE TABLE employees (
|
||||
id INT PRIMARY KEY,
|
||||
name VARCHAR(100),
|
||||
salary DOUBLE
|
||||
)
|
||||
""".trimIndent()
|
||||
connection.createStatement().execute(createTableQuery)
|
||||
|
||||
// Step 2: Insert data into the table
|
||||
@Language("SQL")
|
||||
val insertDataQuery =
|
||||
"""
|
||||
INSERT INTO employees (id, name, salary) VALUES
|
||||
(1, 'Alice', 60000.0),
|
||||
(2, 'Bob', 50000.0),
|
||||
(3, 'Charlie', 70000.0)
|
||||
""".trimIndent()
|
||||
|
||||
connection.createStatement().execute(insertDataQuery)
|
||||
|
||||
// Step 3: Execute the query with a WITH clause
|
||||
@Language("SQL")
|
||||
val queryWithClause =
|
||||
"""
|
||||
WITH high_earners AS (
|
||||
SELECT name, salary
|
||||
FROM employees
|
||||
WHERE salary > 55000.0
|
||||
)
|
||||
SELECT * FROM high_earners
|
||||
""".trimIndent()
|
||||
|
||||
val resultDataFrame = DataFrame.readSqlQuery(connection, queryWithClause)
|
||||
|
||||
// Step 4: Validate the results
|
||||
resultDataFrame.rowsCount() shouldBe 2
|
||||
resultDataFrame[0][0] shouldBe "Alice"
|
||||
resultDataFrame[1][0] shouldBe "Charlie"
|
||||
} finally {
|
||||
// Step 5: Clean up the temporary table
|
||||
@Language("SQL")
|
||||
val dropTableQuery = "DROP TABLE IF EXISTS employees"
|
||||
connection.createStatement().execute(dropTableQuery)
|
||||
}
|
||||
}
|
||||
}
|
||||
Vendored
+656
@@ -0,0 +1,656 @@
|
||||
@file:Suppress("SqlDialectInspection")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.duckdb.DuckDBConnection
|
||||
import org.duckdb.DuckDBResultSet
|
||||
import org.duckdb.JsonNode
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.colsOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.convert
|
||||
import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.api.single
|
||||
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.values
|
||||
import org.jetbrains.kotlinx.dataframe.api.with
|
||||
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
|
||||
import org.jetbrains.kotlinx.dataframe.io.assertInferredTypesMatchSchema
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.DuckDb
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readDataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.io.readResultSet
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.math.BigInteger
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.file.Files
|
||||
import java.sql.Blob
|
||||
import java.sql.DriverManager
|
||||
import java.sql.Timestamp
|
||||
import java.time.LocalDate
|
||||
import java.time.LocalTime
|
||||
import java.time.OffsetDateTime
|
||||
import java.util.UUID
|
||||
import kotlin.io.path.createTempDirectory
|
||||
|
||||
private const val URL = "jdbc:duckdb:"
|
||||
|
||||
class DuckDbTest {
|
||||
|
||||
// region expected schemas
|
||||
@DataSchema
|
||||
data class Person(
|
||||
val id: Int,
|
||||
val name: String,
|
||||
val age: Int,
|
||||
val salary: Double,
|
||||
@ColumnName("hire_date")
|
||||
val hireDate: LocalDate,
|
||||
) {
|
||||
companion object {
|
||||
val expected = listOf(
|
||||
Person(1, "John Doe", 30, 50000.0, LocalDate.of(2020, 1, 15)),
|
||||
Person(2, "Jane Smith", 28, 55000.0, LocalDate.of(2021, 3, 20)),
|
||||
Person(3, "Bob Johnson", 35, 65000.0, LocalDate.of(2019, 11, 10)),
|
||||
Person(4, "Alice Brown", 32, 60000.0, LocalDate.of(2020, 7, 1)),
|
||||
).toDataFrame()
|
||||
}
|
||||
}
|
||||
|
||||
@DataSchema
|
||||
data class GeneralPurposeTypes(
|
||||
@ColumnName("bigint_col")
|
||||
val bigintCol: Long,
|
||||
@ColumnName("binary_col")
|
||||
val binaryCol: Blob,
|
||||
@ColumnName("bit_col")
|
||||
val bitCol: String,
|
||||
@ColumnName("bitstring_col")
|
||||
val bitstringCol: String,
|
||||
@ColumnName("blob_col")
|
||||
val blobCol: Blob,
|
||||
@ColumnName("bool_col")
|
||||
val boolCol: Boolean,
|
||||
@ColumnName("boolean_col")
|
||||
val booleanCol: Boolean,
|
||||
@ColumnName("bpchar_col")
|
||||
val bpcharCol: String,
|
||||
@ColumnName("bytea_col")
|
||||
val byteaCol: Blob,
|
||||
@ColumnName("char_col")
|
||||
val charCol: String,
|
||||
@ColumnName("date_col")
|
||||
val dateCol: LocalDate,
|
||||
@ColumnName("datetime_col")
|
||||
val datetimeCol: Timestamp,
|
||||
@ColumnName("decimal_col")
|
||||
val decimalCol: BigDecimal,
|
||||
@ColumnName("double_col")
|
||||
val doubleCol: Double,
|
||||
@ColumnName("enum_col")
|
||||
val enumCol: String,
|
||||
@ColumnName("float4_col")
|
||||
val float4Col: Float,
|
||||
@ColumnName("float8_col")
|
||||
val float8Col: Double,
|
||||
@ColumnName("float_col")
|
||||
val floatCol: Float,
|
||||
@ColumnName("hugeint_col")
|
||||
val hugeintCol: BigInteger,
|
||||
@ColumnName("int128_col")
|
||||
val int128Col: BigInteger,
|
||||
@ColumnName("int16_col")
|
||||
val int16Col: Short,
|
||||
@ColumnName("int1_col")
|
||||
val int1Col: Byte,
|
||||
@ColumnName("int2_col")
|
||||
val int2Col: Short,
|
||||
@ColumnName("int32_col")
|
||||
val int32Col: Int,
|
||||
@ColumnName("int4_col")
|
||||
val int4Col: Int,
|
||||
@ColumnName("int64_col")
|
||||
val int64Col: Long,
|
||||
@ColumnName("int8_col")
|
||||
val int8Col: Long,
|
||||
@ColumnName("int_col")
|
||||
val intCol: Int,
|
||||
@ColumnName("integer_col")
|
||||
val integerCol: Int,
|
||||
@ColumnName("interval_col")
|
||||
val intervalCol: String,
|
||||
@ColumnName("json_col")
|
||||
val jsonCol: JsonNode,
|
||||
@ColumnName("logical_col")
|
||||
val logicalCol: Boolean,
|
||||
@ColumnName("long_col")
|
||||
val longCol: Long,
|
||||
@ColumnName("numeric_col")
|
||||
val numericCol: BigDecimal,
|
||||
@ColumnName("real_col")
|
||||
val realCol: Float,
|
||||
@ColumnName("short_col")
|
||||
val shortCol: Short,
|
||||
@ColumnName("signed_col")
|
||||
val signedCol: Int,
|
||||
@ColumnName("smallint_col")
|
||||
val smallintCol: Short,
|
||||
@ColumnName("string_col")
|
||||
val stringCol: String,
|
||||
@ColumnName("text_col")
|
||||
val textCol: String,
|
||||
@ColumnName("time_col")
|
||||
val timeCol: LocalTime,
|
||||
@ColumnName("timestamp_col")
|
||||
val timestampCol: Timestamp,
|
||||
@ColumnName("timestamptz_col")
|
||||
val timestamptzCol: OffsetDateTime,
|
||||
@ColumnName("timestampwtz_col")
|
||||
val timestampwtzCol: OffsetDateTime,
|
||||
@ColumnName("tinyint_col")
|
||||
val tinyintCol: Byte,
|
||||
@ColumnName("ubigint_col")
|
||||
val ubigintCol: BigInteger,
|
||||
@ColumnName("uhugeint_col")
|
||||
val uhugeintCol: BigInteger,
|
||||
@ColumnName("uint128_col")
|
||||
val uint128Col: BigInteger,
|
||||
@ColumnName("uint16_col")
|
||||
val uint16Col: Int,
|
||||
@ColumnName("uint32_col")
|
||||
val uint32Col: Long,
|
||||
@ColumnName("uint64_col")
|
||||
val uint64Col: BigInteger,
|
||||
@ColumnName("uint8_col")
|
||||
val uint8Col: Short,
|
||||
@ColumnName("uint_col")
|
||||
val uintCol: Long,
|
||||
@ColumnName("usmallint_col")
|
||||
val usmallintCol: Int,
|
||||
@ColumnName("utinyint_col")
|
||||
val utinyintCol: Short,
|
||||
@ColumnName("uuid_col")
|
||||
val uuidCol: UUID,
|
||||
@ColumnName("varbinary_col")
|
||||
val varbinaryCol: Blob,
|
||||
@ColumnName("varchar_col")
|
||||
val varcharCol: String,
|
||||
) {
|
||||
companion object {
|
||||
val expected = listOf(
|
||||
GeneralPurposeTypes(
|
||||
bigintCol = 9223372036854775807L,
|
||||
binaryCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
|
||||
bitCol = "1010",
|
||||
bitstringCol = "1010",
|
||||
blobCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
|
||||
boolCol = true,
|
||||
booleanCol = true,
|
||||
bpcharCol = "test",
|
||||
byteaCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
|
||||
charCol = "test",
|
||||
dateCol = LocalDate.parse("2025-06-19"),
|
||||
datetimeCol = Timestamp.valueOf("2025-06-19 12:34:56"),
|
||||
decimalCol = BigDecimal("123.45"),
|
||||
doubleCol = 3.14159,
|
||||
enumCol = "female",
|
||||
float4Col = 3.14f,
|
||||
float8Col = 3.14159,
|
||||
floatCol = 3.14f,
|
||||
hugeintCol = BigInteger("170141183460469231731687303715884105727"),
|
||||
int128Col = BigInteger("170141183460469231731687303715884105727"),
|
||||
int16Col = 32767,
|
||||
int1Col = 127,
|
||||
int2Col = 32767,
|
||||
int32Col = 2147483647,
|
||||
int4Col = 2147483647,
|
||||
int64Col = 9223372036854775807L,
|
||||
int8Col = 9223372036854775807L,
|
||||
intCol = 2147483647,
|
||||
integerCol = 2147483647,
|
||||
intervalCol = "1 year",
|
||||
jsonCol = JsonNode("{\"key\": \"value\"}"),
|
||||
logicalCol = true,
|
||||
longCol = 9223372036854775807L,
|
||||
numericCol = BigDecimal("123.45"),
|
||||
realCol = 3.14f,
|
||||
shortCol = 32767,
|
||||
signedCol = 2147483647,
|
||||
smallintCol = 32767,
|
||||
stringCol = "test string",
|
||||
textCol = "test text",
|
||||
timeCol = LocalTime.parse("12:34:56"),
|
||||
timestampCol = Timestamp.valueOf("2025-06-19 12:34:56"),
|
||||
timestamptzCol = OffsetDateTime.parse("2025-06-19T12:34:56+02:00"),
|
||||
timestampwtzCol = OffsetDateTime.parse("2025-06-19T12:34:56+02:00"),
|
||||
tinyintCol = 127,
|
||||
ubigintCol = BigInteger("18446744073709551615"),
|
||||
uhugeintCol = BigInteger("340282366920938463463374607431768211455"),
|
||||
uint128Col = BigInteger("340282366920938463463374607431768211455"),
|
||||
uint16Col = 65535,
|
||||
uint32Col = 4294967295L,
|
||||
uint64Col = BigInteger("18446744073709551615"),
|
||||
uint8Col = 255,
|
||||
uintCol = 4294967295L,
|
||||
usmallintCol = 65535,
|
||||
utinyintCol = 255,
|
||||
uuidCol = UUID.fromString("a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11"),
|
||||
varbinaryCol = DuckDBResultSet.DuckDBBlobResult(ByteBuffer.wrap("DEADBEEF".toByteArray())),
|
||||
varcharCol = "test string",
|
||||
),
|
||||
).toDataFrame()
|
||||
}
|
||||
}
|
||||
|
||||
@DataSchema
|
||||
data class NestedTypes(
|
||||
@ColumnName("ijstruct_col")
|
||||
val ijstructCol: java.sql.Struct,
|
||||
@ColumnName("intarray_col")
|
||||
val intarrayCol: java.sql.Array,
|
||||
@ColumnName("intlist_col")
|
||||
val intlistCol: java.sql.Array,
|
||||
@ColumnName("intstringmap_col")
|
||||
val intstringmapCol: Map<Int, String?>,
|
||||
@ColumnName("intstrinstinggmap_col")
|
||||
val intstrinstinggmapCol: Map<Int, Map<String, String?>?>,
|
||||
@ColumnName("stringarray_col")
|
||||
val stringarrayCol: java.sql.Array,
|
||||
@ColumnName("stringlist_col")
|
||||
val stringlistCol: java.sql.Array,
|
||||
@ColumnName("stringlistlist_col")
|
||||
val stringlistlistCol: java.sql.Array,
|
||||
@ColumnName("union_col")
|
||||
val unionCol: Any,
|
||||
)
|
||||
|
||||
// endregion
|
||||
|
||||
@Test
|
||||
fun `read simple dataframe from DuckDB`() {
|
||||
val df: AnyFrame
|
||||
val schema: DataFrameSchema
|
||||
val subset: AnyFrame
|
||||
DriverManager.getConnection(URL).use { connection ->
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS test_table (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name VARCHAR,
|
||||
age INTEGER,
|
||||
salary DOUBLE,
|
||||
hire_date DATE
|
||||
)
|
||||
""".trimIndent(),
|
||||
).executeUpdate()
|
||||
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
INSERT INTO test_table (id, name, age, salary, hire_date)
|
||||
VALUES
|
||||
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
|
||||
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
|
||||
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
|
||||
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
|
||||
""".trimIndent(),
|
||||
).executeUpdate()
|
||||
|
||||
df = DataFrame.readSqlTable(connection, "test_table")
|
||||
schema = DataFrameSchema.readSqlTable(connection, "test_table")
|
||||
|
||||
subset = DataFrame.readSqlQuery(connection, """SELECT test_table.name, test_table.age FROM test_table""")
|
||||
}
|
||||
|
||||
schema.compare(Person.expected.schema()).isSuperOrMatches() shouldBe true
|
||||
|
||||
df.cast<Person>(verify = true) shouldBe Person.expected
|
||||
df.assertInferredTypesMatchSchema()
|
||||
|
||||
subset.assertInferredTypesMatchSchema()
|
||||
subset["name"] shouldBe df["name"]
|
||||
subset["age"] shouldBe df["age"]
|
||||
subset.columnsCount() shouldBe 2
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read simple dataframe from DuckDB ResultSet`() {
|
||||
val df: AnyFrame
|
||||
val schema: DataFrameSchema
|
||||
DriverManager.getConnection(URL).use { connection ->
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS test_table (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name VARCHAR,
|
||||
age INTEGER,
|
||||
salary DOUBLE,
|
||||
hire_date DATE
|
||||
)
|
||||
""".trimIndent(),
|
||||
).executeUpdate()
|
||||
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
INSERT INTO test_table (id, name, age, salary, hire_date)
|
||||
VALUES
|
||||
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
|
||||
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
|
||||
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
|
||||
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
|
||||
""".trimIndent(),
|
||||
).executeUpdate()
|
||||
|
||||
connection.prepareStatement("SELECT * FROM test_table").executeQuery().use { rs ->
|
||||
df = DataFrame.readResultSet(rs, DuckDb)
|
||||
schema = DataFrameSchema.readResultSet(rs, DuckDb)
|
||||
}
|
||||
}
|
||||
|
||||
schema.compare(Person.expected.schema()).isSuperOrMatches() shouldBe true
|
||||
|
||||
df.cast<Person>(verify = true) shouldBe Person.expected
|
||||
df.assertInferredTypesMatchSchema()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read all tables`() {
|
||||
val dfs: Map<String, AnyFrame>
|
||||
val schemas: Map<String, DataFrameSchema>
|
||||
DriverManager.getConnection(URL).use { connection ->
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS test_table (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name VARCHAR,
|
||||
age INTEGER,
|
||||
salary DOUBLE,
|
||||
hire_date DATE
|
||||
)
|
||||
""".trimIndent(),
|
||||
).executeUpdate()
|
||||
|
||||
connection.prepareStatement(
|
||||
"""
|
||||
INSERT INTO test_table (id, name, age, salary, hire_date)
|
||||
VALUES
|
||||
(1, 'John Doe', 30, 50000.00, '2020-01-15'),
|
||||
(2, 'Jane Smith', 28, 55000.00, '2021-03-20'),
|
||||
(3, 'Bob Johnson', 35, 65000.00, '2019-11-10'),
|
||||
(4, 'Alice Brown', 32, 60000.00, '2020-07-01')
|
||||
""".trimIndent(),
|
||||
).executeUpdate()
|
||||
|
||||
dfs = DataFrame.readAllSqlTables(connection = connection)
|
||||
schemas = DataFrameSchema.readAllSqlTables(connection = connection)
|
||||
}
|
||||
|
||||
val df = dfs["test_table"]!!
|
||||
val schema = schemas["test_table"]!!
|
||||
|
||||
schema.compare(Person.expected.schema()).isSuperOrMatches() shouldBe true
|
||||
|
||||
df.cast<Person>(verify = true) shouldBe Person.expected
|
||||
df.assertInferredTypesMatchSchema()
|
||||
}
|
||||
|
||||
    /**
     * Reads a table containing one column per general-purpose DuckDB type (and every
     * type alias), and verifies both the inferred schema and the read values.
     *
     * https://duckdb.org/docs/stable/sql/data_types/overview.html
     */
    @Test
    fun `read each general-purpose DuckDB type`() {
        val df: AnyFrame
        val schema: DataFrameSchema
        DriverManager.getConnection(URL).use { connection ->
            // Cast documents (and asserts) that this connection comes from the DuckDB driver.
            connection as DuckDBConnection
            connection.prepareStatement(
                """
                CREATE TABLE IF NOT EXISTS table1 (
                    bigint_col BIGINT,
                    int8_col INT8,
                    int64_col INT64,
                    long_col LONG,
                    bit_col BIT,
                    bitstring_col BITSTRING,
                    blob_col BLOB,
                    bytea_col BYTEA,
                    binary_col BINARY,
                    varbinary_col VARBINARY,
                    boolean_col BOOLEAN,
                    bool_col BOOL,
                    logical_col LOGICAL,
                    date_col DATE,
                    decimal_col DECIMAL(10,2),
                    numeric_col NUMERIC(10,2),
                    double_col DOUBLE,
                    float8_col FLOAT8,
                    float_col FLOAT,
                    float4_col FLOAT4,
                    real_col REAL,
                    hugeint_col HUGEINT,
                    int128_col INT128,
                    integer_col INTEGER,
                    int4_col INT4,
                    int32_col INT32,
                    int_col INT,
                    signed_col SIGNED,
                    interval_col INTERVAL,
                    json_col JSON,
                    smallint_col SMALLINT,
                    int2_col INT2,
                    int16_col INT16,
                    short_col SHORT,
                    time_col TIME,
                    timestampwtz_col TIMESTAMP WITH TIME ZONE,
                    timestamptz_col TIMESTAMPTZ,
                    timestamp_col TIMESTAMP,
                    datetime_col DATETIME,
                    tinyint_col TINYINT,
                    int1_col INT1,
                    ubigint_col UBIGINT,
                    uint64_col UINT64,
                    uhugeint_col UHUGEINT,
                    uint128_col UINT128,
                    uint_col UINTEGER,
                    uint32_col UINT32,
                    usmallint_col USMALLINT,
                    uint16_col UINT16,
                    utinyint_col UTINYINT,
                    uint8_col UINT8,
                    uuid_col UUID,
                    varchar_col VARCHAR,
                    char_col CHAR(10),
                    bpchar_col BPCHAR(10),
                    text_col TEXT,
                    string_col STRING,
                    enum_col ENUM('male', 'female', 'other')
                )
                """.trimIndent(),
            ).executeUpdate()

            // One row exercising the extreme/representative value of each column.
            // NOTE(review): these PreparedStatements are not closed explicitly; they are
            // only released when the connection's `use` block closes the connection.
            connection.prepareStatement(
                """
                INSERT INTO table1 VALUES (
                    9223372036854775807, -- bigint
                    9223372036854775807, -- int8
                    9223372036854775807, -- int64
                    9223372036854775807, -- long
                    '1010', -- bit
                    '1010', -- bitstring
                    'DEADBEEF'::BLOB, -- blob
                    'DEADBEEF'::BLOB, -- bytea
                    'DEADBEEF'::BLOB, -- binary
                    'DEADBEEF'::BLOB, -- varbinary
                    true, -- boolean
                    true, -- bool
                    true, -- logical
                    '2025-06-19', -- date
                    123.45, -- decimal
                    123.45, -- numeric
                    3.14159, -- double
                    3.14159, -- float8
                    3.14, -- float
                    3.14, -- float4
                    3.14, -- real
                    '170141183460469231731687303715884105727', -- hugeint
                    '170141183460469231731687303715884105727', -- int128
                    2147483647, -- integer
                    2147483647, -- int4
                    2147483647, -- int32
                    2147483647, -- int
                    2147483647, -- signed
                    INTERVAL '1' YEAR, -- interval
                    '{"key": "value"}'::JSON, -- json
                    32767, -- smallint
                    32767, -- int2
                    32767, -- int16
                    32767, -- short
                    '12:34:56', -- time
                    '2025-06-19 12:34:56+02', -- timestampwtz
                    '2025-06-19 12:34:56+02', -- timestamptz
                    '2025-06-19 12:34:56', -- timestamp
                    '2025-06-19 12:34:56', -- datetime
                    127, -- tinyint
                    127, -- int1
                    18446744073709551615, -- ubigint
                    18446744073709551615, -- uint64
                    '340282366920938463463374607431768211455', -- uhugeint
                    '340282366920938463463374607431768211455', -- uint128
                    4294967295, -- uinteger
                    4294967295, -- uint32
                    65535, -- usmallint
                    65535, -- uint16
                    255, -- utinyint
                    255, -- uint8
                    'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', -- uuid
                    'test string', -- varchar
                    'test', -- char
                    'test', -- bpchar
                    'test text', -- text
                    'test string', -- string
                    'female' -- enum
                )
                """.trimIndent(),
            ).executeUpdate()

            schema = DataFrameSchema.readSqlTable(connection, "table1")
            // Reorder so the comparison against the expected frame is column-order independent.
            df = DataFrame.readSqlTable(connection, "table1").reorderColumnsByName()
        }

        schema.compare(GeneralPurposeTypes.expected.schema()).isSuperOrMatches() shouldBe true

        // on some systems OffsetDateTime's get converted to UTC sometimes, let's compare them as Instant instead
        fun AnyFrame.fixOffsetDateTime() = convert { colsOf<OffsetDateTime>() }.with { it.toInstant() }

        df.cast<GeneralPurposeTypes>(verify = true).fixOffsetDateTime() shouldBe
            GeneralPurposeTypes.expected.fixOffsetDateTime()
        df.assertInferredTypesMatchSchema()
    }
|
||||
|
||||
    /**
     * Reads a table containing one column per nested DuckDB type (arrays, lists, maps,
     * structs, unions) and verifies how each is surfaced through JDBC.
     *
     * https://duckdb.org/docs/stable/sql/data_types/overview.html
     */
    @Test
    fun `read each nested DuckDB type`() {
        val df: AnyFrame
        val schema: DataFrameSchema
        DriverManager.getConnection(URL).use { connection ->
            // Cast documents (and asserts) that this connection comes from the DuckDB driver.
            connection as DuckDBConnection
            connection.prepareStatement(
                """
                CREATE TABLE IF NOT EXISTS table2 (
                    intarray_col INTEGER[3],
                    stringarray_col VARCHAR[3],
                    intlist_col INTEGER[],
                    stringlist_col VARCHAR[],
                    stringlistlist_col VARCHAR[][],
                    intstringmap_col MAP(INTEGER, VARCHAR),
                    intstrinstinggmap_col MAP(INTEGER, MAP(VARCHAR, VARCHAR)),
                    ijstruct_col STRUCT(i INTEGER, j VARCHAR),
                    union_col UNION(num INTEGER, text VARCHAR),
                )
                """.trimIndent(),
            ).executeUpdate()

            // NOTE(review): these PreparedStatements are not closed explicitly; they are
            // only released when the connection's `use` block closes the connection.
            connection.prepareStatement(
                """
                INSERT INTO table2 VALUES (
                    array_value(1, 2, NULL), -- int array
                    array_value('a', 'ab', 'abc'), -- string array
                    list_value(1, 2, 3), -- int list
                    list_value('a', 'ab', 'abc'), -- string list
                    list_value(list_value('a', 'ab'), list_value('abc'), NULL), -- string list list
                    MAP { 1: 'value1', 200: 'value2' }, -- int string map
                    MAP { 1: MAP { 'value1': 'a', 'value2': 'b' }, 200: MAP { 'value1': 'c', 'value2': 'd' } }, -- int string string map
                    { 'i': 42, 'j': 'answer' }, -- struct
                    union_value(num := 2), -- union
                )
                """.trimIndent(),
            ).executeUpdate()

            schema = DataFrameSchema.readSqlTable(connection, "table2")
            df = DataFrame.readSqlTable(connection, "table2")
        }

        df.assertInferredTypesMatchSchema()

        // Verify the cast succeeds, then treat the frame as typed for the value checks below.
        df.cast<NestedTypes>(verify = true)
        df as DataFrame<NestedTypes>

        df.single().let {
            // Arrays and lists arrive as java.sql.Array; maps as Kotlin maps; structs as java.sql.Struct.
            it[{ "intarray_col"<java.sql.Array>() }].array shouldBe arrayOf(1, 2, null)
            it[{ "stringarray_col"<java.sql.Array>() }].array shouldBe arrayOf("a", "ab", "abc")
            it[{ "intlist_col"<java.sql.Array>() }].array shouldBe arrayOf(1, 2, 3)
            it[{ "stringlist_col"<java.sql.Array>() }].array shouldBe arrayOf("a", "ab", "abc")
            (it[{ "stringlistlist_col"<java.sql.Array>() }].array as Array<*>)
                .map { (it as java.sql.Array?)?.array } shouldBe listOf(arrayOf("a", "ab"), arrayOf("abc"), null)
            it[{ "intstringmap_col"<Map<Int, String?>>() }] shouldBe mapOf(1 to "value1", 200 to "value2")
            it[{ "intstrinstinggmap_col"<Map<Int, Map<String, String?>>>() }] shouldBe mapOf(
                1 to mapOf("value1" to "a", "value2" to "b"),
                200 to mapOf("value1" to "c", "value2" to "d"),
            )
            it[{ "ijstruct_col"<java.sql.Struct>() }].attributes shouldBe arrayOf<Any>(42, "answer")
            // A union value is unwrapped to its active member.
            it[{ "union_col"<Any>() }] shouldBe 2
        }
    }
|
||||
|
||||
@Test
|
||||
fun `change read mode`() {
|
||||
// Test in-memory database (cannot be read-only)
|
||||
val config = DbConnectionConfig("jdbc:duckdb:")
|
||||
val df = config.readDataFrame("SELECT 1, 2, 3")
|
||||
df.values().toList() shouldBe listOf(1, 2, 3)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `change read mode with persistent database`() {
|
||||
// Test read-only mode with a temporary file
|
||||
val tempDir = createTempDirectory("duckdb-test-")
|
||||
val dbPath = tempDir.resolve("test.duckdb")
|
||||
try {
|
||||
// First, create the database with actual data using plain JDBC to allow DDL/DML
|
||||
DriverManager.getConnection("jdbc:duckdb:${dbPath.toAbsolutePath()}").use { connection ->
|
||||
connection.createStatement().use { st ->
|
||||
st.executeUpdate("CREATE TABLE test_data(col1 INTEGER, col2 INTEGER, col3 INTEGER)")
|
||||
st.executeUpdate("INSERT INTO test_data VALUES (1, 2, 3)")
|
||||
}
|
||||
}
|
||||
|
||||
// Now test read-only access via our API
|
||||
val config = DbConnectionConfig("jdbc:duckdb:${dbPath.toAbsolutePath()}", readOnly = true)
|
||||
val df = config.readDataFrame("SELECT col1, col2, col3 FROM test_data")
|
||||
df.values().toList() shouldBe listOf(1, 2, 3)
|
||||
} finally {
|
||||
Files.deleteIfExists(dbPath)
|
||||
Files.deleteIfExists(tempDir)
|
||||
}
|
||||
}
|
||||
}
|
||||
Vendored
+116
@@ -0,0 +1,116 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
import java.sql.DriverManager
|
||||
import java.util.Properties
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
private const val URL = "jdbc:mariadb://localhost:3307/imdb"
|
||||
private const val URL2 = "jdbc:mariadb://localhost:3307"
|
||||
private const val USER_NAME = "root"
|
||||
private const val PASSWORD = "pass"
|
||||
|
||||
/**
 * DataFrame schema for the `actors` table of the IMDB sample database.
 *
 * Only [id] is non-null (primary key); the remaining columns are nullable in the dataset.
 */
@DataSchema
interface ActorKDF {
    val id: Int
    val firstName: String?
    val lastName: String?
    val gender: String?
}
|
||||
|
||||
/**
 * DataFrame schema for the joined movies/directors/genres query result:
 * a movie with its release year, rank and the concatenated genre list.
 * All columns are nullable because the query uses outer joins / aggregates.
 */
@DataSchema
interface RankedMoviesWithGenres {
    val name: String?
    val year: Int?
    val rank: Float?
    val genres: String?
}
|
||||
|
||||
/**
 * Tests reading DataFrames from a local MariaDB instance preloaded with the IMDB
 * sample database.
 *
 * Ignored by default: it requires an external MariaDB server on localhost:3307
 * (root/pass) with the `imdb` database loaded.
 */
@Ignore
class ImdbTestTest {
    @Test
    fun `read table`() {
        val props = Properties()
        props.setProperty("user", USER_NAME)
        props.setProperty("password", PASSWORD)

        // generate kdf schemas by database metadata (as interfaces or extensions)
        // for gradle or as classes under the hood in KNB

        val tableName = "actors"

        DriverManager.getConnection(URL, props).use { connection ->
            val df = DataFrame.readSqlTable(connection, tableName, 100).cast<ActorKDF>()
            val result = df.filter { it[ActorKDF::id] in 11..19 }
            result[0][1] shouldBe "Víctor"

            val schema = DataFrameSchema.readSqlTable(connection, tableName)
            schema.columns["id"]!!.type shouldBe typeOf<Int>()
            schema.columns["first_name"]!!.type shouldBe typeOf<String?>()
        }
    }

    @Test
    fun `read table with schema name in table name`() {
        val props = Properties()
        props.setProperty("user", USER_NAME)
        props.setProperty("password", PASSWORD)

        // generate kdf schemas by database metadata (as interfaces or extensions)
        // for gradle or as classes under the hood in KNB
        val imdbTableName = "imdb.actors"

        // URL2 has no default database, so the table name carries the schema prefix.
        DriverManager.getConnection(URL2, props).use { connection ->
            val df = DataFrame.readSqlTable(connection, imdbTableName, 100).cast<ActorKDF>()
            val result = df.filter { it[ActorKDF::id] in 11..19 }
            result[0][1] shouldBe "Víctor"

            val schema = DataFrameSchema.readSqlTable(connection, imdbTableName)
            schema.columns["id"]!!.type shouldBe typeOf<Int>()
            schema.columns["first_name"]!!.type shouldBe typeOf<String?>()
        }
    }

    @Test
    fun `read sql query`() {
        // Note: the original query repeated `movies.name is not null` twice;
        // the duplicate predicate was removed (same semantics).
        @Language("sql")
        val sqlQuery =
            """
            select name, year, rank,
            GROUP_CONCAT (genre) as "genres"
            from movies join movies_directors on movie_id = movies.id
            join directors on directors.id=director_id left join movies_genres on movies.id = movies_genres.movie_id
            where directors.first_name = "Quentin" and directors.last_name = "Tarantino"
            and movies.name is not null
            group by name, year, rank
            order by year
            """.trimIndent()
        val props = Properties()
        props.setProperty("user", USER_NAME)
        props.setProperty("password", PASSWORD)

        // generate kdf schemas by database metadata (as interfaces or extensions)
        // for gradle or as classes under the hood in KNB

        DriverManager.getConnection(URL, props).use { connection ->
            val df = DataFrame.readSqlQuery(connection, sqlQuery).cast<RankedMoviesWithGenres>()
            val result =
                df.filter { it[RankedMoviesWithGenres::year] != null && it[RankedMoviesWithGenres::year]!! > 2000 }
            result[0][1] shouldBe 2003

            val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery)
            schema.columns["name"]!!.type shouldBe typeOf<String?>()
            schema.columns["year"]!!.type shouldBe typeOf<Int?>()
        }
    }
}
|
||||
Vendored
+476
@@ -0,0 +1,476 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.add
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.api.select
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Blob
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.Date
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
private const val URL = "jdbc:mariadb://localhost:3307"
|
||||
private const val USER_NAME = "root"
|
||||
private const val PASSWORD = "pass"
|
||||
private const val TEST_DATABASE_NAME = "testKDFdatabase"
|
||||
|
||||
/**
 * DataFrame schema for `table1`, where every column is declared NOT NULL —
 * hence all properties are non-nullable (except as noted).
 */
@DataSchema
interface Table1MariaDb {
    val id: Int
    val bitCol: Boolean
    val tinyintCol: Int
    // NOTE(review): smallintCol is nullable here because the SMALLINT column is the
    // one column in table1 created without NOT NULL.
    val smallintCol: Short?
    val mediumintCol: Int
    val mediumintUnsignedCol: Int
    val integerCol: Int
    val intCol: Int
    // Unsigned 32-bit values need a Long to avoid overflow.
    val integerUnsignedCol: Long
    val bigintCol: Long
    val floatCol: Float
    val doubleCol: Double
    val decimalCol: BigDecimal
    // Temporal columns are surfaced as Strings by this schema.
    val dateCol: String
    val datetimeCol: String
    val timestampCol: String
    val timeCol: String
    val yearCol: String
    val varcharCol: String
    val charCol: String
    val binaryCol: ByteArray
    val varbinaryCol: ByteArray
    val tinyblobCol: ByteArray
    val blobCol: ByteArray
    val mediumblobCol: ByteArray
    val longblobCol: ByteArray
    val textCol: String
    val mediumtextCol: String
    val longtextCol: String
    val enumCol: String
    val setCol: Char
    val jsonCol: String
}
|
||||
|
||||
/**
 * DataFrame schema for `table2`, the all-nullable counterpart of [Table1MariaDb]
 * (only the [id] primary key is non-null).
 */
@DataSchema
interface Table2MariaDb {
    val id: Int
    val bitCol: Boolean?
    val tinyintCol: Int?
    // NOTE(review): smallintCol is Int? here but Short? in Table1MariaDb — looks
    // inconsistent; confirm which type the reader actually produces.
    val smallintCol: Int?
    val mediumintCol: Int?
    val mediumintUnsignedCol: Int?
    val integerCol: Int?
    val intCol: Int?
    val integerUnsignedCol: Long?
    val bigintCol: Long?
    val floatCol: Float?
    val doubleCol: Double?
    // NOTE(review): decimalCol is Double? here but BigDecimal in Table1MariaDb —
    // verify against the JDBC type mapping.
    val decimalCol: Double?
    val dateCol: String?
    val datetimeCol: String?
    val timestampCol: String?
    val timeCol: String?
    val yearCol: String?
    val varcharCol: String?
    val charCol: String?
    val binaryCol: ByteArray?
    val varbinaryCol: ByteArray?
    val tinyblobCol: ByteArray?
    val blobCol: ByteArray?
    val mediumblobCol: ByteArray?
    val longblobCol: ByteArray?
    val textCol: String?
    val mediumtextCol: String?
    val longtextCol: String?
    val enumCol: String?
    val setCol: Char?
    val jsonCol: String?
}
|
||||
|
||||
/**
 * DataFrame schema for the projection used by the join-query test:
 * `id` and `enumCol` come from table1 (NOT NULL), `setCol` from table2 (nullable).
 */
@DataSchema
interface Table3MariaDb {
    val id: Int
    val enumCol: String
    val setCol: Char?
}
|
||||
|
||||
private const val JSON_STRING =
|
||||
"{\"details\": {\"foodType\": \"Pizza\", \"menu\": \"https://www.loumalnatis.com/our-menu\"}, \n" +
|
||||
" \t\"favorites\": [{\"description\": \"Pepperoni deep dish\", \"price\": 18.75}, \n" +
|
||||
"{\"description\": \"The Lou\", \"price\": 24.75}]}"
|
||||
|
||||
@Ignore
|
||||
class MariadbTest {
|
||||
companion object {
|
||||
private lateinit var connection: Connection
|
||||
|
||||
@BeforeClass
|
||||
@JvmStatic
|
||||
fun setUpClass() {
|
||||
connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD)
|
||||
|
||||
connection.createStatement().use { st ->
|
||||
// Drop the test database if it exists
|
||||
val dropDatabaseQuery = "DROP DATABASE IF EXISTS $TEST_DATABASE_NAME"
|
||||
st.executeUpdate(dropDatabaseQuery)
|
||||
|
||||
// Create the test database
|
||||
val createDatabaseQuery = "CREATE DATABASE $TEST_DATABASE_NAME"
|
||||
st.executeUpdate(createDatabaseQuery)
|
||||
|
||||
// Use the newly created database
|
||||
val useDatabaseQuery = "USE $TEST_DATABASE_NAME"
|
||||
st.executeUpdate(useDatabaseQuery)
|
||||
}
|
||||
|
||||
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
|
||||
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
|
||||
|
||||
@Language("SQL")
|
||||
val createTableQuery = """
|
||||
CREATE TABLE IF NOT EXISTS table1 (
|
||||
id INT AUTO_INCREMENT PRIMARY KEY,
|
||||
bitCol BIT NOT NULL,
|
||||
tinyintCol TINYINT NOT NULL,
|
||||
smallintCol SMALLINT,
|
||||
mediumintCol MEDIUMINT NOT NULL,
|
||||
mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
|
||||
integerCol INTEGER NOT NULL,
|
||||
intCol INT NOT NULL,
|
||||
integerUnsignedCol INTEGER UNSIGNED NOT NULL,
|
||||
bigintCol BIGINT NOT NULL,
|
||||
floatCol FLOAT NOT NULL,
|
||||
doubleCol DOUBLE NOT NULL,
|
||||
decimalCol DECIMAL NOT NULL,
|
||||
dateCol DATE NOT NULL,
|
||||
datetimeCol DATETIME NOT NULL,
|
||||
timestampCol TIMESTAMP NOT NULL,
|
||||
timeCol TIME NOT NULL,
|
||||
yearCol YEAR NOT NULL,
|
||||
varcharCol VARCHAR(255) NOT NULL,
|
||||
charCol CHAR(10) NOT NULL,
|
||||
binaryCol BINARY(64) NOT NULL,
|
||||
varbinaryCol VARBINARY(128) NOT NULL,
|
||||
tinyblobCol TINYBLOB NOT NULL,
|
||||
blobCol BLOB NOT NULL,
|
||||
mediumblobCol MEDIUMBLOB NOT NULL ,
|
||||
longblobCol LONGBLOB NOT NULL,
|
||||
textCol TEXT NOT NULL,
|
||||
mediumtextCol MEDIUMTEXT NOT NULL,
|
||||
longtextCol LONGTEXT NOT NULL,
|
||||
enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
|
||||
setCol SET('Option1', 'Option2', 'Option3') NOT NULL,
|
||||
jsonCol JSON NOT NULL
|
||||
CHECK (JSON_VALID(jsonCol))
|
||||
)
|
||||
"""
|
||||
connection.createStatement().execute(createTableQuery.trimIndent())
|
||||
|
||||
@Language("SQL")
|
||||
val createTableQuery2 = """
|
||||
CREATE TABLE IF NOT EXISTS table2 (
|
||||
id INT AUTO_INCREMENT PRIMARY KEY,
|
||||
bitCol BIT,
|
||||
tinyintCol TINYINT,
|
||||
smallintCol SMALLINT,
|
||||
mediumintCol MEDIUMINT,
|
||||
mediumintUnsignedCol MEDIUMINT UNSIGNED,
|
||||
integerCol INTEGER,
|
||||
intCol INT,
|
||||
integerUnsignedCol INTEGER UNSIGNED,
|
||||
bigintCol BIGINT,
|
||||
floatCol FLOAT,
|
||||
doubleCol DOUBLE,
|
||||
decimalCol DECIMAL,
|
||||
dateCol DATE,
|
||||
datetimeCol DATETIME,
|
||||
timestampCol TIMESTAMP,
|
||||
timeCol TIME,
|
||||
yearCol YEAR,
|
||||
varcharCol VARCHAR(255),
|
||||
charCol CHAR(10),
|
||||
binaryCol BINARY(64),
|
||||
varbinaryCol VARBINARY(128),
|
||||
tinyblobCol TINYBLOB,
|
||||
blobCol BLOB,
|
||||
mediumblobCol MEDIUMBLOB,
|
||||
longblobCol LONGBLOB,
|
||||
textCol TEXT,
|
||||
mediumtextCol MEDIUMTEXT,
|
||||
longtextCol LONGTEXT,
|
||||
enumCol ENUM('Value1', 'Value2', 'Value3'),
|
||||
setCol SET('Option1', 'Option2', 'Option3')
|
||||
)
|
||||
"""
|
||||
connection.createStatement().execute(createTableQuery2.trimIndent())
|
||||
|
||||
@Language("SQL")
|
||||
val insertData1 =
|
||||
"""
|
||||
INSERT INTO table1 (
|
||||
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
|
||||
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
|
||||
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
|
||||
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol, jsonCol
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""".trimIndent()
|
||||
|
||||
@Language("SQL")
|
||||
val insertData2 =
|
||||
"""
|
||||
INSERT INTO table2 (
|
||||
bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
|
||||
integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
|
||||
timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
|
||||
mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""".trimIndent()
|
||||
|
||||
connection.prepareStatement(insertData1).use { st ->
|
||||
// Insert data into table1
|
||||
for (i in 1..3) {
|
||||
st.setBoolean(1, true)
|
||||
st.setByte(2, i.toByte())
|
||||
st.setShort(3, (i * 10).toShort())
|
||||
st.setInt(4, i * 100)
|
||||
st.setInt(5, i * 100)
|
||||
st.setInt(6, i * 100)
|
||||
st.setInt(7, i * 100)
|
||||
st.setInt(8, i * 100)
|
||||
st.setInt(9, i * 100)
|
||||
st.setFloat(10, i * 10.0f)
|
||||
st.setDouble(11, i * 10.0)
|
||||
st.setBigDecimal(12, BigDecimal(i * 10))
|
||||
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
|
||||
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
|
||||
st.setInt(17, 2023)
|
||||
st.setString(18, "varcharValue$i")
|
||||
st.setString(19, "charValue$i")
|
||||
st.setBytes(20, "binaryValue".toByteArray())
|
||||
st.setBytes(21, "varbinaryValue".toByteArray())
|
||||
st.setBytes(22, "tinyblobValue".toByteArray())
|
||||
st.setBytes(23, "blobValue".toByteArray())
|
||||
st.setBytes(24, "mediumblobValue".toByteArray())
|
||||
st.setBytes(25, "longblobValue".toByteArray())
|
||||
st.setString(26, "textValue$i")
|
||||
st.setString(27, "mediumtextValue$i")
|
||||
st.setString(28, "longtextValue$i")
|
||||
st.setString(29, "Value$i")
|
||||
st.setString(30, "Option$i")
|
||||
st.setString(31, JSON_STRING)
|
||||
|
||||
st.executeUpdate()
|
||||
}
|
||||
}
|
||||
|
||||
connection.prepareStatement(insertData2).use { st ->
|
||||
// Insert data into table2
|
||||
for (i in 1..3) {
|
||||
st.setBoolean(1, false)
|
||||
st.setByte(2, (i * 2).toByte())
|
||||
st.setShort(3, (i * 20).toShort())
|
||||
st.setInt(4, i * 200)
|
||||
st.setInt(5, i * 200)
|
||||
st.setInt(6, i * 200)
|
||||
st.setInt(7, i * 200)
|
||||
st.setInt(8, i * 200)
|
||||
st.setInt(9, i * 200)
|
||||
st.setFloat(10, i * 20.0f)
|
||||
st.setDouble(11, i * 20.0)
|
||||
st.setBigDecimal(12, BigDecimal(i * 20))
|
||||
st.setDate(13, java.sql.Date(System.currentTimeMillis()))
|
||||
st.setTimestamp(14, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTimestamp(15, java.sql.Timestamp(System.currentTimeMillis()))
|
||||
st.setTime(16, java.sql.Time(System.currentTimeMillis()))
|
||||
st.setInt(17, 2023)
|
||||
st.setString(18, "varcharValue$i")
|
||||
st.setString(19, "charValue$i")
|
||||
st.setBytes(20, "binaryValue".toByteArray())
|
||||
st.setBytes(21, "varbinaryValue".toByteArray())
|
||||
st.setBytes(22, "tinyblobValue".toByteArray())
|
||||
st.setBytes(23, "blobValue".toByteArray())
|
||||
st.setBytes(24, "mediumblobValue".toByteArray())
|
||||
st.setBytes(25, "longblobValue".toByteArray())
|
||||
st.setString(26, null)
|
||||
st.setString(27, null)
|
||||
st.setString(28, "longtextValue$i")
|
||||
st.setString(29, "Value$i")
|
||||
st.setString(30, "Option$i")
|
||||
st.executeUpdate()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
@JvmStatic
|
||||
fun tearDownClass() {
|
||||
try {
|
||||
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
|
||||
connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
|
||||
connection.createStatement().use { st -> st.execute("DROP DATABASE IF EXISTS $TEST_DATABASE_NAME") }
|
||||
connection.close()
|
||||
} catch (e: SQLException) {
|
||||
e.printStackTrace()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `basic test for reading sql tables`() {
|
||||
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
|
||||
val result = df1.filter { it[Table1MariaDb::id] == 1 }
|
||||
result[0][26] shouldBe "textValue1"
|
||||
val byteArray = "tinyblobValue".toByteArray()
|
||||
result[0][22] shouldBe byteArray
|
||||
|
||||
val schema = DataFrameSchema.readSqlTable(connection, "table1")
|
||||
schema.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["textCol"]!!.type shouldBe typeOf<String>()
|
||||
schema.columns["varbinaryCol"]!!.type shouldBe typeOf<ByteArray>()
|
||||
schema.columns["binaryCol"]!!.type shouldBe typeOf<ByteArray>()
|
||||
schema.columns["longblobCol"]!!.type shouldBe typeOf<Blob>()
|
||||
schema.columns["tinyblobCol"]!!.type shouldBe typeOf<Blob>()
|
||||
schema.columns["dateCol"]!!.type shouldBe typeOf<Date>()
|
||||
schema.columns["datetimeCol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
|
||||
schema.columns["timestampCol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
|
||||
schema.columns["timeCol"]!!.type shouldBe typeOf<java.sql.Time>()
|
||||
schema.columns["yearCol"]!!.type shouldBe typeOf<Date>()
|
||||
|
||||
val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MariaDb>()
|
||||
val result2 = df2.filter { it[Table2MariaDb::id] == 1 }
|
||||
result2[0][26] shouldBe null
|
||||
|
||||
val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
|
||||
schema2.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema2.columns["textCol"]!!.type shouldBe typeOf<String?>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from sql query`() {
|
||||
@Language("SQL")
|
||||
val sqlQuery =
|
||||
"""
|
||||
SELECT
|
||||
t1.id,
|
||||
t1.enumCol,
|
||||
t2.setCol
|
||||
FROM table1 t1
|
||||
JOIN table2 t2 ON t1.id = t2.id
|
||||
""".trimIndent()
|
||||
|
||||
val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MariaDb>()
|
||||
val result = df.filter { it[Table3MariaDb::id] == 1 }
|
||||
result[0][2] shouldBe "Option1"
|
||||
|
||||
val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
|
||||
schema.columns["id"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["enumCol"]!!.type shouldBe typeOf<String>()
|
||||
schema.columns["setCol"]!!.type shouldBe typeOf<String?>()
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `read from all tables`() {
|
||||
val dataframes = DataFrame.readAllSqlTables(connection, TEST_DATABASE_NAME, 1000).values.toList()
|
||||
|
||||
val table1Df = dataframes[0].cast<Table1MariaDb>()
|
||||
|
||||
table1Df.rowsCount() shouldBe 3
|
||||
table1Df.filter { it[Table1MariaDb::integerCol] > 100 }.rowsCount() shouldBe 2
|
||||
table1Df[0][11] shouldBe 10.0
|
||||
table1Df[0][26] shouldBe "textValue1"
|
||||
table1Df[0][31] shouldBe JSON_STRING // TODO: https://github.com/Kotlin/dataframe/issues/462
|
||||
|
||||
val table2Df = dataframes[1].cast<Table2MariaDb>()
|
||||
|
||||
table2Df.rowsCount() shouldBe 3
|
||||
table2Df.filter {
|
||||
it[Table2MariaDb::integerCol] != null && it[Table2MariaDb::integerCol]!! > 400
|
||||
}.rowsCount() shouldBe 1
|
||||
table2Df[0][11] shouldBe 20.0
|
||||
table2Df[0][26] shouldBe null
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `reading numeric types`() {
|
||||
val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MariaDb>()
|
||||
|
||||
val result = df1.select("tinyintCol")
|
||||
.add("tinyintCol2") { it[Table1MariaDb::tinyintCol] }
|
||||
|
||||
result[0][1] shouldBe 1
|
||||
|
||||
val result1 = df1.select("smallintCol")
|
||||
.add("smallintCol2") { it[Table1MariaDb::smallintCol] }
|
||||
|
||||
result1[0][1] shouldBe 10
|
||||
|
||||
val result2 = df1.select("mediumintCol")
|
||||
.add("mediumintCol2") { it[Table1MariaDb::mediumintCol] }
|
||||
|
||||
result2[0][1] shouldBe 100
|
||||
|
||||
val result3 = df1.select("mediumintUnsignedCol")
|
||||
.add("mediumintUnsignedCol2") { it[Table1MariaDb::mediumintUnsignedCol] }
|
||||
|
||||
result3[0][1] shouldBe 100
|
||||
|
||||
val result4 = df1.select("integerUnsignedCol")
|
||||
.add("integerUnsignedCol2") { it[Table1MariaDb::integerUnsignedCol] }
|
||||
|
||||
result4[0][1] shouldBe 100L
|
||||
|
||||
val result5 = df1.select("bigintCol")
|
||||
.add("bigintCol2") { it[Table1MariaDb::bigintCol] }
|
||||
|
||||
result5[0][1] shouldBe 100
|
||||
|
||||
val result6 = df1.select("floatCol")
|
||||
.add("floatCol2") { it[Table1MariaDb::floatCol] }
|
||||
|
||||
result6[0][1] shouldBe 10.0f
|
||||
|
||||
val result7 = df1.select("doubleCol")
|
||||
.add("doubleCol2") { it[Table1MariaDb::doubleCol] }
|
||||
|
||||
result7[0][1] shouldBe 10.0
|
||||
|
||||
val result8 = df1.select("decimalCol")
|
||||
.add("decimalCol2") { it[Table1MariaDb::decimalCol] }
|
||||
|
||||
result8[0][1] shouldBe BigDecimal("10")
|
||||
|
||||
val schema = DataFrameSchema.readSqlTable(connection, "table1")
|
||||
|
||||
schema.columns["tinyintCol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["smallintCol"]!!.type shouldBe typeOf<Short?>()
|
||||
schema.columns["mediumintCol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["mediumintUnsignedCol"]!!.type shouldBe typeOf<Int>()
|
||||
schema.columns["integerUnsignedCol"]!!.type shouldBe typeOf<Long>()
|
||||
schema.columns["bigintCol"]!!.type shouldBe typeOf<Long>()
|
||||
schema.columns["floatCol"]!!.type shouldBe typeOf<Float>()
|
||||
schema.columns["doubleCol"]!!.type shouldBe typeOf<Double>()
|
||||
schema.columns["decimalCol"]!!.type shouldBe typeOf<BigDecimal>()
|
||||
}
|
||||
|
||||
/** Delegates to the shared cross-database nullability-inference scenario using this class's connection. */
@Test
fun `infer nullability`() {
    inferNullability(connection)
}
|
||||
}
|
||||
Vendored
+298
@@ -0,0 +1,298 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.Date
|
||||
import java.util.UUID
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
// Connection parameters for the local MS SQL Server instance these tests run against.
// NOTE(review): credentials are hard-coded test values; the suite is @Ignore'd and only run manually.
private const val URL = "jdbc:sqlserver://localhost:1433;encrypt=true;trustServerCertificate=true"
private const val USER_NAME = "root"
private const val PASSWORD = "pass"
// Scratch database that is (re)created in setUpClass and dropped in tearDownClass.
private const val TEST_DATABASE_NAME = "testKDFdatabase"
|
||||
|
||||
/**
 * Expected DataFrame row schema for the MS SQL Server fixture table `Table1`
 * created in [MSSQLTest.setUpClass]. Each property mirrors one column of the
 * DDL; property types document the Kotlin type each SQL type is mapped to
 * (e.g. FLOAT -> Double, REAL -> Float, UNIQUEIDENTIFIER -> Char,
 * GEOMETRY/GEOGRAPHY -> String).
 */
@DataSchema
interface Table1MSSSQL {
    val id: Int
    val bigintColumn: Long
    val binaryColumn: ByteArray
    val bitColumn: Boolean
    val charColumn: Char
    val dateColumn: Date
    val datetime3Column: java.sql.Timestamp
    val datetime2Column: java.sql.Timestamp
    val datetimeoffset2Column: String
    val decimalColumn: BigDecimal
    val floatColumn: Double
    // imageColumn is the only nullable field: the fixture inserts NULL for it.
    val imageColumn: ByteArray?
    val intColumn: Int
    val moneyColumn: BigDecimal
    val ncharColumn: Char
    val ntextColumn: String
    val numericColumn: BigDecimal
    val nvarcharColumn: String
    val nvarcharMaxColumn: String
    val realColumn: Float
    val smalldatetimeColumn: java.sql.Timestamp
    val smallintColumn: Int
    val smallmoneyColumn: BigDecimal
    val timeColumn: java.sql.Time
    val timestampColumn: java.sql.Timestamp
    val tinyintColumn: Int
    val uniqueidentifierColumn: Char
    val varbinaryColumn: ByteArray
    val varbinaryMaxColumn: ByteArray
    val varcharColumn: String
    val varcharMaxColumn: String
    val xmlColumn: String
    val sqlvariantColumn: String
    val geometryColumn: String
    val geographyColumn: String
}
|
||||
|
||||
/**
 * Integration tests for reading MS SQL Server tables and queries into DataFrame.
 *
 * Requires a locally running SQL Server reachable via [URL]; the whole class is
 * @Ignore'd so it only runs when enabled manually. A scratch database
 * [TEST_DATABASE_NAME] is (re)created before the tests and dropped afterwards.
 */
@Ignore
class MSSQLTest {
    companion object {
        // Shared JDBC connection for all tests; opened in setUpClass, closed in tearDownClass.
        private lateinit var connection: Connection

        /**
         * (Re)creates the scratch database, creates `Table1` covering a wide range of
         * MS SQL types, and inserts five sample rows via positional parameter binds.
         * The bind indices (1..35) must stay exactly in sync with the column list of
         * `insertData1` — do not reorder either without updating the other.
         */
        @BeforeClass
        @JvmStatic
        fun setUpClass() {
            connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD)

            connection.createStatement().use { st ->
                // Drop the test database if it exists
                val dropDatabaseQuery = "IF DB_ID('$TEST_DATABASE_NAME') IS NOT NULL\n" +
                    "DROP DATABASE $TEST_DATABASE_NAME"
                st.executeUpdate(dropDatabaseQuery)

                // Create the test database
                val createDatabaseQuery = "CREATE DATABASE $TEST_DATABASE_NAME"
                st.executeUpdate(createDatabaseQuery)

                // Use the newly created database
                val useDatabaseQuery = "USE $TEST_DATABASE_NAME"
                st.executeUpdate(useDatabaseQuery)
            }

            @Language("SQL")
            val createTableQuery = """
            CREATE TABLE Table1 (
                id INT NOT NULL IDENTITY PRIMARY KEY,
                bigintColumn BIGINT,
                binaryColumn BINARY(50),
                bitColumn BIT,
                charColumn CHAR(10),
                dateColumn DATE,
                datetime3Column DATETIME2(3),
                datetime2Column DATETIME2,
                datetimeoffset2Column DATETIMEOFFSET(2),
                decimalColumn DECIMAL(10,2),
                floatColumn FLOAT,
                imageColumn IMAGE,
                intColumn INT,
                moneyColumn MONEY,
                ncharColumn NCHAR(10),
                ntextColumn NTEXT,
                numericColumn NUMERIC(10,2),
                nvarcharColumn NVARCHAR(50),
                nvarcharMaxColumn NVARCHAR(MAX),
                realColumn REAL,
                smalldatetimeColumn SMALLDATETIME,
                smallintColumn SMALLINT,
                smallmoneyColumn SMALLMONEY,
                textColumn TEXT,
                timeColumn TIME,
                timestampColumn DATETIME2,
                tinyintColumn TINYINT,
                uniqueidentifierColumn UNIQUEIDENTIFIER,
                varbinaryColumn VARBINARY(50),
                varbinaryMaxColumn VARBINARY(MAX),
                varcharColumn VARCHAR(50),
                varcharMaxColumn VARCHAR(MAX),
                xmlColumn XML,
                sqlvariantColumn SQL_VARIANT,
                geometryColumn GEOMETRY,
                geographyColumn GEOGRAPHY
            );
            """

            connection.createStatement().execute(createTableQuery.trimIndent())

            @Language("SQL")
            val insertData1 =
                """
                INSERT INTO Table1 (
                bigintColumn, binaryColumn, bitColumn, charColumn, dateColumn, datetime3Column, datetime2Column,
                datetimeoffset2Column, decimalColumn, floatColumn, imageColumn, intColumn, moneyColumn, ncharColumn,
                ntextColumn, numericColumn, nvarcharColumn, nvarcharMaxColumn, realColumn, smalldatetimeColumn,
                smallintColumn, smallmoneyColumn, textColumn, timeColumn, timestampColumn, tinyintColumn,
                uniqueidentifierColumn, varbinaryColumn, varbinaryMaxColumn, varcharColumn, varcharMaxColumn,
                xmlColumn, sqlvariantColumn, geometryColumn, geographyColumn
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """.trimIndent()

            connection.prepareStatement(insertData1).use { st ->
                // Five rows; string-valued columns embed the row number so tests can assert on them.
                for (i in 1..5) {
                    st.setLong(1, 123456789012345L) // bigintColumn
                    st.setBytes(2, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // binaryColumn
                    st.setBoolean(3, true) // bitColumn
                    st.setString(4, "Sample") // charColumn
                    st.setDate(5, java.sql.Date(System.currentTimeMillis())) // dateColumn
                    st.setTimestamp(6, java.sql.Timestamp(System.currentTimeMillis())) // datetime3Column
                    st.setTimestamp(7, java.sql.Timestamp(System.currentTimeMillis())) // datetime2Column
                    st.setTimestamp(8, java.sql.Timestamp(System.currentTimeMillis())) // datetimeoffset2Column
                    st.setBigDecimal(9, BigDecimal("12345.67")) // decimalColumn
                    st.setFloat(10, 123.45f) // floatColumn
                    st.setNull(11, java.sql.Types.NULL) // imageColumn (assuming nullable)
                    st.setInt(12, 123456) // intColumn
                    st.setBigDecimal(13, BigDecimal("123.45")) // moneyColumn
                    st.setString(14, "Sample") // ncharColumn
                    st.setString(15, "Sample$i text") // ntextColumn
                    st.setBigDecimal(16, BigDecimal("1234.56")) // numericColumn
                    st.setString(17, "Sample") // nvarcharColumn
                    st.setString(18, "Sample$i text") // nvarcharMaxColumn
                    st.setFloat(19, 123.45f) // realColumn
                    st.setTimestamp(20, java.sql.Timestamp(System.currentTimeMillis())) // smalldatetimeColumn
                    st.setInt(21, 123) // smallintColumn
                    st.setBigDecimal(22, BigDecimal("123.45")) // smallmoneyColumn
                    st.setString(23, "Sample$i text") // textColumn
                    st.setTime(24, java.sql.Time(System.currentTimeMillis())) // timeColumn
                    st.setTimestamp(25, java.sql.Timestamp(System.currentTimeMillis())) // timestampColumn
                    st.setInt(26, 123) // tinyintColumn
                    // st.setObject(27, null) // udtColumn (assuming nullable)
                    st.setObject(27, UUID.randomUUID()) // uniqueidentifierColumn
                    st.setBytes(28, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryColumn
                    st.setBytes(29, byteArrayOf(0x01, 0x23, 0x45, 0x67, 0x67, 0x67, 0x67, 0x67)) // varbinaryMaxColumn
                    st.setString(30, "Sample$i") // varcharColumn
                    st.setString(31, "Sample$i text") // varcharMaxColumn
                    st.setString(32, "<xml>Sample$i</xml>") // xmlColumn
                    st.setString(33, "SQL_VARIANT") // sqlvariantColumn
                    // Raw SQL Server geometry wire format; presumably encodes a simple point — TODO confirm.
                    st.setBytes(
                        34,
                        @Suppress("ktlint:standard:argument-list-wrapping")
                        byteArrayOf(
                            0xE6.toByte(), 0x10, 0x00, 0x00, 0x01, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
                            0x44, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x05, 0x4C, 0x0,
                        ),
                    ) // geometryColumn
                    st.setString(35, "POINT(1 1)") // geographyColumn
                    st.executeUpdate()
                }
            }
        }

        /** Drops the scratch database and closes the shared connection; SQL failures are logged, not rethrown. */
        @AfterClass
        @JvmStatic
        fun tearDownClass() {
            try {
                connection.createStatement().use { st -> st.execute("DROP DATABASE IF EXISTS $TEST_DATABASE_NAME") }
                connection.close()
            } catch (e: SQLException) {
                e.printStackTrace()
            }
        }
    }

    /** Reads Table1 with a row limit, then verifies individual values and the inferred column schema. */
    @Test
    fun `basic test for reading sql tables`() {
        val df1 = DataFrame.readSqlTable(connection, "table1", limit = 5).cast<Table1MSSSQL>()

        val result = df1.filter { it[Table1MSSSQL::id] == 1 }
        result[0][30] shouldBe "Sample1"
        result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
        result[0][Table1MSSSQL::bitColumn] shouldBe true
        result[0][Table1MSSSQL::intColumn] shouldBe 123456
        result[0][Table1MSSSQL::ntextColumn] shouldBe "Sample1 text"

        // All non-key columns are nullable in the DDL, hence the `?` types below;
        // `id` is the NOT NULL identity column.
        val schema = DataFrameSchema.readSqlTable(connection, "table1")
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
        schema.columns["binaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["bitColumn"]!!.type shouldBe typeOf<Boolean?>()
        schema.columns["charColumn"]!!.type shouldBe typeOf<Char?>()
        schema.columns["dateColumn"]!!.type shouldBe typeOf<Date?>()
        schema.columns["datetime3Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["datetime2Column"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["datetimeoffset2Column"]!!.type shouldBe typeOf<String?>()
        schema.columns["decimalColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["floatColumn"]!!.type shouldBe typeOf<Double?>()
        schema.columns["imageColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["intColumn"]!!.type shouldBe typeOf<Int?>()
        schema.columns["moneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["ncharColumn"]!!.type shouldBe typeOf<Char?>()
        schema.columns["ntextColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["numericColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["nvarcharColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["nvarcharMaxColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["realColumn"]!!.type shouldBe typeOf<Float?>()
        schema.columns["smalldatetimeColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["smallintColumn"]!!.type shouldBe typeOf<Int?>()
        schema.columns["smallmoneyColumn"]!!.type shouldBe typeOf<BigDecimal?>()
        schema.columns["timeColumn"]!!.type shouldBe typeOf<java.sql.Time?>()
        schema.columns["timestampColumn"]!!.type shouldBe typeOf<java.sql.Timestamp?>()
        schema.columns["tinyintColumn"]!!.type shouldBe typeOf<Int?>()
        schema.columns["uniqueidentifierColumn"]!!.type shouldBe typeOf<Char?>()
        schema.columns["varbinaryColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["varbinaryMaxColumn"]!!.type shouldBe typeOf<ByteArray?>()
        schema.columns["varcharColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["varcharMaxColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["xmlColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["sqlvariantColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["geometryColumn"]!!.type shouldBe typeOf<String?>()
        schema.columns["geographyColumn"]!!.type shouldBe typeOf<String?>()
    }

    /** Reads a two-column projection via a raw SQL query and validates value and inferred schema. */
    @Test
    fun `read from sql query`() {
        @Language("SQL")
        val sqlQuery =
            """
            SELECT
               Table1.id,
               Table1.bigintColumn
            FROM Table1
            """.trimIndent()

        val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery, limit = 3).cast<Table1MSSSQL>()
        val result = df.filter { it[Table1MSSSQL::id] == 1 }
        result[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L

        val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigintColumn"]!!.type shouldBe typeOf<Long?>()
    }

    /** Reads all tables of the scratch database at once, with a per-table limit of 4 rows. */
    @Test
    fun `read from all tables`() {
        val dataframes = DataFrame.readAllSqlTables(connection, TEST_DATABASE_NAME, 4).values.toList()

        val table1Df = dataframes[0].cast<Table1MSSSQL>()

        table1Df.rowsCount() shouldBe 4
        table1Df.filter { it[Table1MSSSQL::id] > 2 }.rowsCount() shouldBe 2
        table1Df[0][Table1MSSSQL::bigintColumn] shouldBe 123456789012345L
    }

    /** Delegates to the shared cross-database nullability-inference scenario. */
    @Test
    fun `infer nullability`() {
        inferNullability(connection)
    }
}
|
||||
Vendored
+479
@@ -0,0 +1,479 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.add
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.api.select
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.util.Date
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
// Connection parameters for the local MySQL instance these tests run against.
private const val URL = "jdbc:mysql://localhost:3306"
private const val USER_NAME = "root"
private const val PASSWORD = "pass"
// Scratch database that is (re)created in setUpClass and dropped in tearDownClass.
private const val TEST_DATABASE_NAME = "testKDFdatabase"
// Fixed epoch-millis instant used for all date/time fixture values so results are deterministic.
private const val TIMESTAMP = 1726246245460
|
||||
|
||||
/**
 * Expected DataFrame row schema for the MySQL fixture `table1`
 * (all columns declared NOT NULL in [MySqlTest.setUpClass]), hence the
 * non-nullable property types. Date/time columns are represented as String here.
 */
@DataSchema
interface Table1MySql {
    val id: Int
    val bitCol: Boolean
    val tinyintCol: Int
    val smallintCol: Int
    val mediumintCol: Int
    val mediumintUnsignedCol: Int
    val integerCol: Int
    val intCol: Int
    val integerUnsignedCol: Long
    val bigintCol: Long
    val floatCol: Float
    val doubleCol: Double
    val decimalCol: BigDecimal
    val dateCol: String
    val datetimeCol: String
    val timestampCol: String
    val timeCol: String
    val yearCol: String
    val varcharCol: String
    val charCol: String
    val binaryCol: ByteArray
    val varbinaryCol: ByteArray
    val tinyblobCol: ByteArray
    val blobCol: ByteArray
    val mediumblobCol: ByteArray
    val longblobCol: ByteArray
    val textCol: String
    val mediumtextCol: String
    val longtextCol: String
    val enumCol: String
    val setCol: Char
}
|
||||
|
||||
/**
 * Expected DataFrame row schema for the MySQL fixture `table2` — the nullable
 * counterpart of [Table1MySql] (no NOT NULL constraints except the `id` key),
 * plus the extra `jsonCol` property.
 */
@DataSchema
interface Table2MySql {
    val id: Int
    val bitCol: Boolean?
    val tinyintCol: Int?
    val smallintCol: Int?
    val mediumintCol: Int?
    val mediumintUnsignedCol: Int?
    val integerCol: Int?
    val intCol: Int?
    val integerUnsignedCol: Long?
    val bigintCol: Long?
    val floatCol: Float?
    val doubleCol: Double?
    // NOTE(review): Table1MySql maps DECIMAL to BigDecimal but this uses Double? — confirm intentional.
    val decimalCol: Double?
    val dateCol: String?
    val datetimeCol: String?
    val timestampCol: String?
    val timeCol: String?
    val yearCol: String?
    val varcharCol: String?
    val charCol: String?
    val binaryCol: ByteArray?
    val varbinaryCol: ByteArray?
    val tinyblobCol: ByteArray?
    val blobCol: ByteArray?
    val mediumblobCol: ByteArray?
    val longblobCol: ByteArray?
    val textCol: String?
    val mediumtextCol: String?
    val longtextCol: String?
    val enumCol: String?
    val setCol: Char?
    val jsonCol: String?
}
|
||||
|
||||
/**
 * Row schema for the `table1 JOIN table2` projection used by the
 * `read from sql query` test: non-null columns from table1, nullable
 * `setCol` from table2.
 */
@DataSchema
interface Table3MySql {
    val id: Int
    val enumCol: String
    val setCol: Char?
}
|
||||
|
||||
/**
 * Integration tests for reading MySQL tables and queries into DataFrame.
 *
 * Requires a locally running MySQL reachable via [URL]; the class is @Ignore'd
 * so it only runs when enabled manually. A scratch database [TEST_DATABASE_NAME]
 * with two fixture tables (table1 = NOT NULL columns, table2 = nullable columns)
 * is built before the tests and dropped afterwards.
 */
@Ignore
class MySqlTest {
    companion object {
        // Shared JDBC connection for all tests; opened in setUpClass, closed in tearDownClass.
        private lateinit var connection: Connection

        /**
         * (Re)creates the scratch database and both fixture tables, then inserts
         * three rows into each via positional parameter binds. The bind indices
         * (1..31) must stay exactly in sync with the column lists of insertData1/2
         * (index 32 — `location` — is filled inline by ST_GeomFromText in the SQL).
         */
        @BeforeClass
        @JvmStatic
        fun setUpClass() {
            connection = DriverManager.getConnection(URL, USER_NAME, PASSWORD)

            connection.createStatement().use { st ->
                // Drop the test database if it exists
                val dropDatabaseQuery = "DROP DATABASE IF EXISTS $TEST_DATABASE_NAME"
                st.executeUpdate(dropDatabaseQuery)

                // Create the test database
                val createDatabaseQuery = "CREATE DATABASE $TEST_DATABASE_NAME"
                st.executeUpdate(createDatabaseQuery)

                // Use the newly created database
                val useDatabaseQuery = "USE $TEST_DATABASE_NAME"
                st.executeUpdate(useDatabaseQuery)
            }

            connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
            connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }

            @Language("SQL")
            val createTableQuery = """
            CREATE TABLE IF NOT EXISTS table1 (
                id INT AUTO_INCREMENT PRIMARY KEY,
                bitCol BIT NOT NULL,
                tinyintCol TINYINT NOT NULL,
                smallintCol SMALLINT NOT NULL,
                mediumintCol MEDIUMINT NOT NULL,
                mediumintUnsignedCol MEDIUMINT UNSIGNED NOT NULL,
                integerCol INTEGER NOT NULL,
                intCol INT NOT NULL,
                integerUnsignedCol INTEGER UNSIGNED NOT NULL,
                bigintCol BIGINT NOT NULL,
                floatCol FLOAT NOT NULL,
                doubleCol DOUBLE NOT NULL,
                decimalCol DECIMAL NOT NULL,
                dateCol DATE NOT NULL,
                datetimeCol DATETIME NOT NULL,
                timestampCol TIMESTAMP NOT NULL,
                timeCol TIME NOT NULL,
                yearCol YEAR NOT NULL,
                varcharCol VARCHAR(255) NOT NULL,
                charCol CHAR(10) NOT NULL,
                binaryCol BINARY(64) NOT NULL,
                varbinaryCol VARBINARY(128) NOT NULL,
                tinyblobCol TINYBLOB NOT NULL,
                blobCol BLOB NOT NULL,
                mediumblobCol MEDIUMBLOB NOT NULL ,
                longblobCol LONGBLOB NOT NULL,
                textCol TEXT NOT NULL,
                mediumtextCol MEDIUMTEXT NOT NULL,
                longtextCol LONGTEXT NOT NULL,
                enumCol ENUM('Value1', 'Value2', 'Value3') NOT NULL,
                setCol SET('Option1', 'Option2', 'Option3') NOT NULL,
                location GEOMETRY,
                data JSON
                CHECK (JSON_VALID(data))
            )
            """

            connection.createStatement().execute(createTableQuery.trimIndent())

            // Same column set as table1, but without NOT NULL constraints.
            @Language("SQL")
            val createTableQuery2 = """
            CREATE TABLE IF NOT EXISTS table2 (
                id INT AUTO_INCREMENT PRIMARY KEY,
                bitCol BIT,
                tinyintCol TINYINT,
                smallintCol SMALLINT,
                mediumintCol MEDIUMINT,
                mediumintUnsignedCol MEDIUMINT UNSIGNED,
                integerCol INTEGER,
                intCol INT,
                integerUnsignedCol INTEGER UNSIGNED,
                bigintCol BIGINT,
                floatCol FLOAT,
                doubleCol DOUBLE,
                decimalCol DECIMAL,
                dateCol DATE,
                datetimeCol DATETIME,
                timestampCol TIMESTAMP,
                timeCol TIME,
                yearCol YEAR,
                varcharCol VARCHAR(255),
                charCol CHAR(10),
                binaryCol BINARY(64),
                varbinaryCol VARBINARY(128),
                tinyblobCol TINYBLOB,
                blobCol BLOB,
                mediumblobCol MEDIUMBLOB,
                longblobCol LONGBLOB,
                textCol TEXT,
                mediumtextCol MEDIUMTEXT,
                longtextCol LONGTEXT,
                enumCol ENUM('Value1', 'Value2', 'Value3'),
                setCol SET('Option1', 'Option2', 'Option3'),
                location GEOMETRY,
                data JSON
                CHECK (JSON_VALID(data))
            )
            """

            connection.createStatement().execute(createTableQuery2.trimIndent())

            @Language("SQL")
            val insertData1 =
                """
                INSERT INTO table1 (
                bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
                integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
                timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
                mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol, location, data
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ST_GeomFromText('POINT(1 1)'), ?)
                """.trimIndent()

            @Language("SQL")
            val insertData2 =
                """
                INSERT INTO table2 (
                bitCol, tinyintCol, smallintCol, mediumintCol, mediumintUnsignedCol, integerCol, intCol,
                integerUnsignedCol, bigintCol, floatCol, doubleCol, decimalCol, dateCol, datetimeCol, timestampCol,
                timeCol, yearCol, varcharCol, charCol, binaryCol, varbinaryCol, tinyblobCol, blobCol,
                mediumblobCol, longblobCol, textCol, mediumtextCol, longtextCol, enumCol, setCol, location, data
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ST_GeomFromText('POINT(1 1)'), ?)
                """.trimIndent()

            connection.prepareStatement(insertData1).use { st ->
                // Insert data into table1
                // Numeric values scale with the row number i so tests can assert on row 1.
                for (i in 1..3) {
                    st.setBoolean(1, true)
                    st.setByte(2, i.toByte())
                    st.setShort(3, (i * 10).toShort())
                    st.setInt(4, i * 100)
                    st.setInt(5, i * 100)
                    st.setInt(6, i * 100)
                    st.setInt(7, i * 100)
                    st.setInt(8, i * 100)
                    st.setInt(9, i * 100)
                    st.setFloat(10, i * 10.0f)
                    st.setDouble(11, i * 10.0)
                    st.setBigDecimal(12, BigDecimal(i * 10))
                    st.setDate(13, java.sql.Date(TIMESTAMP))
                    st.setTimestamp(14, java.sql.Timestamp(TIMESTAMP))
                    st.setTimestamp(15, java.sql.Timestamp(TIMESTAMP))
                    st.setTime(16, java.sql.Time(TIMESTAMP))
                    st.setInt(17, 2023)
                    st.setString(18, "varcharValue$i")
                    st.setString(19, "charValue$i")
                    st.setBytes(20, "binaryValue".toByteArray())
                    st.setBytes(21, "varbinaryValue".toByteArray())
                    st.setBytes(22, "tinyblobValue".toByteArray())
                    st.setBytes(23, "blobValue".toByteArray())
                    st.setBytes(24, "mediumblobValue".toByteArray())
                    st.setBytes(25, "longblobValue".toByteArray())
                    st.setString(26, "textValue$i")
                    st.setString(27, "mediumtextValue$i")
                    st.setString(28, "longtextValue$i")
                    st.setString(29, "Value$i")
                    st.setString(30, "Option$i")
                    st.setString(31, "{\"key\": \"value\"}")
                    st.executeUpdate()
                }
            }

            connection.prepareStatement(insertData2).use { st ->
                // Insert data into table2
                // Doubled magnitudes vs. table1; textCol/mediumtextCol are left NULL
                // so nullability inference has something to detect.
                for (i in 1..3) {
                    st.setBoolean(1, false)
                    st.setByte(2, (i * 2).toByte())
                    st.setShort(3, (i * 20).toShort())
                    st.setInt(4, i * 200)
                    st.setInt(5, i * 200)
                    st.setInt(6, i * 200)
                    st.setInt(7, i * 200)
                    st.setInt(8, i * 200)
                    st.setInt(9, i * 200)
                    st.setFloat(10, i * 20.0f)
                    st.setDouble(11, i * 20.0)
                    st.setBigDecimal(12, BigDecimal(i * 20))
                    st.setDate(13, java.sql.Date(TIMESTAMP))
                    st.setTimestamp(14, java.sql.Timestamp(TIMESTAMP))
                    st.setTimestamp(15, java.sql.Timestamp(TIMESTAMP))
                    st.setTime(16, java.sql.Time(TIMESTAMP))
                    st.setInt(17, 2023)
                    st.setString(18, "varcharValue$i")
                    st.setString(19, "charValue$i")
                    st.setBytes(20, "binaryValue".toByteArray())
                    st.setBytes(21, "varbinaryValue".toByteArray())
                    st.setBytes(22, "tinyblobValue".toByteArray())
                    st.setBytes(23, "blobValue".toByteArray())
                    st.setBytes(24, "mediumblobValue".toByteArray())
                    st.setBytes(25, "longblobValue".toByteArray())
                    st.setString(26, null)
                    st.setString(27, null)
                    st.setString(28, "longtextValue$i")
                    st.setString(29, "Value$i")
                    st.setString(30, "Option$i")
                    st.setString(31, "{\"key\": \"value\"}")
                    st.executeUpdate()
                }
            }
        }

        /** Drops the fixture tables and scratch database, then closes the connection; SQL failures are logged only. */
        @AfterClass
        @JvmStatic
        fun tearDownClass() {
            try {
                connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
                connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }
                connection.createStatement().use { st -> st.execute("DROP DATABASE IF EXISTS $TEST_DATABASE_NAME") }
                connection.close()
            } catch (e: SQLException) {
                e.printStackTrace()
            }
        }
    }

    /** Reads both fixture tables and checks values, inferred types, and nullability differences. */
    @Test
    fun `basic test for reading sql tables`() {
        val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()
        val result = df1.filter { it[Table1MySql::id] == 1 }
        result[0][26] shouldBe "textValue1"
        result[0][22] shouldBe "tinyblobValue".toByteArray()

        val schema = DataFrameSchema.readSqlTable(connection, "table1")
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["textCol"]!!.type shouldBe typeOf<String>()
        schema.columns["dateCol"]!!.type shouldBe typeOf<Date>()
        schema.columns["datetimeCol"]!!.type shouldBe typeOf<java.time.LocalDateTime>()
        schema.columns["timestampCol"]!!.type shouldBe typeOf<java.sql.Timestamp>()
        schema.columns["timeCol"]!!.type shouldBe typeOf<java.sql.Time>()
        schema.columns["yearCol"]!!.type shouldBe typeOf<Date>()
        schema.columns["textCol"]!!.type shouldBe typeOf<String>()
        schema.columns["varbinaryCol"]!!.type shouldBe typeOf<ByteArray>()
        schema.columns["binaryCol"]!!.type shouldBe typeOf<ByteArray>()
        schema.columns["longblobCol"]!!.type shouldBe typeOf<ByteArray>()
        schema.columns["tinyblobCol"]!!.type shouldBe typeOf<ByteArray>()

        val df2 = DataFrame.readSqlTable(connection, "table2").cast<Table2MySql>()
        val result2 = df2.filter { it[Table2MySql::id] == 1 }
        // textCol (index 26) is inserted as NULL in table2.
        result2[0][26] shouldBe null

        val schema2 = DataFrameSchema.readSqlTable(connection, "table2")
        schema2.columns["id"]!!.type shouldBe typeOf<Int>()
        schema2.columns["textCol"]!!.type shouldBe typeOf<String?>()
    }

    /** Joins table1 and table2 with a raw SQL query and validates values and schema of the projection. */
    @Test
    fun `read from sql query`() {
        @Language("SQL")
        val sqlQuery =
            """
            SELECT
               t1.id,
               t1.enumCol,
               t2.setCol
            FROM table1 t1
            JOIN table2 t2 ON t1.id = t2.id
            """.trimIndent()

        val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<Table3MySql>()
        val result = df.filter { it[Table3MySql::id] == 1 }
        result[0][2] shouldBe "Option1"

        val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["enumCol"]!!.type shouldBe typeOf<String>()
        schema.columns["setCol"]!!.type shouldBe typeOf<String?>()
    }

    /** Reads every table of the current database at once and spot-checks both fixture tables. */
    @Test
    fun `read from all tables`() {
        val dataframes = DataFrame.readAllSqlTables(connection).values.toList()

        val table1Df = dataframes[0].cast<Table1MySql>()

        table1Df.rowsCount() shouldBe 3
        table1Df.filter { it[Table1MySql::integerCol] > 100 }.rowsCount() shouldBe 2
        table1Df[0][11] shouldBe 10.0
        table1Df[0][26] shouldBe "textValue1"

        val table2Df = dataframes[1].cast<Table2MySql>()

        table2Df.rowsCount() shouldBe 3
        table2Df.filter {
            it[Table2MySql::integerCol] != null && it[Table2MySql::integerCol]!! > 400
        }.rowsCount() shouldBe 1
        table2Df[0][11] shouldBe 20.0
        table2Df[0][26] shouldBe null
    }

    /**
     * Checks the runtime value and inferred schema type for each numeric MySQL
     * column by re-deriving it through `add` from the typed accessor.
     */
    @Test
    fun `reading numeric types`() {
        val df1 = DataFrame.readSqlTable(connection, "table1").cast<Table1MySql>()

        val result = df1.select("tinyintCol").add("tinyintCol2") { it[Table1MySql::tinyintCol] }

        result[0][1] shouldBe 1.toByte()

        val result1 = df1.select("smallintCol")
            .add("smallintCol2") { it[Table1MySql::smallintCol] }

        result1[0][1] shouldBe 10.toShort()

        val result2 = df1.select("mediumintCol")
            .add("mediumintCol2") { it[Table1MySql::mediumintCol] }

        result2[0][1] shouldBe 100

        val result3 = df1.select("mediumintUnsignedCol")
            .add("mediumintUnsignedCol2") { it[Table1MySql::mediumintUnsignedCol] }

        result3[0][1] shouldBe 100

        val result4 = df1.select("integerUnsignedCol")
            .add("integerUnsignedCol2") { it[Table1MySql::integerUnsignedCol] }

        result4[0][1] shouldBe 100L

        val result5 = df1.select("bigintCol")
            .add("bigintCol2") { it[Table1MySql::bigintCol] }

        result5[0][1] shouldBe 100

        val result6 = df1.select("floatCol")
            .add("floatCol2") { it[Table1MySql::floatCol] }

        result6[0][1] shouldBe 10.0f

        val result7 = df1.select("doubleCol")
            .add("doubleCol2") { it[Table1MySql::doubleCol] }

        result7[0][1] shouldBe 10.0

        val result8 = df1.select("decimalCol")
            .add("decimalCol2") { it[Table1MySql::decimalCol] }

        result8[0][1] shouldBe BigDecimal("10")

        val schema = DataFrameSchema.readSqlTable(connection, "table1")

        schema.columns["tinyintCol"]!!.type shouldBe typeOf<Int>()
        schema.columns["smallintCol"]!!.type shouldBe typeOf<Int>()
        schema.columns["mediumintCol"]!!.type shouldBe typeOf<Int>()
        schema.columns["mediumintUnsignedCol"]!!.type shouldBe typeOf<Int>()
        schema.columns["integerUnsignedCol"]!!.type shouldBe typeOf<Long>()
        schema.columns["bigintCol"]!!.type shouldBe typeOf<Long>()
        schema.columns["floatCol"]!!.type shouldBe typeOf<Float>()
        schema.columns["doubleCol"]!!.type shouldBe typeOf<Double>()
        schema.columns["decimalCol"]!!.type shouldBe typeOf<BigDecimal>()
        // TODO: all unsigned types
        // TODO: new mapping system based on class names
        // validation after mapping in getObject
        // getObject(i+1, type) catch getObject catch getString
        // add direct mapping to getString and other methods
    }

    /** Delegates to the shared cross-database nullability-inference scenario. */
    @Test
    fun `infer nullability`() {
        inferNullability(connection)
    }
}
|
||||
+108
@@ -0,0 +1,108 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.assertions.throwables.shouldThrow
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
|
||||
import org.jetbrains.kotlinx.dataframe.io.readDataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
import java.sql.DriverManager
|
||||
|
||||
// JDBC URL carrying both user and password as query parameters — the "happy path".
private const val URL_WITH_LOGIN_PASSWORD = "jdbc:postgresql://localhost:5432/test?" +
    "user=postgres&password=pass&connectTimeout=10&tcpKeepAlive=true"

// JDBC URL with no credentials at all; connecting with it is expected to fail.
private const val URL_NO_LOGIN_PASSWORD = "jdbc:postgresql://localhost:5432/test?connectTimeout=10&tcpKeepAlive=true"

// JDBC URL with only a password (no user); connecting with it is expected to fail.
private const val URL_WITH_PASSWORD =
    "jdbc:postgresql://localhost:5432/test?password=pass&connectTimeout=10&tcpKeepAlive=true"

// JDBC URL with only a user (no password); connecting with it is expected to fail.
private const val URL_WITH_LOGIN =
    "jdbc:postgresql://localhost:5432/test?user=postgres&connectTimeout=10&tcpKeepAlive=true"

// Table created/dropped by createTestData()/clearTestData() for these tests.
private const val TABLE_NAME = "table1"
|
||||
|
||||
/**
 * Verifies reading DataFrames from PostgreSQL when credentials are embedded
 * (or deliberately missing) in the JDBC connection URL itself, both through a
 * raw [java.sql.Connection] and through [DbConnectionConfig].
 *
 * Ignored by default: requires a local PostgreSQL instance at localhost:5432.
 */
@Ignore
class PostgresConnectionUrlTest {
    @Test
    fun `read from table with login and password in connection URL`() {
        DriverManager.getConnection(URL_WITH_LOGIN_PASSWORD).use { connection ->
            createTestData(connection)

            // Read through the explicit JDBC connection.
            val tableFrame = DataFrame.readSqlTable(connection, TABLE_NAME).cast<Table1>()
            val firstMatch = tableFrame.filter { it[Table1::id] == 1 }
            firstMatch[0][2] shouldBe 11

            // Read through the extension function on the connection itself.
            val extensionFrame = connection.readDataFrame(TABLE_NAME).cast<Table1>()
            val extensionMatch = extensionFrame.filter { it[Table1::id] == 1 }
            extensionMatch[0][2] shouldBe 11

            clearTestData(connection)
        }
    }

    @Test
    fun `read from table with login and password in connection URL for DBConfig`() {
        DriverManager.getConnection(URL_WITH_LOGIN_PASSWORD).use { connection ->
            createTestData(connection)

            // The config opens its own connection under the hood using the same URL.
            val config = DbConnectionConfig(URL_WITH_LOGIN_PASSWORD)

            val configFrame = DataFrame.readSqlTable(dbConfig = config, TABLE_NAME).cast<Table1>()
            val configMatch = configFrame.filter { it[Table1::id] == 1 }
            configMatch[0][2] shouldBe 11

            val extensionFrame = config.readDataFrame(TABLE_NAME).cast<Table1>()
            val extensionMatch = extensionFrame.filter { it[Table1::id] == 1 }
            extensionMatch[0][2] shouldBe 11

            clearTestData(connection)
        }
    }

    @Test
    fun `read from table without login and password`() {
        // No credentials at all: the driver must reject the connection attempt.
        val config = DbConnectionConfig(URL_NO_LOGIN_PASSWORD)

        shouldThrow<org.postgresql.util.PSQLException> {
            testReadFromTable(config)
        }
    }

    @Test
    fun `read from table with password only`() {
        // Password without a user name is insufficient to authenticate.
        val config = DbConnectionConfig(URL_WITH_PASSWORD)

        shouldThrow<org.postgresql.util.PSQLException> {
            testReadFromTable(config)
        }
    }

    @Test
    fun `read from table with login only`() {
        // User name without a password is insufficient to authenticate.
        val config = DbConnectionConfig(URL_WITH_LOGIN)

        shouldThrow<org.postgresql.util.PSQLException> {
            testReadFromTable(config)
        }
    }

    /**
     * Shared body for the failure-path tests: fixtures are created over a fully
     * authenticated connection, while the read itself goes through [dbConfig]
     * (which carries the deliberately incomplete URL under test).
     */
    private fun testReadFromTable(dbConfig: DbConnectionConfig) {
        DriverManager.getConnection(URL_WITH_LOGIN_PASSWORD).use { connection ->
            createTestData(connection)

            val frame = dbConfig.readDataFrame(TABLE_NAME).cast<Table1>()
            val match = frame.filter { it[Table1::id] == 1 }
            match[0][2] shouldBe 11

            clearTestData(connection)
        }
    }
}
|
||||
Vendored
+421
@@ -0,0 +1,421 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io.local
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.add
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.api.select
|
||||
import org.jetbrains.kotlinx.dataframe.io.inferNullability
|
||||
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlQuery
|
||||
import org.jetbrains.kotlinx.dataframe.io.readSqlTable
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Ignore
|
||||
import org.junit.Test
|
||||
import org.postgresql.geometric.PGbox
|
||||
import org.postgresql.geometric.PGcircle
|
||||
import org.postgresql.geometric.PGline
|
||||
import org.postgresql.geometric.PGlseg
|
||||
import org.postgresql.geometric.PGpath
|
||||
import org.postgresql.geometric.PGpoint
|
||||
import org.postgresql.geometric.PGpolygon
|
||||
import org.postgresql.util.PGInterval
|
||||
import org.postgresql.util.PGobject
|
||||
import java.math.BigDecimal
|
||||
import java.sql.Connection
|
||||
import java.sql.Date
|
||||
import java.sql.DriverManager
|
||||
import java.sql.SQLException
|
||||
import java.sql.Time
|
||||
import java.sql.Timestamp
|
||||
import java.sql.Types
|
||||
import java.util.UUID
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
// Connection parameters for the local PostgreSQL test instance used by PostgresTest.
private const val BASIC_URL = "jdbc:postgresql://localhost:5432/test"
private const val USER_NAME = "postgres"
private const val PASSWORD = "pass"
|
||||
|
||||
/**
 * DataFrame schema mirroring the `table1` fixture created by [createTestData]
 * (column names are lowercased by PostgreSQL). Covers numeric, boolean,
 * geometric, date, and JSON column types.
 */
@DataSchema
interface Table1 {
    val id: Int
    val bigintcol: Long
    val smallintcol: Int
    val bigserialcol: Long
    val booleancol: Boolean
    val boxcol: String
    val byteacol: ByteArray
    val charactercol: String
    val characterncol: String
    val charcol: String
    val circlecol: String
    val datecol: java.sql.Date
    val doublecol: Double
    // integerCol is the only nullable column in the table1 DDL.
    val integercol: Int?
    val intervalcol: String
    val jsoncol: String
    val jsonbcol: String
}
|
||||
|
||||
/**
 * DataFrame schema mirroring the `table2` fixture created by [createTestData].
 * Covers geometric (line/lseg/path/point/polygon), monetary, serial,
 * time/timestamp, uuid, and xml column types.
 */
@DataSchema
interface Table2 {
    val id: Int
    val linecol: org.postgresql.geometric.PGline
    val lsegcol: String
    val macaddrcol: String
    val moneycol: String
    val numericcol: BigDecimal
    val pathcol: org.postgresql.geometric.PGpath
    val pointcol: String
    val polygoncol: String
    val realcol: Float
    val smallintcol: Int
    val smallserialcol: Int
    val serialcol: Int
    // textCol is inserted as NULL by createTestData, hence nullable here.
    val textcol: String?
    val timecol: String
    val timewithzonecol: String
    val timestampcol: String
    val timestampwithzonecol: String
    val uuidcol: String
    val xmlcol: String
}
|
||||
|
||||
/**
 * DataFrame schema for the join query in `read from sql query`
 * (table1 JOIN table2 ON id), selecting id, bigintCol, lineCol, textCol.
 */
@DataSchema
interface ViewTable {
    val id: Int
    val bigintcol: Long
    val linecol: String
    val textCol: String?
}
|
||||
|
||||
/**
 * (Re)creates the PostgreSQL fixtures: drops and recreates `table1` and
 * `table2`, then inserts three rows into each, covering a wide spread of
 * PostgreSQL column types (numeric, geometric, JSON, arrays, time, uuid, xml).
 *
 * Fix: the DDL statements were executed via `connection.createStatement().execute(...)`
 * without closing the Statement, leaking server-side resources; they now use
 * `.use { }` like the DROP statements already did.
 *
 * @param connection an open connection to the test database; not closed here.
 */
internal fun createTestData(connection: Connection) {
    connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table1") }
    connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") }

    val createTableStatement = """
    CREATE TABLE IF NOT EXISTS table1 (
        id serial PRIMARY KEY,
        bigintCol bigint not null,
        smallintCol smallint not null,
        bigserialCol bigserial not null,
        booleanCol boolean not null,
        boxCol box not null,
        byteaCol bytea not null,
        characterCol character not null,
        characterNCol character(10) not null,
        charCol char not null,
        circleCol circle not null,
        dateCol date not null,
        doubleCol double precision not null,
        integerCol integer,
        intervalCol interval not null,
        jsonCol json not null,
        jsonbCol jsonb not null,
        intArrayCol integer[],
        doubleArrayCol double precision array,
        dateArrayCol date array,
        textArrayCol text array,
        booleanArrayCol boolean array
    )
    """
    // .use ensures the Statement is closed even if the DDL fails.
    connection.createStatement().use { st -> st.execute(createTableStatement.trimIndent()) }

    val createTableQuery = """
    CREATE TABLE IF NOT EXISTS table2 (
        id serial PRIMARY KEY,
        lineCol line not null,
        lsegCol lseg not null,
        macaddrCol macaddr not null,
        moneyCol money not null,
        numericCol numeric not null,
        pathCol path not null,
        pointCol point not null,
        polygonCol polygon not null,
        realCol real not null,
        smallintCol smallint not null,
        smallserialCol smallserial not null,
        serialCol serial not null,
        textCol text,
        timeCol time not null,
        timeWithZoneCol time with time zone not null,
        timestampCol timestamp not null,
        timestampWithZoneCol timestamp with time zone not null,
        uuidCol uuid not null,
        xmlCol xml not null
    )
    """
    connection.createStatement().use { st -> st.execute(createTableQuery.trimIndent()) }

    @Language("SQL")
    val insertData1 = """
    INSERT INTO table1 (
        bigintCol, smallintCol, bigserialCol, booleanCol,
        boxCol, byteaCol, characterCol, characterNCol, charCol,
        circleCol, dateCol, doubleCol,
        integerCol, intervalCol, jsonCol, jsonbCol, intArrayCol,
        doubleArrayCol, dateArrayCol, textArrayCol, booleanArrayCol
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    @Language("SQL")
    val insertData2 = """
    INSERT INTO table2 (
        lineCol, lsegCol, macaddrCol, moneyCol, numericCol,
        pathCol, pointCol, polygonCol, realCol, smallintCol,
        smallserialCol, serialCol, textCol, timeCol,
        timeWithZoneCol, timestampCol, timestampWithZoneCol,
        uuidCol, xmlCol
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    // SQL ARRAY values shared by all three inserted rows.
    val intArray = connection.createArrayOf("INTEGER", arrayOf(1, 2, 3))
    val doubleArray = connection.createArrayOf("DOUBLE", arrayOf(1.1, 2.2, 3.3))
    val dateArray = connection.createArrayOf("DATE", arrayOf(Date.valueOf("2023-08-01"), Date.valueOf("2023-08-02")))
    val textArray = connection.createArrayOf("TEXT", arrayOf("Hello", "World"))
    val booleanArray = connection.createArrayOf("BOOLEAN", arrayOf(true, false, true))

    connection.prepareStatement(insertData1).use { st ->
        // Insert data into table1
        for (i in 1..3) {
            st.setLong(1, i * 1000L)
            st.setShort(2, 11.toShort())
            st.setLong(3, 1000000000L + i)
            st.setBoolean(4, i % 2 == 1)
            st.setObject(5, PGbox("(1,1),(2,2)"))
            st.setBytes(6, byteArrayOf(1, 2, 3))
            st.setString(7, "A")
            st.setString(8, "Hello")
            st.setString(9, "A")
            st.setObject(10, PGcircle("<(1,2),3>"))
            st.setDate(11, Date.valueOf("2023-08-01"))
            st.setDouble(12, 12.34)
            st.setInt(13, 12345 * i)
            st.setObject(14, PGInterval("1 year"))

            // json and jsonb columns both receive the same jsonb-typed payload.
            val jsonbObject = PGobject()
            jsonbObject.type = "jsonb"
            jsonbObject.value = "{\"key\": \"value\"}"

            st.setObject(15, jsonbObject)
            st.setObject(16, jsonbObject)
            st.setArray(17, intArray)
            st.setArray(18, doubleArray)
            st.setArray(19, dateArray)
            st.setArray(20, textArray)
            st.setArray(21, booleanArray)
            st.executeUpdate()
        }
    }

    connection.prepareStatement(insertData2).use { st ->
        // Insert data into table2
        for (i in 1..3) {
            st.setObject(1, PGline("{1,2,3}"))
            st.setObject(2, PGlseg("[(-1,0),(1,0)]"))

            // macaddr has no dedicated driver class; wrap it in a typed PGobject.
            val macaddrObject = PGobject()
            macaddrObject.type = "macaddr"
            macaddrObject.value = "00:00:00:00:00:0$i"

            st.setObject(3, macaddrObject)
            st.setBigDecimal(4, BigDecimal("123.45"))
            st.setBigDecimal(5, BigDecimal("12.34"))
            st.setObject(6, PGpath("((1,2),(3,$i))"))
            st.setObject(7, PGpoint("(1,2)"))
            st.setObject(8, PGpolygon("((1,1),(2,2),(3,3))"))
            st.setFloat(9, 12.34f)
            st.setShort(10, (i * 100).toShort())
            st.setInt(11, 1000 + i)
            st.setInt(12, 1000000 + i)
            st.setString(13, null)
            st.setTime(14, Time.valueOf("12:34:56"))

            st.setTimestamp(15, Timestamp(System.currentTimeMillis()))
            st.setTimestamp(16, Timestamp(System.currentTimeMillis()))
            st.setTimestamp(17, Timestamp(System.currentTimeMillis()))

            // Types.OTHER lets the driver map the UUID to the native uuid column.
            st.setObject(18, UUID.randomUUID(), Types.OTHER)
            val xmlObject = PGobject()
            xmlObject.type = "xml"
            xmlObject.value = "<root><element>data</element></root>"

            st.setObject(19, xmlObject)
            st.executeUpdate()
        }
    }
}
|
||||
|
||||
/**
 * Drops the two fixture tables if they exist, leaving the database clean.
 *
 * @param connection an open connection to the test database; not closed here.
 */
internal fun clearTestData(connection: Connection) {
    for (table in listOf("table1", "table2")) {
        connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS $table") }
    }
}
|
||||
|
||||
/**
 * End-to-end tests for reading DataFrames from PostgreSQL: whole tables,
 * ad-hoc queries, all tables at once, per-type column mapping, and nullability
 * inference. Fixtures are created once for the class and dropped afterwards.
 *
 * Ignored by default: requires a local PostgreSQL instance at localhost:5432.
 */
@Ignore
class PostgresTest {
    companion object {
        private lateinit var connection: Connection

        @BeforeClass
        @JvmStatic
        fun setUpClass() {
            connection = DriverManager.getConnection(BASIC_URL, USER_NAME, PASSWORD)
            createTestData(connection)
        }

        @AfterClass
        @JvmStatic
        fun tearDownClass() {
            // Best-effort cleanup: a failure here should not mask test results.
            try {
                clearTestData(connection)
                connection.close()
            } catch (e: SQLException) {
                e.printStackTrace()
            }
        }
    }

    // Reads both tables whole and checks cell values (by positional index into
    // the table1/table2 DDL column order) plus the inferred schemas.
    @Test
    fun `read from tables`() {
        val tableName1 = "table1"
        val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
        val result = df1.filter { it[Table1::id] == 1 }

        result[0][2] shouldBe 11
        result[0][13] shouldBe 12345
        result[0][17] shouldBe arrayOf(1, 2, 3)
        result[0][18] shouldBe arrayOf(1.1, 2.2, 3.3)
        result[0][19] shouldBe arrayOf(Date.valueOf("2023-08-01"), Date.valueOf("2023-08-02"))
        result[0][20] shouldBe arrayOf("Hello", "World")
        result[0][21] shouldBe arrayOf(true, false, true)

        val schema = DataFrameSchema.readSqlTable(connection, tableName1)
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["integercol"]!!.type shouldBe typeOf<Int?>()
        schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
        schema.columns["circlecol"]!!.type shouldBe typeOf<Any>()
        schema.columns["intarraycol"]!!.type.classifier shouldBe kotlin.Array::class
        schema.columns["doublearraycol"]!!.type.classifier shouldBe kotlin.Array::class
        schema.columns["datearraycol"]!!.type.classifier shouldBe kotlin.Array::class
        schema.columns["textarraycol"]!!.type.classifier shouldBe kotlin.Array::class
        schema.columns["booleanarraycol"]!!.type.classifier shouldBe kotlin.Array::class

        val tableName2 = "table2"
        val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()
        val result2 = df2.filter { it[Table2::id] == 1 }
        result2[0][11] shouldBe 1001
        result2[0][13] shouldBe null

        val schema2 = DataFrameSchema.readSqlTable(connection, tableName2)
        schema2.columns["id"]!!.type shouldBe typeOf<Int>()
        schema2.columns["pathcol"]!!.type shouldBe typeOf<Any>() // TODO: https://github.com/Kotlin/dataframe/issues/537
        schema2.columns["textcol"]!!.type shouldBe typeOf<String?>()
        schema2.columns["linecol"]!!.type shouldBe typeOf<Any>() // TODO: https://github.com/Kotlin/dataframe/issues/537
    }

    // Reads a join of table1 and table2 and checks both data and inferred schema.
    @Test
    fun `read from sql query`() {
        @Language("SQL")
        val sqlQuery =
            """
            SELECT
               t1.id,
               t1.bigintCol,
               t2.lineCol,
               t2.textCol
            FROM table1 t1
            JOIN table2 t2 ON t1.id = t2.id
            """.trimIndent()

        val df = DataFrame.readSqlQuery(connection, sqlQuery = sqlQuery).cast<ViewTable>()
        val result = df.filter { it[ViewTable::id] == 1 }
        result[0][3] shouldBe null

        val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
        schema.columns["id"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigintcol"]!!.type shouldBe typeOf<Long>()
        schema.columns["textcol"]!!.type shouldBe typeOf<String?>()
    }

    // Reads every table in the database at once; relies on table1 and table2
    // being the only fixtures and appearing in creation order.
    @Test
    fun `read from all tables`() {
        val dataframes = DataFrame.readAllSqlTables(connection).values.toList()

        val table1Df = dataframes[0].cast<Table1>()

        table1Df.rowsCount() shouldBe 3
        table1Df.filter { it[Table1::integercol] != null && it[Table1::integercol]!! > 12345 }.rowsCount() shouldBe 2
        table1Df[0][1] shouldBe 1000L
        table1Df[0][2] shouldBe 11

        val table2Df = dataframes[1].cast<Table2>()

        table2Df.rowsCount() shouldBe 3
        table2Df.filter {
            it[Table2::pathcol] == org.postgresql.geometric.PGpath("((1,2),(3,1))")
        }.rowsCount() shouldBe 1
        table2Df[0][11] shouldBe 1001
    }

    // Exercises the JDBC-type -> Kotlin-type mapping column by column, by
    // selecting each column and re-adding it via the typed schema accessor.
    @Test
    fun `read columns of different types to check type mapping`() {
        val tableName1 = "table1"
        val df1 = DataFrame.readSqlTable(connection, tableName1).cast<Table1>()
        val result = df1.select("smallintcol")
            .add("smallintcol2") { it[Table1::smallintcol] }
        result[0][1] shouldBe 11

        val result1 = df1.select("bigserialcol")
            .add("bigserialcol2") { it[Table1::bigserialcol] }
        result1[0][1] shouldBe 1000000001L

        val result2 = df1.select("doublecol")
            .add("doublecol2") { it[Table1::doublecol] }
        result2[0][1] shouldBe 12.34

        val tableName2 = "table2"
        val df2 = DataFrame.readSqlTable(connection, tableName2).cast<Table2>()

        val result3 = df2.select("moneycol")
            .add("moneycol2") { it[Table2::moneycol] }
        result3[0][1] shouldBe "123,45 ?" // TODO: weird mapping

        val result4 = df2.select("numericcol")
            .add("numericcol2") { it[Table2::numericcol] }
        result4[0][1] shouldBe BigDecimal("12.34")

        val result5 = df2.select("realcol")
            .add("realcol2") { it[Table2::realcol] }
        result5[0][1] shouldBe 12.34f

        val result7 = df2.select("smallserialcol")
            .add("smallserialcol2") { it[Table2::smallserialcol] }
        result7[0][1] shouldBe 1001

        val result8 = df2.select("serialcol")
            .add("serialcol2") { it[Table2::serialcol] }
        result8[0][1] shouldBe 1000001

        val schema = DataFrameSchema.readSqlTable(connection, tableName1)
        schema.columns["smallintcol"]!!.type shouldBe typeOf<Int>()
        schema.columns["bigserialcol"]!!.type shouldBe typeOf<Long>()
        schema.columns["doublecol"]!!.type shouldBe typeOf<Double>()

        val schema1 = DataFrameSchema.readSqlTable(connection, tableName2)
        schema1.columns["moneycol"]!!.type shouldBe typeOf<String>()
        schema1.columns["numericcol"]!!.type shouldBe typeOf<BigDecimal>()
        schema1.columns["realcol"]!!.type shouldBe typeOf<Float>()
        schema1.columns["smallserialcol"]!!.type shouldBe typeOf<Int>()
        schema1.columns["serialcol"]!!.type shouldBe typeOf<Int>()
    }

    // Delegates to the shared inferNullability() helper against the live connection.
    @Test
    fun `infer nullability`() {
        inferNullability(connection)
    }
}
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.jetbrains.kotlinx.dataframe.io.db.MsSql
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
|
||||
/**
 * Tests for SQL Server (MsSql) dialect helpers. Unlike the other DB tests in
 * this module, this one needs no live server: it only exercises pure query
 * rewriting in the MsSql dialect object.
 */
class MsSqlTest {
    companion object {
        // No shared fixtures yet; hooks are kept for symmetry with the other DB tests.
        @BeforeClass
        @JvmStatic
        fun setUpClass() {
        }

        @AfterClass
        @JvmStatic
        fun tearDownClass() {
        }
    }

    @Test
    fun `test SQL Server TOP limit functionality`() {
        // SQL Server has no LIMIT clause; the dialect must rewrite it as TOP n.
        val limited = MsSql.buildSqlQueryWithLimit("SELECT * FROM TestTable1", 1)
        limited shouldBe "SELECT TOP 1 * FROM TestTable1"
    }
}
|
||||
+264
@@ -0,0 +1,264 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.kotest.matchers.shouldBe
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
|
||||
import org.jetbrains.kotlinx.dataframe.api.cast
|
||||
import org.jetbrains.kotlinx.dataframe.api.filter
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.AfterClass
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
import java.io.File
|
||||
import java.nio.file.Files
|
||||
import java.sql.Connection
|
||||
import java.sql.DriverManager
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * DataFrame schema mirroring the SQLite `Customers` fixture table
 * (id INTEGER PK, name TEXT, age INTEGER, salary REAL NOT NULL, profilePicture BLOB).
 */
@DataSchema
interface CustomerSQLite {
    val id: Int?
    val name: String?
    val age: Int?
    // salary is the only NOT NULL data column, hence non-nullable here.
    val salary: Double
    val profilePicture: ByteArray?
}
|
||||
|
||||
/**
 * DataFrame schema mirroring the SQLite `Orders` fixture table
 * (id INTEGER PK, customerName TEXT, orderDate TEXT, totalAmount NUMERIC NOT NULL,
 * orderDetails BLOB).
 */
@DataSchema
interface OrderSQLite {
    val id: Int?
    val customerName: String?
    val orderDate: String?
    // totalAmount is NOT NULL in the DDL, hence non-nullable here.
    val totalAmount: Double
    val orderDetails: ByteArray?
}
|
||||
|
||||
/**
 * DataFrame schema for the Customers INNER JOIN Orders query used by the
 * `read from sql query` tests; field names match the SELECT's AS aliases.
 */
@DataSchema
interface CustomerOrderSQLite {
    val customerId: Int?
    val customerName: String?
    val customerAge: Int?
    val customerSalary: Double
    val customerProfilePicture: ByteArray?
    val orderId: Int?
    val orderDate: String?
    val totalAmount: Double
    val orderDetails: ByteArray?
}
|
||||
|
||||
/**
 * End-to-end tests for reading DataFrames from SQLite: whole tables, join
 * queries, all tables at once — each both over a raw [Connection] and via
 * [DbConnectionConfig].
 *
 * Fix: the two DDL statements in [setUpClass] were executed via
 * `connection.createStatement().execute(...)` without closing the Statement,
 * leaking resources; they now use `.use { }`.
 */
class SqliteTest {
    companion object {
        private lateinit var connection: Connection

        /**
         * We are using a temporary file because we need to test requests with DBConnectionConfig,
         * which creates a connection under the hood and need to have access to the shared SQLite database
         */
        private lateinit var testDbFile: File
        private lateinit var databaseUrl: String

        @BeforeClass
        @JvmStatic
        fun setUpClass() {
            testDbFile = Files.createTempFile("dataframe_sqlite_test_", ".db").toFile()
            testDbFile.deleteOnExit() // if fails

            databaseUrl = "jdbc:sqlite:${testDbFile.absolutePath}"
            connection = DriverManager.getConnection(databaseUrl)

            @Language("SQL")
            val createCustomersTableQuery = """
                CREATE TABLE Customers (
                    id INTEGER PRIMARY KEY,
                    name TEXT,
                    age INTEGER,
                    salary REAL NOT NULL,
                    profilePicture BLOB
                )
            """

            // .use ensures the Statement is closed even if the DDL fails.
            connection.createStatement().use { st -> st.execute(createCustomersTableQuery) }

            @Language("SQL")
            val createOrderTableQuery = """
                CREATE TABLE Orders (
                    id INTEGER PRIMARY KEY,
                    customerName TEXT,
                    orderDate TEXT,
                    totalAmount NUMERIC NOT NULL,
                    orderDetails BLOB
                )
            """

            connection.createStatement().use { st -> st.execute(createOrderTableQuery) }

            val profilePicture = "SampleProfilePictureData".toByteArray()
            val orderDetails = "OrderDetailsData".toByteArray()

            // Two customers: one named, one with a NULL name (exercises nullability).
            connection.prepareStatement("INSERT INTO Customers (name, age, salary, profilePicture) VALUES (?, ?, ?, ?)")
                .use {
                    it.setString(1, "John Doe")
                    it.setInt(2, 30)
                    it.setDouble(3, 2500.50)
                    it.setBytes(4, profilePicture)
                    it.executeUpdate()
                }

            connection.prepareStatement("INSERT INTO Customers (name, age, salary, profilePicture) VALUES (?, ?, ?, ?)")
                .use {
                    it.setString(1, null)
                    it.setInt(2, 40)
                    it.setDouble(3, 1500.50)
                    it.setBytes(4, profilePicture)
                    it.executeUpdate()
                }

            // Two orders: one with a NULL customerName, one joining back to John Doe.
            connection.prepareStatement(
                "INSERT INTO Orders (customerName, orderDate, totalAmount, orderDetails) VALUES (?, ?, ?, ?)",
            ).use {
                it.setString(1, null)
                it.setString(2, "2023-07-21")
                it.setDouble(3, 150.75)
                it.setBytes(4, orderDetails)
                it.executeUpdate()
            }

            connection.prepareStatement(
                "INSERT INTO Orders (customerName, orderDate, totalAmount, orderDetails) VALUES (?, ?, ?, ?)",
            ).use {
                it.setString(1, "John Doe")
                it.setString(2, "2023-08-21")
                it.setDouble(3, 250.75)
                it.setBytes(4, orderDetails)
                it.executeUpdate()
            }
        }

        @AfterClass
        @JvmStatic
        fun tearDownClass() {
            try {
                connection.close()
                if (::testDbFile.isInitialized && testDbFile.exists()) {
                    testDbFile.delete()
                }
            } catch (e: Exception) {
                // Log, but not fail
                println("Warning: Could not clean up test database file: ${e.message}")
            }
        }
    }

    // Reads both tables over the raw connection and checks data plus inferred schema.
    @Test
    fun `read from tables`() {
        val customerTableName = "Customers"
        val df = DataFrame.readSqlTable(connection, customerTableName).cast<CustomerSQLite>()
        val result = df.filter { it[CustomerSQLite::name] == "John Doe" }
        result[0][2] shouldBe 30

        val schema = DataFrameSchema.readSqlTable(connection, customerTableName)
        schema.columns["id"]!!.type shouldBe typeOf<Int?>()
        schema.columns["name"]!!.type shouldBe typeOf<String?>()
        schema.columns["salary"]!!.type shouldBe typeOf<Double>()
        schema.columns["profilePicture"]!!.type shouldBe typeOf<ByteArray?>()

        val orderTableName = "Orders"
        val df2 = DataFrame.readSqlTable(connection, orderTableName).cast<OrderSQLite>()
        val result2 = df2.filter { it[OrderSQLite::totalAmount] > 10 }
        result2[0][2] shouldBe "2023-07-21"

        val schema2 = DataFrameSchema.readSqlTable(connection, orderTableName)
        schema2.columns["id"]!!.type shouldBe typeOf<Int?>()
        schema2.columns["customerName"]!!.type shouldBe typeOf<String?>()
        schema2.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
    }

    // Same reads as above, but through a DbConnectionConfig that opens its own
    // connection to the shared temp-file database.
    @Test
    fun `read from tables with DBConnectionConfig`() {
        val customerTableName = "Customers"

        val dbConnectionConfig = DbConnectionConfig(databaseUrl)

        val df = DataFrame.readSqlTable(dbConnectionConfig, customerTableName).cast<CustomerSQLite>()
        val result = df.filter { it[CustomerSQLite::name] == "John Doe" }
        result[0][2] shouldBe 30

        val schema = DataFrameSchema.readSqlTable(dbConnectionConfig, customerTableName)
        schema.columns["id"]!!.type shouldBe typeOf<Int?>()
        schema.columns["name"]!!.type shouldBe typeOf<String?>()
        schema.columns["salary"]!!.type shouldBe typeOf<Double>()
        schema.columns["profilePicture"]!!.type shouldBe typeOf<ByteArray?>()

        val orderTableName = "Orders"
        val df2 = DataFrame.readSqlTable(dbConnectionConfig, orderTableName).cast<OrderSQLite>()
        val result2 = df2.filter { it[OrderSQLite::totalAmount] > 10 }
        result2[0][2] shouldBe "2023-07-21"

        val schema2 = DataFrameSchema.readSqlTable(dbConnectionConfig, orderTableName)
        schema2.columns["id"]!!.type shouldBe typeOf<Int?>()
        schema2.columns["customerName"]!!.type shouldBe typeOf<String?>()
        schema2.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
    }

    // Join query shared by the two `read from sql query` tests below; aliases
    // match the CustomerOrderSQLite schema.
    @Language("SQL")
    private val sqlQuery = """
        SELECT
            c.id AS customerId,
            c.name AS customerName,
            c.age AS customerAge,
            c.salary AS customerSalary,
            c.profilePicture AS customerProfilePicture,
            o.id AS orderId,
            o.orderDate AS orderDate,
            o.totalAmount AS totalAmount,
            o.orderDetails AS orderDetails
        FROM Customers c
        INNER JOIN Orders o ON c.name = o.customerName
    """

    @Test
    fun `read from sql query`() {
        val df = DataFrame.readSqlQuery(connection, sqlQuery).cast<CustomerOrderSQLite>()
        val result = df.filter { it[CustomerOrderSQLite::customerSalary] > 1 }
        result[0][3] shouldBe 2500.5

        val schema = DataFrameSchema.readSqlQuery(connection, sqlQuery = sqlQuery)
        schema.columns["customerId"]!!.type shouldBe typeOf<Int?>()
        schema.columns["customerName"]!!.type shouldBe typeOf<String?>()
        schema.columns["customerAge"]!!.type shouldBe typeOf<Int?>()
        schema.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
    }

    @Test
    fun `read from sql query with DBConnectionConfig`() {
        val dbConnectionConfig = DbConnectionConfig(databaseUrl)

        val df = DataFrame.readSqlQuery(dbConnectionConfig, sqlQuery).cast<CustomerOrderSQLite>()
        val result = df.filter { it[CustomerOrderSQLite::customerSalary] > 1 }
        result[0][3] shouldBe 2500.5

        val schema = DataFrameSchema.readSqlQuery(dbConnectionConfig, sqlQuery = sqlQuery)
        schema.columns["customerId"]!!.type shouldBe typeOf<Int?>()
        schema.columns["customerName"]!!.type shouldBe typeOf<String?>()
        schema.columns["customerAge"]!!.type shouldBe typeOf<Int?>()
        schema.columns["totalAmount"]!!.type shouldBe typeOf<Double>()
    }

    // Reads every table at once; relies on Customers and Orders being the only
    // fixtures and appearing in creation order.
    @Test
    fun `read from all tables`() {
        val dataframes = DataFrame.readAllSqlTables(connection).values.toList()

        val customerDf = dataframes[0].cast<CustomerSQLite>()

        customerDf.rowsCount() shouldBe 2
        customerDf.filter { it[CustomerSQLite::age] != null && it[CustomerSQLite::age]!! > 30 }.rowsCount() shouldBe 1
        customerDf[0][1] shouldBe "John Doe"

        val orderDf = dataframes[1].cast<OrderSQLite>()

        orderDf.rowsCount() shouldBe 2
        orderDf.filter { it[OrderSQLite::totalAmount] > 200 }.rowsCount() shouldBe 1
        orderDf[0][1] shouldBe null
    }
}
|
||||
@@ -0,0 +1,34 @@
|
||||
# SLF4J's SimpleLogger configuration file
|
||||
# Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err.
|
||||
|
||||
# Default logging detail level for all instances of SimpleLogger.
|
||||
# Must be one of ("trace", "debug", "info", "warn", or "error").
|
||||
# If not specified, defaults to "info".
|
||||
org.slf4j.simpleLogger.defaultLogLevel=debug
|
||||
|
||||
# Logging detail level for a SimpleLogger instance named "xxxxx".
|
||||
# Must be one of ("trace", "debug", "info", "warn", or "error").
|
||||
# If not specified, the default logging detail level is used.
|
||||
#org.slf4j.simpleLogger.log.xxxxx=
|
||||
|
||||
# Set to true if you want the current date and time to be included in output messages.
|
||||
# Default is false, and will output the number of milliseconds elapsed since startup.
|
||||
org.slf4j.simpleLogger.showDateTime=true
|
||||
|
||||
# The date and time format to be used in the output messages.
|
||||
# The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat.
|
||||
# If the format is not specified or is invalid, the default format is used.
|
||||
# The default format is yyyy-MM-dd HH:mm:ss:SSS Z.
|
||||
org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z
|
||||
|
||||
# Set to true if you want to output the current thread name.
|
||||
# Defaults to true.
|
||||
org.slf4j.simpleLogger.showThreadName=true
|
||||
|
||||
# Set to true if you want the Logger instance name to be included in output messages.
|
||||
# Defaults to true.
|
||||
org.slf4j.simpleLogger.showLogName=true
|
||||
|
||||
# Set to true if you want the last component of the name to be included in output messages.
|
||||
# Defaults to false.
|
||||
#org.slf4j.simpleLogger.showShortLogName=false
|
||||
Reference in New Issue
Block a user