init research
This commit is contained in:
@@ -0,0 +1,281 @@
|
||||
[//]: # (title: How to Extend DataFrame Library for Custom SQL Database Support: Example with HSQLDB)
|
||||
|
||||
# How to Extend DataFrame Library for Custom SQL Database Support: Example with HSQLDB
|
||||
|
||||
This guide demonstrates how advanced users can extend the Kotlin DataFrame library to support a custom SQL database,
|
||||
using HSQLDB as an example. By following these steps,
|
||||
you will be able to integrate your custom database into the DataFrame library,
|
||||
allowing for seamless DataFrame creation, manipulation, and querying.
|
||||
|
||||
This guide is intended for Gradle projects,
|
||||
but the experience will be similar in Kotlin Notebooks,
|
||||
as demonstrated in this [Kotlin DataFrame SQL Example](https://github.com/zaleslaw/KotlinDataFrame-SQL-Examples/blob/master/notebooks/customdb.ipynb).
|
||||
|
||||
---
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. **Create a Gradle Project**:
|
||||
|
||||
Add the following dependencies and dataframe plugin to your `build.gradle.kts`:
|
||||
|
||||
```kotlin
|
||||
plugins {
|
||||
id("org.jetbrains.kotlinx.dataframe") version "$dataframe_version"
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation("org.jetbrains.kotlinx:dataframe:$dataframe_version")
|
||||
implementation("org.hsqldb:hsqldb:$version")
|
||||
}
|
||||
```
|
||||
|
||||
2. **Install HSQLDB**:
|
||||
|
||||
Follow the [HSQLDB Quick Guide](https://www.tutorialspoint.com/hsqldb/hsqldb_quick_guide.htm) to set up HSQLDB locally.
|
||||
|
||||
3. **Start the HSQLDB Server**:
|
||||
|
||||
Launch a terminal or command prompt and execute the following command:
|
||||
|
||||
```bash
|
||||
java -classpath lib/hsqldb.jar org.hsqldb.server.Server --database.0 file:hsqldb/demodb --dbname.0 testdb
|
||||
```
|
||||
|
||||
## Implementing Custom Database Type Support
|
||||
|
||||
To enable HSQLDB integration, implement a custom `DbType` by overriding required methods.
|
||||
|
||||
|
||||
**Create the HSQLDB Type**
|
||||
|
||||
```kotlin
|
||||
/**
|
||||
* Represents the HSQLDB database type.
|
||||
*
|
||||
* This class provides methods to convert data from a ResultSet to the appropriate type for HSQLDB,
|
||||
* and to generate the corresponding column schema.
|
||||
*/
|
||||
public object HSQLDB : DbType("hsqldb") {
|
||||
override val driverClassName: String
|
||||
get() = "org.hsqldb.jdbcDriver"
|
||||
|
||||
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
|
||||
return null
|
||||
}
|
||||
|
||||
override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
|
||||
val locale = Locale.getDefault()
|
||||
fun String?.containsWithLowercase(substr: String) = this?.lowercase(locale)?.contains(substr) == true
|
||||
val schemaName = tableMetadata.schemaName
|
||||
val name = tableMetadata.name
|
||||
return schemaName.containsWithLowercase("information_schema") ||
|
||||
schemaName.containsWithLowercase("system") ||
|
||||
name.containsWithLowercase("system_")
|
||||
}
|
||||
|
||||
override fun buildTableMetadata(tables: ResultSet): TableMetadata =
|
||||
TableMetadata(
|
||||
tables.getString("TABLE_NAME"),
|
||||
tables.getString("TABLE_SCHEM"),
|
||||
tables.getString("TABLE_CAT"),
|
||||
)
|
||||
|
||||
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
|
||||
return null
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Defining Helper Functions**
|
||||
|
||||
Define utility functions to manage database connections and tables.
|
||||
For example purposes, we create a small function that can populate the table with a schema and some sample data.
|
||||
|
||||
```kotlin
|
||||
const val URL = "jdbc:hsqldb:hsql://localhost/testdb"
|
||||
const val USER_NAME = "SA"
|
||||
const val PASSWORD = ""
|
||||
|
||||
|
||||
fun removeTable(con: Connection): Int {
|
||||
val stmt = con.createStatement()
|
||||
return stmt.executeUpdate("""DROP TABLE orders""")
|
||||
}
|
||||
|
||||
fun createAndPopulateTable(con: Connection) {
|
||||
val stmt = con.createStatement()
|
||||
stmt.executeUpdate(
|
||||
"""CREATE TABLE IF NOT EXISTS orders (
|
||||
id INT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
|
||||
item VARCHAR(50) NOT NULL,
|
||||
price DOUBLE NOT NULL,
|
||||
order_date DATE
|
||||
);
|
||||
""".trimIndent()
|
||||
)
|
||||
|
||||
stmt.executeUpdate(
|
||||
"""INSERT INTO orders (item, price, order_date)
|
||||
VALUES ('Laptop', 1500.00, NOW())""".trimIndent()
|
||||
)
|
||||
|
||||
stmt.executeUpdate(
|
||||
"""INSERT INTO orders (item, price, order_date)
|
||||
VALUES ('Smartphone', 700.00, NOW())""".trimIndent()
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
**Define the Table Schema**
|
||||
|
||||
Use the `@DataSchema` annotation to define a [**custom data schema**](schemas.md) for the `orders` table.
|
||||
|
||||
```kotlin
|
||||
@DataSchema
|
||||
interface Orders {
|
||||
val id: Int
|
||||
val item: String
|
||||
val price: Double
|
||||
val orderDate: java.util.Date
|
||||
}
|
||||
```
|
||||
|
||||
**End-to-End Example**
|
||||
|
||||
Finally, use the following code to create, populate, read, and delete the table in HSQLDB.
|
||||
|
||||
```kotlin
|
||||
fun main() {
|
||||
DriverManager.getConnection(URL, USER_NAME, PASSWORD).use { con ->
|
||||
createAndPopulateTable(con)
|
||||
|
||||
val df = con
|
||||
.readDataFrame("SELECT * FROM orders", dbType = HSQLDB)
|
||||
.renameToCamelCase()
|
||||
.cast<Orders>(verify = true)
|
||||
|
||||
df.filter { it.price > 800 }.print()
|
||||
|
||||
removeTable(con)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Running the `main` function above will output filtered rows from the `orders` table where `price > 800`.
|
||||
|
||||
It will also demonstrate how to define and use custom SQL database extensions in the DataFrame library.
|
||||
|
||||
Find a full example project [here](https://github.com/zaleslaw/KotlinDataFrame-SQL-Examples/tree/master/src/main/kotlin/customdb).
|
||||
|
||||
The core principles of working with `DbType` remain the same, and the example with HSQLDB demonstrates the basic implementation pattern. However, the `DbType` class now offers more methods for customization to give you greater control over database integration.
|
||||
|
||||
## Advanced Customization Options
|
||||
|
||||
For advanced users, the `DbType` class provides additional properties and methods that can be overridden to fine-tune database integration:
|
||||
|
||||
### Performance and Configuration Properties
|
||||
|
||||
You can customize default performance-related settings:
|
||||
|
||||
```kotlin
|
||||
/**
|
||||
* Specifies the default batch size for fetching rows from the database during query execution.
|
||||
* Value is set to 1000 by default.
|
||||
*/
|
||||
public open val defaultFetchSize: Int = 1000
|
||||
|
||||
/**
|
||||
* Specifies the default timeout in seconds for database queries.
|
||||
* If set to `null`, no timeout is applied, allowing queries to run indefinitely.
|
||||
*/
|
||||
public open val defaultQueryTimeout: Int? = null // null = no timeout
|
||||
```
|
||||
|
||||
### Query Building and Statement Configuration
|
||||
|
||||
Override these methods to customize SQL query generation and statement configuration:
|
||||
|
||||
```kotlin
|
||||
/**
|
||||
* Builds a SELECT query for reading from a table.
|
||||
*/
|
||||
public open fun buildSelectTableQueryWithLimit(tableName: String, limit: Int?): String
|
||||
|
||||
/**
|
||||
* Configures the provided `PreparedStatement` for optimized read operations.
|
||||
* This method sets the fetch size for efficient streaming, applies a query timeout if specified,
|
||||
* and configures the fetch direction to forward-only for better performance in read-only operations.
|
||||
*/
|
||||
public open fun configureReadStatement(statement: PreparedStatement)
|
||||
|
||||
/**
|
||||
* Quotes an identifier (table or column name) according to database-specific rules.
|
||||
* Examples:
|
||||
* - PostgreSQL: "tableName" or "schema"."table"
|
||||
* - MySQL: `tableName` or `schema`.`table`
|
||||
* - MS SQL: [tableName] or [schema].[table]
|
||||
* - SQLite/H2: no quotes for simple names
|
||||
*/
|
||||
public open fun quoteIdentifier(name: String): String
|
||||
|
||||
/**
|
||||
* Constructs a SQL query with a limit clause.
|
||||
*/
|
||||
public open fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int = 1): String
|
||||
```
|
||||
|
||||
### Connection and Data Handling
|
||||
|
||||
For specialized connection handling and data extraction:
|
||||
|
||||
```kotlin
|
||||
/**
|
||||
* Creates a database connection using the provided configuration.
|
||||
* Some databases (like Sqlite) require read-only mode to be set during connection creation
|
||||
* rather than after the connection is established.
|
||||
*/
|
||||
public open fun createConnection(dbConfig: DbConnectionConfig): Connection
|
||||
|
||||
/**
|
||||
* Extracts a value from the ResultSet for the given column.
|
||||
* This method can be overridden by custom database types to provide specialized parsing logic.
|
||||
*/
|
||||
public open fun extractValueFromResultSet(
|
||||
rs: ResultSet,
|
||||
columnIndex: Int,
|
||||
columnMetadata: TableColumnMetadata,
|
||||
kType: KType,
|
||||
): Any?
|
||||
|
||||
/**
|
||||
* Builds a single DataColumn with proper type handling.
|
||||
*/
|
||||
public open fun buildDataColumn(
|
||||
name: String,
|
||||
values: MutableList<Any?>,
|
||||
kType: KType,
|
||||
inferNullability: Boolean,
|
||||
): DataColumn<*>
|
||||
```
|
||||
|
||||
### Type Mapping and Metadata
|
||||
|
||||
For custom type mappings and metadata extraction:
|
||||
|
||||
```kotlin
|
||||
/**
|
||||
* Creates a mapping between common SQL types and their corresponding KTypes.
|
||||
*/
|
||||
public open fun makeCommonSqlToKTypeMapping(tableColumnMetadata: TableColumnMetadata): KType
|
||||
|
||||
/**
|
||||
* Retrieves column metadata from a JDBC ResultSet.
|
||||
* This method reads column metadata from ResultSetMetaData with graceful fallbacks
|
||||
* for JDBC drivers that throw SQLFeatureNotSupportedException for certain methods.
|
||||
*/
|
||||
public open fun getTableColumnsMetadata(resultSet: ResultSet): List<TableColumnMetadata>
|
||||
```
|
||||
|
||||
> **Note:** The set of overridable methods and properties may change in the next Beta release as we continue to improve the API based on user feedback.
|
||||
|
||||
Reference in New Issue
Block a user