init research

This commit is contained in:
2026-02-08 11:20:43 -10:00
commit bdf064f54d
3041 changed files with 1592200 additions and 0 deletions
+7
View File
@@ -0,0 +1,7 @@
## :dataframe-csv
This module, published as `dataframe-csv`, contains all the logic and tests that allow DataFrame to read and write
CSV, TSV, and other delimiter-separated text files.
At the moment, this module is in the experimental stage, so it is not included when
you add the `dataframe` dependency to your project; to use it, add `dataframe-csv` as a separate dependency.
+173
View File
@@ -0,0 +1,173 @@
public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
public fun <init> ()V
public fun <init> (C)V
public synthetic fun <init> (CILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun acceptsExtension (Ljava/lang/String;)Z
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
public fun getTestOrder ()I
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/QuoteMode : java/lang/Enum {
public static final field ALL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static final field ALL_NON_NULL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static final field MINIMAL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static final field NONE Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static final field NON_NUMERIC Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static fun getEntries ()Lkotlin/enums/EnumEntries;
public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadCsvDeephavenKt {
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadCsvStrKt {
public static final fun readCsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readCsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadDelimDeephavenKt {
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadDelimStrKt {
public static final fun readDelimStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readDelimStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadTsvDeephavenKt {
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/ReadTsvStrKt {
public static final fun readTsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun readTsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/ToCsvStrKt {
public static final fun toCsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;
public static synthetic fun toCsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/ToDelimStrKt {
public static final fun toDelimStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;
public static synthetic fun toDelimStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/ToTsvStrKt {
public static final fun toTsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZCLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;
public static synthetic fun toTsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZCLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
}
public final class org/jetbrains/kotlinx/dataframe/io/TsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
public fun <init> ()V
public fun <init> (C)V
public synthetic fun <init> (CILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun acceptsExtension (Ljava/lang/String;)Z
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
public fun getTestOrder ()I
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
}
public final class org/jetbrains/kotlinx/dataframe/io/UtilKt {
public static final fun getDEFAULT_DELIM_NULL_STRINGS ()Ljava/util/Set;
}
public final class org/jetbrains/kotlinx/dataframe/io/WriteCsvDeephavenKt {
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)V
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)V
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
}
public final class org/jetbrains/kotlinx/dataframe/io/WriteDelimDeephavenKt {
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)V
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)V
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
}
public final class org/jetbrains/kotlinx/dataframe/io/WriteTsvDeephavenKt {
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)V
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)V
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
}
+153
View File
@@ -0,0 +1,153 @@
import nl.jolanrensen.kodex.gradle.creatingRunKodexTask
import org.gradle.jvm.tasks.Jar
// Plugins for this module, taken from the `convention` and `libs` plugin catalogs, plus the built-in `idea` plugin.
plugins {
with(convention.plugins) {
alias(kotlinJvm8)
}
with(libs.plugins) {
alias(publisher)
alias(serialization)
alias(kodex)
alias(binary.compatibility.validator)
alias(kotlinx.benchmark)
}
idea
}
// Project group of this module.
group = "org.jetbrains.kotlinx"
dependencies {
// `api` dependencies are exposed to consumers of this module
api(projects.core)
// for reading/writing JSON <-> DataFrame/DataRow in CSV/TSV/Delim
// can safely be excluded when working without JSON and only writing flat dataframes
api(projects.dataframeJson)
// for csv reading
api(libs.deephavenCsv)
// for csv writing
api(libs.commonsCsv)
// `implementation` dependencies stay internal to this module
implementation(libs.commonsIo)
implementation(libs.sl4j)
implementation(libs.kotlinLogging)
implementation(libs.kotlin.reflect)
// test-only dependencies
testImplementation(libs.kotlinx.benchmark.runtime)
testImplementation(libs.junit)
testImplementation(libs.sl4jsimple)
testImplementation(libs.kotestAssertions) {
// keep `kotlin-stdlib-jdk8` from being pulled in transitively through this dependency
exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
}
}
// Registers a benchmark target named "test".
benchmark {
targets {
register("test")
}
}
// Name of the folder used as the `target` of `processKDocsMain` and as the root of the `generatedSources` source set.
val generatedSourcesFolderName = "generated-sources"
// Backup the kotlin source files location
// (captured as plain lists here so `changeJarTask` can restore the main source dirs after swapping them)
val kotlinMainSources = kotlin.sourceSets.main
.get()
.kotlin.sourceDirectories
.toList()
val kotlinTestSources = kotlin.sourceSets.test
.get()
.kotlin.sourceDirectories
.toList()
// Joins the given path segments into one path string, using the platform's `File.separator` between them.
fun pathOf(vararg parts: String): String =
    parts.joinToString(separator = File.separator)
// Include both test and main sources for cross-referencing, Exclude generated sources
// (anything whose path contains `build/generated`, using the platform's path separator)
val processKDocsMainSources = (kotlinMainSources + kotlinTestSources)
.filterNot { pathOf("build", "generated") in it.path }
// sourceset of the generated sources as a result of `processKDocsMain`, this will create linter tasks
// Its source dirs are the kotlin and java folders below `generatedSourcesFolderName`.
val generatedSources by kotlin.sourceSets.creating {
kotlin {
setSrcDirs(
listOf(
"$generatedSourcesFolderName/src/main/kotlin",
"$generatedSourcesFolderName/src/main/java",
),
)
}
}
// Task to generate the processed documentation
// Input: `processKDocsMainSources`; output folder: `generatedSourcesFolderName`.
val processKDocsMain by creatingRunKodexTask(processKDocsMainSources) {
group = "KDocs"
target = file(generatedSourcesFolderName)
// false, so `runKtlintFormatOverGeneratedSourcesSourceSet` can format the output
outputReadOnly = false
// additionally export docs as HTML into the docs resources folder
exportAsHtml {
dir = file("../docs/StardustDocs/resources/snippets/kdocs")
}
// always format the generated sources after this task ran
finalizedBy("runKtlintFormatOverGeneratedSourcesSourceSet")
}
// The ktlint *check* tasks for the generated-sources source set are disabled (`onlyIf { false }`);
// the generated output is only formatted, never checked.
tasks.named("ktlintGeneratedSourcesSourceSetCheck") {
onlyIf { false }
}
tasks.named("runKtlintCheckOverGeneratedSourcesSourceSet") {
onlyIf { false }
}
// If `changeJarTask` is run, modify all Jar tasks such that before running the Kotlin sources are set to
// the target of `processKDocsMain`, and they are returned to normal afterward.
// This is usually only done when publishing
val changeJarTask by tasks.registering {
    // considered up to date (i.e. skipped) when the `skipKodex` project property is set
    outputs.upToDateWhen { project.hasProperty("skipKodex") }
    doFirst {
        tasks.withType<Jar> {
            // before each Jar task: swap the main Kotlin sources for the generated ones
            doFirst {
                require(generatedSources.kotlin.srcDirs.toList().isNotEmpty()) {
                    // The lambda's result becomes the exception message, so it must evaluate to the text
                    // itself (previously it evaluated to `Unit`). The message is still logged as an error.
                    "`processKDocsMain`'s outputs are empty, did `processKDocsMain` run before this task?"
                        .also { logger.error(it) }
                }
                kotlin.sourceSets.main {
                    kotlin.setSrcDirs(generatedSources.kotlin.srcDirs)
                }
                logger.lifecycle("$this is run with modified sources: \"$generatedSourcesFolderName\"")
            }
            // after each Jar task: restore the original main Kotlin sources
            doLast {
                kotlin.sourceSets.main {
                    kotlin.setSrcDirs(kotlinMainSources)
                }
            }
        }
    }
}
// if `processKDocsMain` runs, the Jar tasks must run after it so the generated-sources are there
// (`mustRunAfter` only orders the tasks; it does not add a dependency on them)
tasks.withType<Jar> {
mustRunAfter(changeJarTask, processKDocsMain)
}
// modify all publishing tasks to depend on `changeJarTask` so the sources are swapped out with generated sources
// "publishing tasks" = every task whose name starts with "publish"; skipped when the `skipKodex` property is set
tasks.configureEach {
if (!project.hasProperty("skipKodex") && name.startsWith("publish")) {
dependsOn(processKDocsMain, changeJarTask)
}
}
// Exclude the generated/processed sources from the IDE
idea {
module {
excludeDirs.add(file(generatedSourcesFolderName))
}
}
// Publication metadata of this module; artifact id and package name both follow the Gradle project name.
kotlinPublications {
publication {
publicationName = "dataframeCsv"
artifactId = project.name
description = "CSV support for Kotlin DataFrame"
packageName = artifactId
}
}
@@ -0,0 +1,274 @@
package org.jetbrains.kotlinx.dataframe.documentationCsv
import io.deephaven.csv.CsvSpecs
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
import org.jetbrains.kotlinx.dataframe.io.ColType
import org.jetbrains.kotlinx.dataframe.io.Compression
import org.jetbrains.kotlinx.dataframe.io.DefaultNullStringsContentLink
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
import java.nio.charset.Charset
/**
* Contains both the default values of csv/tsv parameters and the parameter KDocs.
*
* Each member documents exactly one parameter of the delimiter-separated read/write functions:
* - `typealias X = Nothing` members only carry a `@param` KDoc (there is no default value to hold),
* - `val`/`const val` members carry the `@param` KDoc and hold that parameter's default value.
*/
@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference")
internal object DelimParams {
/**
* @param path The file path to read.
* Use [charset] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias PATH_READ = Nothing
/**
* @param file The file to read.
* Use [charset] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias FILE_READ = Nothing
/**
* @param url The URL from which to fetch the data.
* Use [charset] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias URL_READ = Nothing
/**
* @param fileOrUrl The file path or URL to read the data from.
* Use [charset] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias FILE_OR_URL_READ = Nothing
/**
* @param inputStream Represents the file to read.
* Use [charset] to specify the encoding.
*/
typealias INPUT_STREAM_READ = Nothing
/** @param text The raw data to read in the form of a [String]. */
typealias TEXT_READ = Nothing
/** @param file The file to write to. */
typealias FILE_WRITE = Nothing
/** @param path The path pointing to a file to write to. */
typealias PATH_WRITE = Nothing
/** @param writer The [Appendable] to write to. */
typealias WRITER_WRITE = Nothing
/**
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
* Default: `null`
*
* If `null`, the Charset will be read from the BOM of the provided input,
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
*/
val CHARSET: Charset? = null
/**
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
*/
const val CSV_DELIMITER: Char = ','
/**
* @param delimiter The field delimiter character. Default: '\t'.
*
* Ignored if [hasFixedWidthColumns] is `true`.
*/
const val TSV_DELIMITER: Char = '\t'
/**
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
*/
const val DELIM_DELIMITER: Char = ','
/**
* @param header Optional column titles. Default: empty list.
*
* If non-empty, the data will be read with [header] as the column titles
* (use [skipLines] if there's a header in the data).
* If empty (default), the header will be read from the data.
*/
val HEADER: List<String> = emptyList()
/**
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
* Default: `false`.
*
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
* Column widths are determined by the header in the data (if present), or manually by setting
* [fixedColumnWidths].
*/
const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false
/**
* @param fixedColumnWidths The fixed column widths. Default: empty list.
*
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
* (if present), else, this manually sets the column widths.
* The number of widths should match the number of columns.
*/
val FIXED_COLUMN_WIDTHS: List<Int> = emptyList()
/**
* @param compression The compression of the data.
* Default: [Compression.None], unless detected otherwise from the input file or url.
*/
val COMPRESSION: Compression<*> = Compression.None
/**
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
*
* If supplied for a certain column name (inferred from data or given by [header]),
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
*
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
* to set a _default_ column type, like [ColType.String].
*/
val COL_TYPES: Map<String, ColType> = emptyMap()
/**
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
*
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
*/
const val SKIP_LINES: Long = 0L
/**
* @param readLines The maximum number of lines to read from the data. Default: `null`.
*
* If `null`, all lines will be read.
*/
val READ_LINES: Long? = null
/**
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
* Default: `null`.
*
* Can configure locale, date format, double parsing, skipping types, etc.
*
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
* the given types or the global setting.
*/
val PARSER_OPTIONS: ParserOptions? = null
/**
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
*
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
*/
const val IGNORE_EMPTY_LINES: Boolean = false
/**
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
*
* If `true`, rows that are too short will be interpreted as _empty_ values.
*/
const val ALLOW_MISSING_COLUMNS: Boolean = true
/**
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
*
* If `true`, rows that are too long will have those columns dropped.
*/
const val IGNORE_EXCESS_COLUMNS: Boolean = true
/**
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
*/
const val QUOTE: Char = '"'
/**
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
* Default: `true`.
*/
const val IGNORE_SURROUNDING_SPACES: Boolean = true
/**
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
* Default: `false`.
*/
const val TRIM_INSIDE_QUOTED: Boolean = false
/**
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
*
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
* This is usually faster but can be turned off for debugging.
*/
const val PARSE_PARALLEL: Boolean = true
/**
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
*
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
* This will allow you to configure/overwrite any CSV / TSV parsing options.
*/
val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it }
/** @param includeHeader Whether to include the header in the output. Default: `true`. */
const val INCLUDE_HEADER: Boolean = true
/**
* @param quoteMode The [QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL].
*/
val QUOTE_MODE: QuoteMode = QuoteMode.MINIMAL
/**
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
*/
val ESCAPE_CHAR: Char? = null
/**
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
*/
const val COMMENT_CHAR: Char = '#'
/**
* @param recordSeparator The string that separates records in a CSV / TSV file.
* Default: `"\n"`, a Unix-newline.
*/
const val RECORD_SEPARATOR: String = "\n"
/**
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
*/
val HEADER_COMMENTS: List<String> = emptyList()
/**
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
*
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
* This will allow you to configure/overwrite any CSV / TSV writing options.
*/
val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it }
}
@@ -0,0 +1,69 @@
package org.jetbrains.kotlinx.dataframe.documentationCsv
import kotlin.annotation.AnnotationTarget.ANNOTATION_CLASS
import kotlin.annotation.AnnotationTarget.CLASS
import kotlin.annotation.AnnotationTarget.CONSTRUCTOR
import kotlin.annotation.AnnotationTarget.FIELD
import kotlin.annotation.AnnotationTarget.FILE
import kotlin.annotation.AnnotationTarget.FUNCTION
import kotlin.annotation.AnnotationTarget.LOCAL_VARIABLE
import kotlin.annotation.AnnotationTarget.PROPERTY
import kotlin.annotation.AnnotationTarget.PROPERTY_GETTER
import kotlin.annotation.AnnotationTarget.PROPERTY_SETTER
import kotlin.annotation.AnnotationTarget.TYPE
import kotlin.annotation.AnnotationTarget.TYPEALIAS
import kotlin.annotation.AnnotationTarget.VALUE_PARAMETER
/**
* Any `Documentable` annotated with this annotation will be excluded from the generated sources by
* the documentation processor.
*
* The annotation takes no arguments and may be placed on any of the targets listed in its [Target].
*
* **NOTE: DO NOT RENAME!**
*/
@Target(
CLASS,
ANNOTATION_CLASS,
PROPERTY,
FIELD,
LOCAL_VARIABLE,
VALUE_PARAMETER,
CONSTRUCTOR,
FUNCTION,
PROPERTY_GETTER,
PROPERTY_SETTER,
TYPE,
TYPEALIAS,
FILE,
)
internal annotation class ExcludeFromSources
/**
* Any `Documentable` annotated with this annotation will be exported to HTML by the documentation
* processor.
*
* You can use `@exportAsHtmlStart` and `@exportAsHtmlEnd` inside the doc to specify the range of the doc
* that should be exported to HTML.
*
* **NOTE: DO NOT RENAME!**
*
* @param theme Whether to include a simple theme in the HTML file. Default is `true`.
* @param stripReferences Whether to strip `[references]` from the HTML file. Default is `true`.
* This is useful when you want to include the HTML file in a website, where the references are not
* needed or would break.
*/
@Target(
CLASS,
ANNOTATION_CLASS,
PROPERTY,
FIELD,
LOCAL_VARIABLE,
VALUE_PARAMETER,
CONSTRUCTOR,
FUNCTION,
PROPERTY_GETTER,
PROPERTY_SETTER,
TYPE,
TYPEALIAS,
FILE,
)
internal annotation class ExportAsHtml(val theme: Boolean = true, val stripReferences: Boolean = true)
@@ -0,0 +1,24 @@
package org.jetbrains.kotlinx.dataframe.impl.io
import io.deephaven.csv.containers.ByteSlice
import io.deephaven.csv.tokenization.Tokenizer.CustomDoubleParser
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
/**
 * Adapter that exposes [FastDoubleParser] through Deephaven's [CustomDoubleParser] interface.
 *
 * Both [parse] overloads signal unparsable input by throwing a [NumberFormatException]
 * (without a message).
 *
 * @param parserOptions forwarded as-is to the wrapped [FastDoubleParser].
 */
internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions? = null) : CustomDoubleParser {

    private val fastDoubleParser = FastDoubleParser(parserOptions)

    /** Parses the bytes of [bs]; any exception raised while parsing is treated like an unparsable value. */
    override fun parse(bs: ByteSlice): Double {
        val parsed = try {
            fastDoubleParser.parseOrNull(bs.data(), bs.begin(), bs.size())
        } catch (_: Exception) {
            null
        }
        return parsed ?: throw NumberFormatException()
    }

    /** Parses the textual content of [cs]. */
    override fun parse(cs: CharSequence): Double {
        val text = cs.toString()
        return fastDoubleParser.parseOrNull(text) ?: throw NumberFormatException()
    }
}
@@ -0,0 +1,204 @@
package org.jetbrains.kotlinx.dataframe.impl.io
import io.deephaven.csv.parsers.DataType
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
import io.deephaven.csv.parsers.DataType.BYTE
import io.deephaven.csv.parsers.DataType.CHAR
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
import io.deephaven.csv.parsers.DataType.DOUBLE
import io.deephaven.csv.parsers.DataType.FLOAT
import io.deephaven.csv.parsers.DataType.INT
import io.deephaven.csv.parsers.DataType.LONG
import io.deephaven.csv.parsers.DataType.SHORT
import io.deephaven.csv.parsers.DataType.STRING
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
import io.deephaven.csv.sinks.Sink
import io.deephaven.csv.sinks.SinkFactory
import io.deephaven.csv.sinks.Source
import kotlinx.datetime.toKotlinLocalDateTime
import java.time.LocalDateTime
import java.time.ZoneOffset
import kotlin.time.Duration.Companion.nanoseconds
/**
* Convenience interface for types that are both a Deephaven [Sink] and a Deephaven [Source]
* of the same (non-null) type [T].
*/
internal interface SinkSource<T : Any> :
Sink<T>,
Source<T>
/**
 * Implementation of Deephaven's [Sink] and [Source] that stores data in an [ArrayList].
 *
 * The implementation is based on [Writing Your Own Data Sinks](https://github.com/deephaven/deephaven-csv/blob/main/ADVANCED.md).
 *
 * Values are stored boxed in [data]; `null` entries represent missing values and set [hasNulls].
 *
 * If we ever store column data unboxed / primitively, this needs to be modified.
 *
 * @param columnIndex index of the column this sink was created for (as passed in by the [SinkFactory]).
 * @param dataType the Deephaven [DataType] of the chunks this sink receives; it decides how incoming
 *   arrays are cast and how values are converted before being stored.
 */
@Suppress("UNCHECKED_CAST")
internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSource<Any> {

    @Suppress("ktlint:standard:comment-wrapping", "ktlint:standard:no-consecutive-comments")
    companion object {
        /** Number of nanoseconds in one second, used to normalize negative epoch offsets. */
        private const val NANOS_PER_SECOND: Int = 1_000_000_000

        /** [SinkFactory] creating a [ListSink] with the matching [DataType] for every supported column type. */
        val SINK_FACTORY: SinkFactory = SinkFactory.of(
            // unused in Parsers.DEFAULT:
            /* byteSinkSupplier = */ { ListSink(it, BYTE) as SinkSource<ByteArray> },
            /* shortSinkSupplier = */ { ListSink(it, SHORT) as SinkSource<ShortArray> },
            /* intSinkSupplier = */ { ListSink(it, INT) as SinkSource<IntArray> },
            /* longSinkSupplier = */ { ListSink(it, LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* floatSinkSupplier = */ { ListSink(it, FLOAT) as SinkSource<FloatArray> },
            /* doubleSinkSupplier = */ { ListSink(it, DOUBLE) as SinkSource<DoubleArray> },
            /* booleanAsByteSinkSupplier = */ { ListSink(it, BOOLEAN_AS_BYTE) as SinkSource<ByteArray> },
            /* charSinkSupplier = */ { ListSink(it, CHAR) as SinkSource<CharArray> },
            /* stringSinkSupplier = */ { ListSink(it, STRING) as SinkSource<Array<String>> },
            /* dateTimeAsLongSinkSupplier = */ { ListSink(it, DATETIME_AS_LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* timestampAsLongSinkSupplier = */ { ListSink(it, TIMESTAMP_AS_LONG) as SinkSource<LongArray> },
        )
    }

    private val _data: MutableList<Any?> = ArrayList(1000)

    /** Read-only view of the values collected so far. */
    val data: List<Any?>
        get() = _data

    /** `true` as soon as at least one `null` has been stored (including padding written by appends). */
    var hasNulls: Boolean = false
        private set

    /**
     * Converts nanoseconds relative to the epoch into a UTC-based [kotlinx.datetime.LocalDateTime].
     *
     * For negative durations `toComponents` yields a negative nanosecond component, which
     * [LocalDateTime.ofEpochSecond] rejects (it requires `0..999_999_999`). Such values are normalized
     * by borrowing one second, so instants before the epoch with a fractional second no longer throw.
     */
    private fun epochNanosToLocalDateTime(epochNanos: Long): kotlinx.datetime.LocalDateTime =
        epochNanos.nanoseconds
            .toComponents { seconds, nanoseconds ->
                if (nanoseconds < 0) {
                    LocalDateTime.ofEpochSecond(seconds - 1, nanoseconds + NANOS_PER_SECOND, ZoneOffset.UTC)
                } else {
                    LocalDateTime.ofEpochSecond(seconds, nanoseconds, ZoneOffset.UTC)
                }
            }.toKotlinLocalDateTime()

    /**
     * Reads the element at [srcIndex] from the chunk [src] and converts it to the value stored in [data].
     *
     * Returns `null` (and sets [hasNulls]) when `isNull[srcIndex]` is set; otherwise [src] is cast to the
     * array type belonging to [dataType].
     */
    private fun getValue(src: Any, srcIndex: Int, isNull: BooleanArray): Any? =
        if (isNull[srcIndex]) {
            hasNulls = true
            null
        } else {
            when (dataType) {
                BOOLEAN_AS_BYTE -> (src as ByteArray)[srcIndex] == 1.toByte()

                // unused in Parsers.DEFAULT
                BYTE -> (src as ByteArray)[srcIndex]

                // unused in Parsers.DEFAULT
                SHORT -> (src as ShortArray)[srcIndex]

                INT -> (src as IntArray)[srcIndex]

                LONG -> (src as LongArray)[srcIndex]

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                FLOAT -> (src as FloatArray)[srcIndex]

                DOUBLE -> (src as DoubleArray)[srcIndex]

                CHAR -> (src as CharArray)[srcIndex]

                STRING -> (src as Array<String>)[srcIndex]

                DATETIME_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                TIMESTAMP_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                else -> error("unsupported parser")
            }
        }

    /**
     * Appends `src[0 ..< destEnd - destBegin]` to the end of the stored data.
     * If the stored data is shorter than [destBegin], the gap is padded with `null`s first.
     */
    private fun writeAppending(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        while (data.size < destBegin) {
            _data += null
            hasNulls = true
        }
        for ((srcIndex, _) in (destBegin..<destEnd).withIndex()) {
            _data += getValue(src, srcIndex, isNull)
        }
    }

    /** Overwrites the stored values at `[destBegin, destEnd)` with `src[0 ..< destEnd - destBegin]`. */
    private fun writeReplacing(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        // `withIndex()` yields (0-based position, range value): the position indexes the chunk,
        // the range value indexes the stored data
        for ((srcIndex, destIndex) in (destBegin..<destEnd).withIndex()) {
            _data[destIndex] = getValue(src, srcIndex, isNull)
        }
    }

    /**
     * Writes the chunk [src] (with null-flags [isNull]) to positions `[destBegin, destEnd)` of this sink,
     * either by appending or by replacing existing values, depending on [appending].
     */
    override fun write(
        src: Any,
        isNull: BooleanArray,
        destBegin: Long,
        destEnd: Long,
        appending: Boolean,
    ) {
        if (destBegin == destEnd) return
        val destBeginAsInt = destBegin.toInt()
        val destEndAsInt = destEnd.toInt()
        if (appending) {
            writeAppending(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        } else {
            writeReplacing(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        }
    }

    /**
     * Copies the stored values at `[srcBegin, srcEnd)` into [dest] and [isNull], starting at index 0
     * of both arrays.
     *
     * Note that `withIndex()` yields (0-based position, range value): the position is the index into
     * [dest]/[isNull], the range value is the index into the stored [data]. (These two were previously
     * swapped, which was only correct for `srcBegin == 0`.)
     */
    override fun read(
        dest: Any,
        isNull: BooleanArray,
        srcBegin: Long,
        srcEnd: Long,
    ) {
        if (srcBegin == srcEnd) return
        val srcBeginAsInt = srcBegin.toInt()
        val srcEndAsInt = srcEnd.toInt()

        when (dataType) {
            BYTE -> {
                dest as ByteArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Byte?
                    if (value != null) dest[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            SHORT -> {
                dest as ShortArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Short?
                    if (value != null) dest[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            INT -> {
                dest as IntArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Int?
                    if (value != null) dest[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            LONG -> {
                dest as LongArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Long?
                    if (value != null) dest[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            // Deephaven's fast path for numeric type inference supports only byte, short, int, and long
            // so this should never be reached
            else -> error("unsupported sink state")
        }
    }

    override fun getUnderlying(): ListSink = this
}
@@ -0,0 +1,448 @@
@file:JvmName("ReadDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.impl.io
import io.deephaven.csv.CsvSpecs
import io.deephaven.csv.parsers.DataType
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
import io.deephaven.csv.parsers.DataType.BYTE
import io.deephaven.csv.parsers.DataType.CHAR
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
import io.deephaven.csv.parsers.DataType.DOUBLE
import io.deephaven.csv.parsers.DataType.FLOAT
import io.deephaven.csv.parsers.DataType.INT
import io.deephaven.csv.parsers.DataType.LONG
import io.deephaven.csv.parsers.DataType.SHORT
import io.deephaven.csv.parsers.DataType.STRING
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
import io.deephaven.csv.parsers.Parser
import io.deephaven.csv.parsers.Parsers
import io.deephaven.csv.reading.CsvReader
import io.deephaven.csv.util.CsvReaderException
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import kotlinx.datetime.LocalTime
import org.apache.commons.io.input.BOMInputStream
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.convertTo
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.parse
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.tryParse
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
import org.jetbrains.kotlinx.dataframe.io.ColType
import org.jetbrains.kotlinx.dataframe.io.Compression
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS
import org.jetbrains.kotlinx.dataframe.io.skippingBomCharacters
import org.jetbrains.kotlinx.dataframe.io.toKType
import org.jetbrains.kotlinx.dataframe.io.useDecompressed
import java.io.InputStream
import java.math.BigDecimal
import java.math.BigInteger
import java.net.URL
import java.nio.charset.Charset
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf
import kotlin.time.Duration
import kotlin.time.Instant as StdlibInstant
import kotlinx.datetime.Instant as DeprecatedInstant
/**
* Implementation to read delimiter-separated data from an [InputStream] based on the Deephaven CSV library.
*
* @param inputStream Represents the file to read.
* Use [charset] to specify the encoding.
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
* Default: `null`
*
* If `null`, the Charset will be read from the BOM of the provided input,
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
* @param delimiter The field delimiter character. The default is ',' for CSV, '\t' for TSV.
* @param header Optional column titles. Default: empty list.
*
* If non-empty, the data will be read with [header] as the column titles
* (use [skipLines] if there's a header in the data).
* If empty (default), the header will be read from the data.
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
*
* If supplied for a certain column name (inferred from data or given by [header]),
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
*
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
* to set a _default_ column type, like [ColType.String].
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
*
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
* @param readLines The maximum number of lines to read from the data. Default: `null`.
*
* If `null`, all lines will be read.
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
* Default: `false`.
*
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
* Column widths are determined by the header in the data (if present), or manually by setting
* [fixedColumnWidths].
* @param fixedColumnWidths The fixed column widths. Default: empty list.
*
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
* (if present), else, this manually sets the column widths.
* The number of widths should match the number of columns.
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
* Default, `null`.
*
* Can configure locale, date format, double parsing, skipping types, etc.
*
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
* the given types or the global setting.
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
*
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
*
* If `true`, rows that are too short will be interpreted as _empty_ values.
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
*
* If `true`, rows that are too long will have those columns dropped.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
* Default: `true`.
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
* Default: `false`.
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
*
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
* This is usually faster but can be turned off for debugging.
* @param compression The compression of the data.
* Default: [Compression.None], unless detected otherwise from the input file or url.
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
*
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
* This will allow you to configure/overwrite any CSV / TSV parsing options.
*/
internal fun readDelimImpl(
    inputStream: InputStream,
    delimiter: Char,
    header: List<String>,
    charset: Charset?,
    hasFixedWidthColumns: Boolean,
    fixedColumnWidths: List<Int>,
    colTypes: Map<String, ColType>,
    skipLines: Long,
    readLines: Long?,
    parserOptions: ParserOptions?,
    ignoreEmptyLines: Boolean,
    allowMissingColumns: Boolean,
    ignoreExcessColumns: Boolean,
    quote: Char,
    ignoreSurroundingSpaces: Boolean,
    trimInsideQuoted: Boolean,
    parseParallel: Boolean,
    compression: Compression<*>,
    adjustCsvSpecs: AdjustCsvSpecs,
): DataFrame<*> {
    // set up the csv specs
    val csvSpecs = with(CsvSpecs.builder()) {
        customDoubleParser(DataFrameCustomDoubleParser(parserOptions))
        // use the given nullStrings if provided, else take the global ones + some extras
        val nullStrings = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS)
        nullValueLiterals(nullStrings)
        headerLegalizer(::legalizeHeader)
        // no explicit limit given -> pass the largest possible row count
        numRows(readLines ?: Long.MAX_VALUE)
        ignoreEmptyLines(ignoreEmptyLines)
        allowMissingColumns(allowMissingColumns)
        ignoreExcessColumns(ignoreExcessColumns)
        // the delimiter is only configured for delimited (non fixed-width) data
        if (!hasFixedWidthColumns) delimiter(delimiter)
        quote(quote)
        ignoreSurroundingSpaces(ignoreSurroundingSpaces)
        trim(trimInsideQuoted)
        concurrent(parseParallel)
        // file-local helper: read the header from the data if `header` is empty, else use the given titles
        header(header)
        hasFixedWidthColumns(hasFixedWidthColumns)
        // explicit widths are only passed on when fixed-width mode is on and widths were actually supplied
        if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) fixedColumnWidths(fixedColumnWidths)
        // file-local helper: chooses between skipHeaderRows and skipRows, depending on where the header comes from
        skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines)
        // file-local helper: per-column parsers from colTypes + the set of parsers used for inference
        parsers(parserOptions, colTypes)
        // the user hook runs last so it can overwrite anything configured above;
        // its return value is the result of this with-block and is what gets built
        adjustCsvSpecs(this, this)
    }.build()
    val csvReaderResult = inputStream.useDecompressed(compression) { decompressedInputStream ->
        // read the csv
        try {
            val deBommedInputString = decompressedInputStream.skippingBomCharacters()
            // choose charset like: provided? -> from BOM? -> UTF-8
            // (the BOM is only available if the wrapped stream is a BOMInputStream that detected one)
            val streamCharset = charset
                ?: (deBommedInputString as? BOMInputStream)?.bom?.let { Charset.forName(it.charsetName) }
                ?: Charsets.UTF_8
            @Suppress("ktlint:standard:comment-wrapping")
            CsvReader.read(
                /* specs = */ csvSpecs,
                /* stream = */ deBommedInputString,
                /* streamCharset = */ streamCharset,
                /* sinkFactory = */ ListSink.SINK_FACTORY,
            )
        } catch (e: CsvReaderException) {
            // catch case when the file is empty and header needs to be inferred from it.
            // NOTE: this matches on the exact exception message text, so it only triggers
            // while the thrown message stays identical to this literal.
            if (e.message ==
                "Can't proceed because hasHeaderRow is set but input file is empty or shorter than skipHeaderRows"
            ) {
                // non-local return: leaves readDelimImpl itself with an empty DataFrame
                return@readDelimImpl DataFrame.empty()
            }
            // any other reader failure is rethrown with the original exception kept as cause
            throw IllegalStateException(
                "Could not read delimiter-separated data: CsvReaderException: ${e.message}: ${e.cause?.message ?: ""}",
                e,
            )
        }
    }
    // column type to fall back on for columns without their own colTypes entry (may be null)
    val defaultColType = colTypes[ColType.DEFAULT]
    // convert each ResultColumn to a DataColumn
    val cols = csvReaderResult.map {
        it.toDataColumn(
            parserOptions = parserOptions,
            desiredColType = colTypes[it.name()] ?: defaultColType,
        )
    }
    return dataFrameOf(cols)
}
/**
 * Turns a [CsvReader.ResultColumn] produced by the reader into a [DataColumn].
 *
 * Columns whose sink reports any data type other than [STRING] are returned unchanged.
 * String columns get a second pass:
 * - if [desiredColType] is given, the column is converted to that (nullable) type,
 * - otherwise it is handed to `tryParse`, with [typesDeephavenAlreadyParses] added to the skipped types.
 *
 * @param parserOptions parsing options for the second pass; `null` falls back to default/global options.
 * @param desiredColType the requested type for this column, or `null` to infer it.
 */
@Suppress("UNCHECKED_CAST")
private fun CsvReader.ResultColumn.toDataColumn(
    parserOptions: ParserOptions?,
    desiredColType: ColType?,
): DataColumn<*> {
    val sink = data()!! as ListSink
    val sinkValues = sink.data
    val sinkDataType = sink.dataType
    val sinkHasNulls = sink.hasNulls
    val columnType = dataType().toKType().withNullability(sinkHasNulls)
    val column = DataColumn.createValueColumn(
        name = name(),
        values = sinkValues,
        type = columnType,
    )

    // everything except String is already in its final form
    if (sinkDataType != STRING) return column

    // attempt to perform additional parsing if necessary, will remain String if it fails
    column as ValueColumn<String?>

    if (desiredColType != null) {
        val targetType = desiredColType.toKType().withNullability(true)
        return column.convertTo(
            newType = targetType,
            parserOptions = parserOptions,
        )
    }

    // no need to check for types that Deephaven already parses, skip those too
    val requestedSkipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    val effectiveSkipTypes = requestedSkipTypes + typesDeephavenAlreadyParses
    val effectiveOptions = (parserOptions ?: ParserOptions()).copy(skipTypes = effectiveSkipTypes)
    return column.tryParse(effectiveOptions)
}
/**
 * Maps the [DataType] reported by a sink to the [KType] of the values in the resulting column.
 *
 * Calls [error] for [DataType.CUSTOM] and for a `null` receiver.
 */
private fun DataType?.toKType(): KType =
    when (this) {
        BOOLEAN_AS_BYTE -> typeOf<Boolean>()

        CHAR -> typeOf<Char>()

        STRING -> typeOf<String>()

        INT -> typeOf<Int>()

        LONG -> typeOf<Long>()

        DOUBLE -> typeOf<Double>()

        // unused in Parsers.DEFAULT
        BYTE -> typeOf<Byte>()

        // unused in Parsers.DEFAULT
        SHORT -> typeOf<Short>()

        // unused in Parsers.COMPLETE and Parsers.DEFAULT
        FLOAT -> typeOf<Float>()

        // both long-backed date-time encodings end up as LocalDateTime;
        // TIMESTAMP_AS_LONG is unused in Parsers.COMPLETE and Parsers.DEFAULT
        DATETIME_AS_LONG, TIMESTAMP_AS_LONG -> typeOf<LocalDateTime>()

        DataType.CUSTOM -> error("custom data type")

        null -> error("null data type")
    }
/**
 * Runs every header name, in order, through a single [ColumnNameGenerator]
 * and returns the names it hands back.
 */
private fun legalizeHeader(header: Array<String>): Array<String> {
    val uniqueNames = ColumnNameGenerator()
    return Array(header.size) { index -> uniqueNames.addUnique(header[index]) }
}
/**
 * Applies [skipLines] to the builder.
 *
 * When the header is taken from the data ([takeHeaderFromCsv]), the count is passed to
 * `skipHeaderRows`; otherwise it is passed to `skipRows`.
 */
private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Long): CsvSpecs.Builder =
    when {
        takeHeaderFromCsv -> skipHeaderRows(skipLines)
        else -> skipRows(skipLines)
    }
/**
 * Sets the correct parsers for the csv, based on [colTypes] and [ParserOptions.skipTypes].
 * If [ColType.DEFAULT] is present, it sets the default parser.
 *
 * Logic overview:
 *
 * - if no [colTypes] are given
 *     - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *     - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 * - if [colTypes] are supplied
 *     - if [ColType.DEFAULT] is among the values
 *         - set the parser for each supplied column+colType
 *         - let deephaven use _only_ the parser given as [ColType.DEFAULT] type
 *     - if [ColType.DEFAULT] is not among the values
 *         - set the parser for each supplied column+coltype
 *         - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *         - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 *
 * We will not use [Deephaven's DateTime parser][Parsers.DATETIME].
 * This is done to avoid different behavior compared to [DataFrame.parse];
 * Deephaven parses [Instant] as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047)
 *
 * Note that `skipTypes` will never skip a type explicitly set by `colTypes`.
 * This is intended.
 *
 * Skip types that [toColType] does not know are ignored here. [toColType] maps every unknown type
 * to [ColType.String]; without this guard an unrelated skip type would remove [Parsers.STRING]
 * from the parsers used for inference. Skipping `String` explicitly still removes it.
 *
 * @param parserOptions source of `skipTypes`; when `null`, the global [DataFrame.parser] setting is used.
 * @param colTypes requested type per column name, optionally with a [ColType.DEFAULT] entry.
 * @return this builder.
 */
private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map<String, ColType>): CsvSpecs.Builder {
    // register the explicitly requested parser for every named column
    for ((colName, colType) in colTypes) {
        if (colName == ColType.DEFAULT) continue
        putParserForName(colName, colType.toCsvParser())
    }

    // BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING
    val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME
    val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    // single lookup instead of `in` + `!!`; map values are non-null, so both checks are equivalent
    val defaultColType = colTypes[ColType.DEFAULT]

    val parsersToUse = when {
        defaultColType != null ->
            listOf(defaultColType.toCsvParser(), Parsers.STRING)

        skipTypes.isNotEmpty() -> {
            val stringType = typeOf<String>()
            val parsersToSkip = skipTypes.mapNotNull { skipType ->
                val colType = skipType.toColType()
                // ColType.String for a non-String type means toColType() hit its `else` fallback:
                // the type is unknown to us, so it must not cost us the String parser
                val isUnknownTypeFallback =
                    colType == ColType.String && skipType.withNullability(false) != stringType
                if (isUnknownTypeFallback) null else colType.toCsvParserOrNull()
            }
            defaultParsers.toSet() - parsersToSkip.toSet()
        }

        else -> defaultParsers
    }
    parsers(parsersToUse)
    return this
}
/**
 * Configures where the column titles come from.
 *
 * An empty [header] sets `hasHeaderRow(true)`, i.e. take the header from the csv.
 * A non-empty [header] sets `hasHeaderRow(false)` and supplies [header] via `headers`.
 */
private fun CsvSpecs.Builder.header(header: List<String>): CsvSpecs.Builder {
    val takeHeaderFromCsv = header.isEmpty()
    val builder = hasHeaderRow(takeHeaderFromCsv)
    return if (takeHeaderFromCsv) builder else builder.headers(header)
}
/**
 * Looks up the Deephaven CSV [Parser] that directly corresponds to this [ColType].
 *
 * @return the matching parser, or `null` if this [ColType] has no direct Deephaven counterpart.
 */
internal fun ColType.toCsvParserOrNull(): Parser<*>? =
    when (this) {
        ColType.Boolean -> Parsers.BOOLEAN
        ColType.Char -> Parsers.CHAR
        ColType.String -> Parsers.STRING
        ColType.Int -> Parsers.INT
        ColType.Long -> Parsers.LONG
        ColType.Double -> Parsers.DOUBLE
        else -> null
    }
/**
 * Non-null variant of [toCsvParserOrNull].
 *
 * When this [ColType] has no direct Deephaven [Parser], [Parsers.STRING] is returned instead,
 * so the values stay text and [DataFrame.parse] can handle them afterwards.
 */
internal fun ColType.toCsvParser(): Parser<*> {
    val directParser = toCsvParserOrNull()
    return directParser ?: Parsers.STRING
}
/**
 * Maps a [KType] to the corresponding [ColType]; nullability of the receiver is ignored.
 *
 * Types without a dedicated branch fall back to [ColType.String].
 */
internal fun KType.toColType(): ColType {
    val nonNullType = withNullability(false)
    return when (nonNullType) {
        // text
        typeOf<String>() -> ColType.String
        typeOf<Char>() -> ColType.Char

        // numbers and booleans
        typeOf<Boolean>() -> ColType.Boolean
        typeOf<Int>() -> ColType.Int
        typeOf<Long>() -> ColType.Long
        typeOf<Double>() -> ColType.Double
        typeOf<BigDecimal>() -> ColType.BigDecimal
        typeOf<BigInteger>() -> ColType.BigInteger

        // date and time
        typeOf<LocalDate>() -> ColType.LocalDate
        typeOf<LocalTime>() -> ColType.LocalTime
        typeOf<LocalDateTime>() -> ColType.LocalDateTime
        typeOf<DeprecatedInstant>() -> ColType.DeprecatedInstant
        typeOf<StdlibInstant>() -> ColType.StdlibInstant
        typeOf<Duration>() -> ColType.Duration

        // other
        typeOf<URL>() -> ColType.Url
        typeOf<DataFrame<*>>() -> ColType.JsonArray
        typeOf<DataRow<*>>() -> ColType.JsonObject

        else -> ColType.String
    }
}
/**
 * The types Deephaven has already tried while reading.
 * They are added to the skipped types when a String column is passed on to DataFrame's own String parsers.
 *
 * [LocalDateTime] and [java.time.LocalDateTime] are not included because Deephaven cannot recognize all formats.
 */
internal val typesDeephavenAlreadyParses: Set<KType> =
    setOf(typeOf<Int>(), typeOf<Long>(), typeOf<Double>(), typeOf<Char>(), typeOf<Boolean>())
@@ -0,0 +1,112 @@
@file:JvmName("WriteDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.impl.io
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
import org.jetbrains.kotlinx.dataframe.io.toJson
import org.apache.commons.csv.QuoteMode as ApacheQuoteMode
/**
* Writes [df] to [writer] in a delimiter-separated format.
*
* @param df The data to write.
* @param writer The [Appendable] to write to.
* @param delimiter The field delimiter character. Default: ','.
*
 * Always applied: this writer has no fixed-width mode, so the delimiter is never ignored.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
*
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
* This will allow you to configure/overwrite any CSV / TSV writing options.
*/
internal fun writeDelimImpl(
    df: AnyFrame,
    writer: Appendable,
    delimiter: Char,
    includeHeader: Boolean,
    quote: Char?,
    quoteMode: QuoteMode,
    escapeChar: Char?,
    commentChar: Char?,
    headerComments: List<String>,
    recordSeparator: String,
    adjustCsvFormat: AdjustCSVFormat,
) {
    // configure the format builder from the given arguments, starting from CSVFormat.DEFAULT
    val configuredBuilder = CSVFormat.Builder.create(CSVFormat.DEFAULT).run {
        setDelimiter(delimiter)
        setQuote(quote)
        setSkipHeaderRecord(!includeHeader)
        setQuoteMode(quoteMode.toApache())
        setRecordSeparator(recordSeparator)
        setEscape(escapeChar)
        setCommentMarker(commentChar)
        setHeaderComments(*headerComments.toTypedArray())
    }
    // the user hook runs last, so it can overwrite any of the settings above
    val format = adjustCsvFormat(configuredBuilder, configuredBuilder).get()

    // the printer does the actual writing; we only turn nested rows/frames into JSON first
    format.print(writer).use { printer ->
        // the header is written by hand, as a plain record of the column names
        if (includeHeader) {
            printer.printRecord(df.columnNames())
        }
        df.forEach { row ->
            val cells = row.values().map { cell ->
                when (cell) {
                    is AnyRow ->
                        try {
                            cell.toJson()
                        } catch (_: NoClassDefFoundError) {
                            // toJson lives in another module; report the missing dependency clearly
                            error(
                                "Encountered a DataRow value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                            )
                        }

                    is AnyFrame ->
                        try {
                            cell.toJson()
                        } catch (_: NoClassDefFoundError) {
                            error(
                                "Encountered a DataFrame value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                            )
                        }

                    else -> cell
                }
            }
            printer.printRecord(cells)
        }
    }
}
/**
 * Translates DataFrame's [QuoteMode] into the Apache Commons CSV quote mode of the same name.
 *
 * The `when` has no `else` on purpose: a new [QuoteMode] entry must be mapped here before the code compiles.
 */
internal fun QuoteMode.toApache(): ApacheQuoteMode =
    when (this) {
        QuoteMode.NONE -> ApacheQuoteMode.NONE
        QuoteMode.MINIMAL -> ApacheQuoteMode.MINIMAL
        QuoteMode.NON_NUMERIC -> ApacheQuoteMode.NON_NUMERIC
        QuoteMode.ALL_NON_NULL -> ApacheQuoteMode.ALL_NON_NULL
        QuoteMode.ALL -> ApacheQuoteMode.ALL
    }
@@ -0,0 +1,26 @@
package org.jetbrains.kotlinx.dataframe.io
/**
 * Defines quoting behavior when writing delimiter-separated data.
 *
 * Each entry is translated to the Apache Commons CSV quote mode with the same name
 * (see `QuoteMode.toApache()` in the `impl.io` package) before it is handed to the CSV format.
 */
public enum class QuoteMode {
    /** Quotes all fields. */
    ALL,

    /** Quotes all non-null fields. */
    ALL_NON_NULL,

    /**
     * Quotes fields that contain special characters such as a field delimiter, quote character, or any of the
     * characters in the line separator string.
     */
    MINIMAL,

    /** Quotes all non-numeric fields. */
    NON_NUMERIC,

    /**
     * Never quotes fields. When the delimiter occurs in data, the printer prefixes it with the escape character. If the
     * escape character is not set, format validation throws an exception.
     */
    NONE,
}
@@ -0,0 +1,39 @@
@file:JvmName("CsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import java.io.File
import java.io.InputStream
import java.nio.file.Path
import kotlin.reflect.typeOf
/**
 * [SupportedDataFrameFormat] implementation for CSV data.
 *
 * Every read is delegated to the matching [DataFrame.readCsv] overload, passing on [delimiter] and the given header.
 *
 * @param delimiter the field delimiter used for all reads and for the generated default read method.
 */
public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat {

    /** Reads a [DataFrame] from [stream] with the configured delimiter and the given [header]. */
    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> {
        return DataFrame.readCsv(inputStream = stream, header = header, delimiter = delimiter)
    }

    /** Reads a [DataFrame] from [file] with the configured delimiter and the given [header]. */
    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> {
        return DataFrame.readCsv(file = file, header = header, delimiter = delimiter)
    }

    /** Reads a [DataFrame] from [path] with the configured delimiter and the given [header]. */
    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> {
        return DataFrame.readCsv(path = path, delimiter = delimiter, header = header)
    }

    /** Only the exact extension `csv` is accepted. */
    override fun acceptsExtension(ext: String): Boolean = ext == "csv"

    /** Every sample is accepted. */
    override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

    override val testOrder: Int = 20_000

    /** Builds the default read method, with the configured delimiter as its only argument. */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod {
        val delimiterArgument = MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter)
        return DefaultReadCsvMethod(pathRepresentation, delimiterArgument)
    }
}
// Method name passed as the third argument to AbstractDefaultReadMethod below.
// Presumably the name of the read function used in generated code (confirm in AbstractDefaultReadMethod).
private const val READ_CSV = "readCsv"

/**
 * The [AbstractDefaultReadMethod] for CSV: forwards [path] and [arguments] unchanged
 * and fixes the method name to `readCsv`.
 */
internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
    AbstractDefaultReadMethod(path, arguments, READ_CSV)
@@ -0,0 +1,175 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
/**
* ### Read CSV String to [DataFrame]
*
* Reads any CSV [String] to a [DataFrame][DataFrame].
*
* Parameters you can use to customize the reading process include, for instance, [delimiter],
* [header], [colTypes], [readLines], and [parserOptions].
* See the param list below for all settings.
*
* The integration is built upon [Deephaven CSV](https://github.com/deephaven/deephaven-csv).
*
* ##### Similar Functions
* With the overloads of [DataFrame.readCsv][readCsv]`()`, you can read any CSV by [File][File],
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
* Reading by file path or URL can also be done by passing a [String].
*
* For example, [DataFrame.readCsv][readCsv]`("input.csv")` or with some options:
*
* [DataFrame.readCsv][readCsv]`(`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`file = `[File][File]`("input.csv"),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`readLines = 1000L,`
*
* `)`
*
* ZIP (.zip) or GZIP (.gz) files are supported by default. [compression] is automatically detected.
*
* You can also read "raw" CSV data from a [String] like this:
*
 * [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ',')`
*
* @param text The raw data to read in the form of a [String].
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
* Default: `null`
*
* If `null`, the Charset will be read from the BOM of the provided input,
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
* @param header Optional column titles. Default: empty list.
*
* If non-empty, the data will be read with [header] as the column titles
* (use [skipLines] if there's a header in the data).
* If empty (default), the header will be read from the data.
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
* Default: `false`.
*
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
* Column widths are determined by the header in the data (if present), or manually by setting
* [fixedColumnWidths].
* @param fixedColumnWidths The fixed column widths. Default: empty list.
*
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
* (if present), else, this manually sets the column widths.
* The number of widths should match the number of columns.
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
*
* If supplied for a certain column name (inferred from data or given by [header]),
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
*
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
* to set a _default_ column type, like [ColType.String].
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
*
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
* @param readLines The maximum number of lines to read from the data. Default: `null`.
*
* If `null`, all lines will be read.
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
* Default, `null`.
*
* Can configure locale, date format, double parsing, skipping types, etc.
*
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
* the given types or the global setting.
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
*
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
*
* If `true`, rows that are too short will be interpreted as _empty_ values.
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
*
* If `true`, rows that are too long will have those columns dropped.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
* Default: `true`.
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
* Default: `false`.
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
*
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
* This is usually faster but can be turned off for debugging.
*/
public fun DataFrame.Companion.readCsvStr(
    text: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The text is turned into bytes with byteInputStream()'s default charset (UTF-8)
    // and read back with that same charset, so no charset parameter is exposed here.
    val textAsStream = text.byteInputStream()

    // Arguments are listed in the order readDelimImpl declares them.
    return readDelimImpl(
        inputStream = textAsStream,
        delimiter = delimiter,
        header = header,
        charset = Charsets.UTF_8,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // an in-memory String is never compressed
        compression = Compression.None,
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
@@ -0,0 +1,175 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
/**
* ### Read Delimiter-Separated Text String to [DataFrame]
*
* Reads any delimiter-separated text [String] to a [DataFrame][DataFrame].
*
* Parameters you can use to customize the reading process include, for instance, [delimiter],
* [header], [colTypes], [readLines], and [parserOptions].
* See the param list below for all settings.
*
* The integration is built upon [Deephaven CSV](https://github.com/deephaven/deephaven-csv).
*
* ##### Similar Functions
* With the overloads of [DataFrame.readDelim][readDelim]`()`, you can read any delimiter-separated text by [File][File],
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
* Reading by file path or URL can also be done by passing a [String].
*
* For example, [DataFrame.readDelim][readDelim]`("input.txt")` or with some options:
*
* [DataFrame.readDelim][readDelim]`(`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`file = `[File][File]`("input.txt"),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`readLines = 1000L,`
*
* `)`
*
* ZIP (.zip) or GZIP (.gz) files are supported by default. [compression] is automatically detected.
*
* You can also read "raw" delimiter-separated text data from a [String] like this:
*
 * [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ',')`
*
* @param text The raw data to read in the form of a [String].
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
* Default: `null`
*
* If `null`, the Charset will be read from the BOM of the provided input,
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
* @param header Optional column titles. Default: empty list.
*
* If non-empty, the data will be read with [header] as the column titles
* (use [skipLines] if there's a header in the data).
* If empty (default), the header will be read from the data.
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
* Default: `false`.
*
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
* Column widths are determined by the header in the data (if present), or manually by setting
* [fixedColumnWidths].
* @param fixedColumnWidths The fixed column widths. Default: empty list.
*
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
* (if present), else, this manually sets the column widths.
* The number of widths should match the number of columns.
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
*
* If supplied for a certain column name (inferred from data or given by [header]),
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
*
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
* to set a _default_ column type, like [ColType.String].
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
*
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
* @param readLines The maximum number of lines to read from the data. Default: `null`.
*
* If `null`, all lines will be read.
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
* Default, `null`.
*
* Can configure locale, date format, double parsing, skipping types, etc.
*
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
* the given types or the global setting.
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
*
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
*
* If `true`, rows that are too short will be interpreted as _empty_ values.
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
*
* If `true`, rows that are too long will have those columns dropped.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
* Default: `true`.
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
* Default: `false`.
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
*
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
* This is usually faster but can be turned off for debugging.
*/
public fun DataFrame.Companion.readDelimStr(
    text: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The text is encoded to bytes as UTF-8 and the very same charset is handed to the
    // reader, so encoding and decoding always agree.
    val utf8 = Charsets.UTF_8
    val textStream = text.byteInputStream(utf8)

    return readDelimImpl(
        // Arguments fixed by this String overload (not configurable by the caller).
        inputStream = textStream,
        charset = utf8,
        compression = Compression.None, // an in-memory String is never compressed
        adjustCsvSpecs = ADJUST_CSV_SPECS,
        // Everything below is forwarded unchanged from the caller.
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
    )
}
@@ -0,0 +1,175 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
/**
* ### Read TSV String to [DataFrame]
*
* Reads any TSV [String] to a [DataFrame][DataFrame].
*
* Parameters you can use to customize the reading process include, for instance, [delimiter],
* [header], [colTypes], [readLines], and [parserOptions].
* See the param list below for all settings.
*
* The integration is built upon [Deephaven CSV](https://github.com/deephaven/deephaven-csv).
*
* ##### Similar Functions
* With the overloads of [DataFrame.readTsv][readTsv]`()`, you can read any TSV by [File][File],
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
* Reading by file path or URL can also be done by passing a [String].
*
* For example, [DataFrame.readTsv][readTsv]`("input.tsv")` or with some options:
*
* [DataFrame.readTsv][readTsv]`(`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`file = `[File][File]`("input.tsv"),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
*
* &nbsp;&nbsp;&nbsp;&nbsp;`readLines = 1000L,`
*
* `)`
*
* ZIP (.zip) or GZIP (.gz) files are supported by default. [compression] is automatically detected.
*
* You can also read "raw" TSV data from a [String] like this:
*
* [DataFrame.readTsvStr][readTsvStr]`("a\tb\tc", delimiter = '\t')`
*
* @param text The raw data to read in the form of a [String].
* @param delimiter The field delimiter character. Default: '\t'.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
* Default: `null`
*
* If `null`, the Charset will be read from the BOM of the provided input,
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
* @param header Optional column titles. Default: empty list.
*
* If non-empty, the data will be read with [header] as the column titles
* (use [skipLines] if there's a header in the data).
* If empty (default), the header will be read from the data.
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
* Default: `false`.
*
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
* Column widths are determined by the header in the data (if present), or manually by setting
* [fixedColumnWidths].
* @param fixedColumnWidths The fixed column widths. Default: empty list.
*
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
* (if present), else, this manually sets the column widths.
* The number of widths should match the number of columns.
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
*
* If supplied for a certain column name (inferred from data or given by [header]),
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
*
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
* to set a _default_ column type, like [ColType.String].
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
*
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
* @param readLines The maximum number of lines to read from the data. Default: `null`.
*
* If `null`, all lines will be read.
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
* Default, `null`.
*
* Can configure locale, date format, double parsing, skipping types, etc.
*
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
* the given types or the global setting.
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
*
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
*
* If `true`, rows that are too short will be interpreted as _empty_ values.
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
*
* If `true`, rows that are too long will have those columns dropped.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
* Default: `true`.
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
* Default: `false`.
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
*
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
* This is usually faster but can be turned off for debugging.
*/
public fun DataFrame.Companion.readTsvStr(
    text: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // Encode the in-memory text as UTF-8 and tell the reader to decode with the same
    // charset; this overload exposes no charset parameter of its own.
    val utf8 = Charsets.UTF_8
    val textStream = text.byteInputStream(utf8)

    return readDelimImpl(
        // Arguments fixed by this String overload (not configurable by the caller).
        inputStream = textStream,
        charset = utf8,
        compression = Compression.None, // an in-memory String is never compressed
        adjustCsvSpecs = ADJUST_CSV_SPECS,
        // Everything below is forwarded unchanged from the caller.
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
    )
}
@@ -0,0 +1,83 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
/**
* ### Convert [DataFrame] to CSV String
*
* Converts [this][this] [DataFrame][DataFrame] to a CSV [String].
*
* Parameters you can use to customize the process include, for instance, [delimiter],
* [includeHeader], [quoteMode], and [headerComments].
* See the param list below for all settings.
*
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
*
* ##### Similar Functions
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
*
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
*
* Converting to a [String] can be done like this:
*
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
*
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
*/
public fun AnyFrame.toCsvStr(
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Collect the delimited output in memory and hand back the accumulated text.
    val output = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = output,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload exposes no format hook; the shared default is passed through.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return output.toString()
}
@@ -0,0 +1,83 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
/**
* ### Convert [DataFrame] to Delimiter-Separated Text String
*
* Converts [this][this] [DataFrame][DataFrame] to a delimiter-separated text [String].
*
* Parameters you can use to customize the process include, for instance, [delimiter],
* [includeHeader], [quoteMode], and [headerComments].
* See the param list below for all settings.
*
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
*
* ##### Similar Functions
* With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
*
* or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
*
* Converting to a [String] can be done like this:
*
* [DataFrame.toDelimStr][toDelimStr]`(delimiter = ',')`
*
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
*/
public fun AnyFrame.toDelimStr(
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Collect the delimited output in memory and hand back the accumulated text.
    val output = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = output,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload exposes no format hook; the shared default is passed through.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return output.toString()
}
@@ -0,0 +1,83 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
/**
* ### Convert [DataFrame] to TSV String
*
* Converts [this][this] [DataFrame][DataFrame] to a TSV [String].
*
* Parameters you can use to customize the process include, for instance, [delimiter],
* [includeHeader], [quoteMode], and [headerComments].
* See the param list below for all settings.
*
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
*
* ##### Similar Functions
* With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
*
* or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
*
* Converting to a [String] can be done like this:
*
* [DataFrame.toTsvStr][toTsvStr]`(delimiter = '\t')`
*
* @param delimiter The field delimiter character. Default: '\t'.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
*/
public fun AnyFrame.toTsvStr(
    includeHeader: Boolean = INCLUDE_HEADER,
    delimiter: Char = TSV_DELIMITER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Collect the delimited output in memory and hand back the accumulated text.
    val output = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = output,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload exposes no format hook; the shared default is passed through.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return output.toString()
}
@@ -0,0 +1,39 @@
@file:JvmName("TsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import java.io.File
import java.io.InputStream
import java.nio.file.Path
import kotlin.reflect.typeOf
/**
 * [SupportedDataFrameFormat] for `.tsv` input.
 *
 * Every read is forwarded to the matching [DataFrame.readTsv][readTsv] overload, always
 * passing the [delimiter] this instance was created with.
 *
 * @param delimiter The field delimiter handed to `readTsv`. Default: [DelimParams.TSV_DELIMITER].
 */
public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat {

    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(inputStream = stream, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(file = file, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(path = path, header = header, delimiter = delimiter)
    }

    /** Accepts exactly the extension `tsv` (case-sensitive comparison). */
    override fun acceptsExtension(ext: String): Boolean = "tsv" == ext

    /** The sample content is never inspected: the extension check alone decides. */
    override fun acceptsSample(sample: SupportedFormatSample): Boolean {
        return true // Extension is enough
    }

    override val testOrder: Int = 30_000

    /**
     * Builds the default read method for [pathRepresentation], carrying this instance's
     * [delimiter] as a `delimiter` argument rendered with the `'%L'` template.
     */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadTsvMethod(
            path = pathRepresentation,
            arguments = MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter),
        )
}
/**
 * Method name handed to [AbstractDefaultReadMethod] by [DefaultReadTsvMethod].
 *
 * NOTE(review): presumably the name of the `DataFrame` reader function that generated
 * code will call — confirm against [AbstractDefaultReadMethod].
 */
private const val READ_TSV = "readTsv"

/**
 * [AbstractDefaultReadMethod] specialisation for TSV: forwards [path] and [arguments]
 * unchanged and fixes the method name to [READ_TSV].
 *
 * Created by [TsvDeephaven.createDefaultReadMethod].
 */
internal class DefaultReadTsvMethod(path: String?, arguments: MethodArguments) :
AbstractDefaultReadMethod(path, arguments, READ_TSV)
@@ -0,0 +1,25 @@
package org.jetbrains.kotlinx.dataframe.io
import io.deephaven.csv.CsvSpecs
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.documentationCsv.ExcludeFromSources
/**
 * Default strings that are considered null when reading CSV / TSV / delim files:
 *
 * [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]
 *
 * Note that the empty string is part of the set, and that matching variants are listed
 * explicitly (e.g. both `"null"` and `"NULL"`).
 *
 * According to the `parserOptions` documentation of the read functions, these strings are
 * used in addition to the global parser setting when `ParserOptions.nullStrings` is `null`.
 */
public val DEFAULT_DELIM_NULL_STRINGS: Set<String> =
setOf("", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil")
/**
 * Typealias for `CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder`.
 * A lambda where you can overwrite or adjust any of the CSV specs.
 *
 * The builder is available both as the receiver (`this`) and as the single
 * argument (`it`) of the lambda; the lambda has to return a [CsvSpecs.Builder].
 */
public typealias AdjustCsvSpecs = CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder
/**
 * Typealias for `CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder`.
 * A lambda where you can overwrite or adjust any of the CSV format options.
 *
 * The builder is available both as the receiver (`this`) and as the single
 * argument (`it`) of the lambda; the lambda has to return a [CSVFormat.Builder].
 */
public typealias AdjustCSVFormat = CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder
@@ -0,0 +1,305 @@
@file:JvmName("WriteCsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
import java.io.File
import java.io.FileWriter
import java.nio.file.Path
import kotlin.io.path.writer
/**
* ### Write [DataFrame] to CSV File
*
* Writes [this][this] [DataFrame][DataFrame] to a CSV file.
*
* Parameters you can use to customize the process include, for instance, [delimiter],
* [includeHeader], [quoteMode], and [headerComments].
* See the param list below for all settings.
*
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
*
* ##### Similar Functions
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
*
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
*
* Converting to a [String] can be done like this:
*
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
*
* @param path The path pointing to a file to write to.
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
*/
public fun AnyFrame.writeCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // This function opens the file writer, so it also owns closing it: `use` releases the
    // file handle even when writing fails part-way.
    // NOTE(review): whether writeDelimImpl closes the writer itself is not visible from
    // here; if it does, the extra close is harmless (closing a closed Writer has no effect).
    // `path.writer()` encodes as UTF-8 by default.
    path.writer().use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // This overload exposes no format hook; the shared default is passed through.
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* ### Write [DataFrame] to CSV File
*
* Writes [this][this] [DataFrame][DataFrame] to a CSV file.
*
* Parameters you can use to customize the process include, for instance, [delimiter],
* [includeHeader], [quoteMode], and [headerComments].
* See the param list below for all settings.
*
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
*
* ##### Similar Functions
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
*
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
*
* Converting to a [String] can be done like this:
*
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
*
* @param file The file to write to.
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
*/
public fun AnyFrame.writeCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `File.writer()` encodes as UTF-8, matching the `Path` overload of `writeCsv`.
    // `FileWriter(file)` would use the platform default charset instead (not UTF-8 on
    // e.g. Windows with a JDK older than 18), which corrupts non-ASCII text when the
    // file is read back as UTF-8.
    //
    // This function opens the writer, so it also owns closing it: `use` releases the file
    // handle even when writing fails part-way.
    // NOTE(review): whether writeDelimImpl closes the writer itself is not visible from
    // here; if it does, the extra close is harmless (closing a closed Writer has no effect).
    file.writer().use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // This overload exposes no format hook; the shared default is passed through.
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* ### Write [DataFrame] to CSV File
*
* Writes [this][this] [DataFrame][DataFrame] to a CSV file.
*
* Parameters you can use to customize the process include, for instance, [delimiter],
* [includeHeader], [quoteMode], and [headerComments].
* See the param list below for all settings.
*
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
*
* ##### Similar Functions
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
*
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
*
* Converting to a [String] can be done like this:
*
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
*
* @param path The path pointing to a file to write to.
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if [hasFixedWidthColumns] is `true`.
* @param includeHeader Whether to include the header in the output. Default: `true`.
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
* @param recordSeparator The character that separates records in a CSV / TSV file.
* Default: `'\n'`, a Unix-newline.
*/
public fun AnyFrame.writeCsv(
    path: String,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // Open the file via Kotlin's `File.writer()`, which encodes as UTF-8 by default, instead of
    // `FileWriter(path)`, which uses the platform-default charset and makes the output machine-dependent.
    // `use` guarantees the file handle is released even when writing fails. Should `writeDelimImpl`
    // already have closed the writer, the second close is a no-op.
    java.io.File(path).writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to CSV Appendable
 *
 * Serializes [this][this] [DataFrame][DataFrame] as CSV into the given [Appendable].
 *
 * Every formatting option is optional; the most commonly used ones are [delimiter],
 * [includeHeader], [quoteMode], and [headerComments]. The full list is documented below.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * Other overloads of [DataFrame.writeCsv][writeCsv]`()` write to a [File][File],
 * a [Path][java.nio.file.Path], or a path given as [String], for example
 * [DataFrame.writeCsv][writeCsv]`("output.csv")` or
 * [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`.
 *
 * To obtain the CSV as a [String] instead, use [DataFrame.toCsvStr][toCsvStr]`(delimiter = ",")`.
 *
 * @param writer The [Appendable] to write to.
 * @param delimiter The field delimiter character. Default: ','.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 * @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
 *
 *   Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
 *   This will allow you to configure/overwrite any CSV / TSV writing options.
 */
public fun AnyFrame.writeCsv(
    writer: Appendable,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Pure delegation: the caller owns [writer]; all options are forwarded unchanged.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
@@ -0,0 +1,305 @@
@file:JvmName("WriteDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
import java.io.File
import java.io.FileWriter
import java.nio.file.Path
import kotlin.io.path.writer
/**
 * ### Write [DataFrame] to Delimiter-Separated Text File
 *
 * Writes [this][this] [DataFrame][DataFrame] to the delimiter-separated text file at [path].
 *
 * The file is opened with `Path.writer()` (UTF-8 by default) and is always closed again,
 * whether writing succeeds or throws.
 *
 * Parameters you can use to customize the process include, for instance, [delimiter],
 * [includeHeader], [quoteMode], and [headerComments].
 * See the param list below for all settings.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to
 * [File][File], [Path][java.nio.file.Path], [Appendable], or a path given as [String].
 *
 * For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
 *
 * or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
 *
 * Converting to a [String] can be done like this:
 *
 * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")`
 *
 * @param path The path pointing to a file to write to.
 * @param delimiter The field delimiter character. Default: ','.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 */
public fun AnyFrame.writeDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // This function opens the writer, so it is also responsible for closing it: `use` releases the
    // file handle even when writing fails. Should `writeDelimImpl` already have closed the writer,
    // the second close is a no-op.
    path.writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to Delimiter-Separated Text File
 *
 * Writes [this][this] [DataFrame][DataFrame] to the delimiter-separated text [file].
 *
 * The file is written as UTF-8, matching the [Path][java.nio.file.Path] overload, and is always
 * closed again, whether writing succeeds or throws.
 *
 * Parameters you can use to customize the process include, for instance, [delimiter],
 * [includeHeader], [quoteMode], and [headerComments].
 * See the param list below for all settings.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to
 * [File][File], [Path][java.nio.file.Path], [Appendable], or a path given as [String].
 *
 * For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
 *
 * or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
 *
 * Converting to a [String] can be done like this:
 *
 * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")`
 *
 * @param file The file to write to.
 * @param delimiter The field delimiter character. Default: ','.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 */
public fun AnyFrame.writeDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `File.writer()` encodes as UTF-8 by default, whereas `FileWriter(file)` uses the
    // platform-default charset; UTF-8 keeps this overload consistent with the `Path` one.
    // `use` releases the file handle even when writing fails. Should `writeDelimImpl` already
    // have closed the writer, the second close is a no-op.
    file.writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to Delimiter-Separated Text File
 *
 * Writes [this][this] [DataFrame][DataFrame] to the delimiter-separated text file located at [path].
 *
 * The file is written as UTF-8, matching the [Path][java.nio.file.Path] overload, and is always
 * closed again, whether writing succeeds or throws.
 *
 * Parameters you can use to customize the process include, for instance, [delimiter],
 * [includeHeader], [quoteMode], and [headerComments].
 * See the param list below for all settings.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to
 * [File][File], [Path][java.nio.file.Path], [Appendable], or a path given as [String].
 *
 * For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
 *
 * or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
 *
 * Converting to a [String] can be done like this:
 *
 * [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")`
 *
 * @param path The path pointing to a file to write to.
 * @param delimiter The field delimiter character. Default: ','.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 */
public fun AnyFrame.writeDelim(
    path: String,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `File.writer()` encodes as UTF-8 by default, whereas `FileWriter(path)` uses the
    // platform-default charset; UTF-8 keeps this overload consistent with the `Path` one.
    // `use` releases the file handle even when writing fails. Should `writeDelimImpl` already
    // have closed the writer, the second close is a no-op.
    File(path).writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to Delimiter-Separated Text Appendable
 *
 * Serializes [this][this] [DataFrame][DataFrame] as delimiter-separated text into the given [Appendable].
 *
 * Every formatting option is optional; the most commonly used ones are [delimiter],
 * [includeHeader], [quoteMode], and [headerComments]. The full list is documented below.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * Other overloads of [DataFrame.writeDelim][writeDelim]`()` write to a [File][File],
 * a [Path][java.nio.file.Path], or a path given as [String], for example
 * [DataFrame.writeDelim][writeDelim]`("output.txt")` or
 * [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`.
 *
 * To obtain the text as a [String] instead, use [DataFrame.toDelimStr][toDelimStr]`(delimiter = ",")`.
 *
 * @param writer The [Appendable] to write to.
 * @param delimiter The field delimiter character. Default: ','.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 * @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
 *
 *   Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
 *   This will allow you to configure/overwrite any CSV / TSV writing options.
 */
public fun AnyFrame.writeDelim(
    writer: Appendable,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Pure delegation: the caller owns [writer]; all options are forwarded unchanged.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
@@ -0,0 +1,305 @@
@file:JvmName("WriteTsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
import java.io.File
import java.io.FileWriter
import java.nio.file.Path
import kotlin.io.path.writer
/**
 * ### Write [DataFrame] to TSV File
 *
 * Writes [this][this] [DataFrame][DataFrame] to the TSV file at [path].
 *
 * The file is opened with `Path.writer()` (UTF-8 by default) and is always closed again,
 * whether writing succeeds or throws.
 *
 * Parameters you can use to customize the process include, for instance, [delimiter],
 * [includeHeader], [quoteMode], and [headerComments].
 * See the param list below for all settings.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to
 * [File][File], [Path][java.nio.file.Path], [Appendable], or a path given as [String].
 *
 * For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
 *
 * or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
 *
 * Converting to a [String] can be done like this:
 *
 * [DataFrame.toTsvStr][toTsvStr]`()`
 *
 * @param path The path pointing to a file to write to.
 * @param delimiter The field delimiter character. Default: '\t'.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example (with `,` as delimiter): `123,"hello, there",456,` would correspond to:
 *   `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 */
public fun AnyFrame.writeTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // This function opens the writer, so it is also responsible for closing it: `use` releases the
    // file handle even when writing fails. Should `writeDelimImpl` already have closed the writer,
    // the second close is a no-op.
    path.writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to TSV File
 *
 * Writes [this][this] [DataFrame][DataFrame] to the TSV [file].
 *
 * The file is written as UTF-8, matching the [Path][java.nio.file.Path] overload, and is always
 * closed again, whether writing succeeds or throws.
 *
 * Parameters you can use to customize the process include, for instance, [delimiter],
 * [includeHeader], [quoteMode], and [headerComments].
 * See the param list below for all settings.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to
 * [File][File], [Path][java.nio.file.Path], [Appendable], or a path given as [String].
 *
 * For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
 *
 * or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
 *
 * Converting to a [String] can be done like this:
 *
 * [DataFrame.toTsvStr][toTsvStr]`()`
 *
 * @param file The file to write to.
 * @param delimiter The field delimiter character. Default: '\t'.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example (with `,` as delimiter): `123,"hello, there",456,` would correspond to:
 *   `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 */
public fun AnyFrame.writeTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `File.writer()` encodes as UTF-8 by default, whereas `FileWriter(file)` uses the
    // platform-default charset; UTF-8 keeps this overload consistent with the `Path` one.
    // `use` releases the file handle even when writing fails. Should `writeDelimImpl` already
    // have closed the writer, the second close is a no-op.
    file.writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to TSV File
 *
 * Writes [this][this] [DataFrame][DataFrame] to the TSV file located at [path].
 *
 * The file is written as UTF-8, matching the [Path][java.nio.file.Path] overload, and is always
 * closed again, whether writing succeeds or throws.
 *
 * Parameters you can use to customize the process include, for instance, [delimiter],
 * [includeHeader], [quoteMode], and [headerComments].
 * See the param list below for all settings.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to
 * [File][File], [Path][java.nio.file.Path], [Appendable], or a path given as [String].
 *
 * For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
 *
 * or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
 *
 * Converting to a [String] can be done like this:
 *
 * [DataFrame.toTsvStr][toTsvStr]`()`
 *
 * @param path The path pointing to a file to write to.
 * @param delimiter The field delimiter character. Default: '\t'.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example (with `,` as delimiter): `123,"hello, there",456,` would correspond to:
 *   `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 */
public fun AnyFrame.writeTsv(
    path: String,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `File.writer()` encodes as UTF-8 by default, whereas `FileWriter(path)` uses the
    // platform-default charset; UTF-8 keeps this overload consistent with the `Path` one.
    // `use` releases the file handle even when writing fails. Should `writeDelimImpl` already
    // have closed the writer, the second close is a no-op.
    File(path).writer().use { out ->
        writeDelimImpl(
            df = this,
            writer = out,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
 * ### Write [DataFrame] to TSV Appendable
 *
 * Serializes [this][this] [DataFrame][DataFrame] as TSV into the given [Appendable].
 *
 * Every formatting option is optional; the most commonly used ones are [delimiter],
 * [includeHeader], [quoteMode], and [headerComments]. The full list is documented below.
 *
 * The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
 *
 * ##### Similar Functions
 * Other overloads of [DataFrame.writeTsv][writeTsv]`()` write to a [File][File],
 * a [Path][java.nio.file.Path], or a path given as [String], for example
 * [DataFrame.writeTsv][writeTsv]`("output.tsv")` or
 * [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`.
 *
 * To obtain the TSV as a [String] instead, use [DataFrame.toTsvStr][toTsvStr]`()`.
 *
 * @param writer The [Appendable] to write to.
 * @param delimiter The field delimiter character. Default: '\t'.
 * @param includeHeader Whether to include the header in the output. Default: `true`.
 * @param quote The quote character. Default: `"`.
 *
 *   Used when field- or line delimiters should be interpreted as literal text.
 *
 *   For example (with `,` as delimiter): `123,"hello, there",456,` would correspond to:
 *   `123`; `hello, there`; `456`.
 * @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
 *   Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
 * @param escapeChar The escape character to use when writing CSV / TSV files with
 *   [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
 *   Default: `null`. This will double-quote the value.
 * @param commentChar The character that indicates a comment line in a CSV / TSV file.
 *   Default: `'#'`.
 * @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
 *   Default: empty list.
 * @param recordSeparator The string that separates records in a CSV / TSV file.
 *   Default: `'\n'`, a Unix-newline.
 * @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
 *
 *   Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
 *   This will allow you to configure/overwrite any CSV / TSV writing options.
 */
public fun AnyFrame.writeTsv(
    writer: Appendable,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Pure delegation: the caller owns [writer]; all options are forwarded unchanged.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
@@ -0,0 +1,30 @@
@file:JvmName("CsvDeprecationMessagesKt")
package org.jetbrains.kotlinx.dataframe.util
/*
 * This file contains deprecation messages for the whole CSV (`dataframe-csv`) module.
 * After each release, all messages should be reviewed and updated:
 * Level.WARNING -> Level.ERROR
 * Level.ERROR -> Remove
 */
// region WARNING in 0.15, ERROR in 1.0
// Notice text for declarations in this region; not referenced by any constant visible here.
private const val MESSAGE_1_0 = "Will be ERROR in 1.0."
// Messages for overloads that are kept only for binary compatibility
// (presumably the read functions for CSV, TSV, and generic delimited text, going by the names).
internal const val READ_CSV_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
internal const val READ_TSV_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
internal const val READ_DELIM_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
// endregion
// region WARNING in 1.0, ERROR in 1.1
// Notice text for declarations in this region; the region holds no other messages yet.
private const val MESSAGE_1_1 = "Will be ERROR in 1.1."
// endregion
// region keep across releases
// (currently empty)
// endregion
@@ -0,0 +1,54 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.BenchmarkMode
import org.openjdk.jmh.annotations.Measurement
import org.openjdk.jmh.annotations.Mode
import org.openjdk.jmh.annotations.Param
import org.openjdk.jmh.annotations.Scope
import org.openjdk.jmh.annotations.Setup
import org.openjdk.jmh.annotations.State
import org.openjdk.jmh.annotations.TearDown
import org.openjdk.jmh.annotations.Warmup
import java.io.File
import java.util.concurrent.TimeUnit
/**
 * JMH benchmark that reads the same CSV test resource through two `DataFrame` entry points,
 * `readCSV` ([apache]) and `readCsv` ([deephaven]), for three input sizes selected via [type].
 */
@BenchmarkMode(Mode.SingleShotTime)
@Warmup(iterations = 10, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
open class BenchmarkTest {

    /** Size label of the resource to read; one of the `@Param` values. */
    @Param("small", "medium", "large")
    var type = ""

    /** Input file for the current run; assigned in [setup] and cleared in [tearDown]. */
    var file: File? = null

    /** Sets the simple-logger default level to "info" and resolves [file] from [type]. */
    @Setup
    fun setup() {
        System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info")
        val resourceName = when (type) {
            "small" -> "testCSV.csv"
            "medium" -> "gross-domestic-product-june-2024-quarter.csv"
            "large" -> "largeCsv.csv.gz"
            else -> throw IllegalArgumentException("Invalid type")
        }
        file = File("src/test/resources/$resourceName")
    }

    /** Drops the reference to the input file. */
    @TearDown
    fun tearDown() {
        file = null
    }

    /** Measures `DataFrame.readCSV` (presumably the Apache Commons CSV based reader, going by the name). */
    @Benchmark
    fun apache() {
        val input = file!!
        DataFrame.readCSV(input)
    }

    /** Measures `DataFrame.readCsv` (presumably the Deephaven based reader, going by the name). */
    @Benchmark
    fun deephaven() {
        val input = file!!
        DataFrame.readCsv(input)
    }
}
@@ -0,0 +1,887 @@
package org.jetbrains.kotlinx.dataframe.io
import io.deephaven.csv.parsers.Parsers
import io.kotest.assertions.throwables.shouldNotThrowAny
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.collections.shouldContainInOrder
import io.kotest.matchers.nulls.shouldNotBeNull
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.group
import org.jetbrains.kotlinx.dataframe.api.groupBy
import org.jetbrains.kotlinx.dataframe.api.into
import org.jetbrains.kotlinx.dataframe.api.isEmpty
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.print
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.toStr
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.After
import org.junit.Before
import org.junit.Test
import java.io.File
import java.io.StringWriter
import java.math.BigDecimal
import java.net.URL
import java.util.Locale
import java.util.zip.GZIPInputStream
import kotlin.reflect.KClass
import kotlin.reflect.typeOf
import kotlin.time.Instant as StdlibInstant
import kotlinx.datetime.Instant as DeprecatedInstant
// can be enabled for showing logs for these tests
private const val SHOW_LOGS = false
@Suppress("ktlint:standard:argument-list-wrapping")
class DelimCsvTsvTests {
private val logLevel = "org.slf4j.simpleLogger.log.${FastDoubleParser::class.qualifiedName}"
private var loggerBefore: String? = null
@Before
fun setLogger() {
if (!SHOW_LOGS) return
loggerBefore = System.getProperty(logLevel)
System.setProperty(logLevel, "trace")
}
/**
 * Restores the log-level system property that [setLogger] overrode.
 *
 * If the property had no value before the test, it is removed again instead of
 * being left at the "trace" value installed by [setLogger].
 */
@After
fun restoreLogger() {
    if (!SHOW_LOGS) return
    val previous = loggerBefore
    if (previous != null) {
        System.setProperty(logLevel, previous)
    } else {
        // nothing was configured before the test, so drop the override entirely
        System.clearProperty(logLevel)
    }
}
@Test
fun readNulls() {
@Language("CSV")
val src =
"""
first,second
2,,
3,,
""".trimIndent()
val df = DataFrame.readCsvStr(src)
df.rowsCount() shouldBe 2
df.columnsCount() shouldBe 2
df["first"].type() shouldBe typeOf<Int>()
df["second"].allNulls() shouldBe true
df["second"].type() shouldBe typeOf<String?>()
}
@Test
fun write() {
val df = dataFrameOf("col1", "col2")(
1, null,
2, null,
).convert("col2").toStr()
val str = StringWriter()
df.writeCsv(str)
val res = DataFrame.readCsvStr(str.buffer.toString())
res shouldBe df
}
@Test
fun readCsv() {
val df = DataFrame.read(simpleCsv)
df.columnsCount() shouldBe 11
df.rowsCount() shouldBe 5
df.columnNames()[5] shouldBe "duplicate1"
df.columnNames()[6] shouldBe "duplicate11"
df["duplicate1"].type() shouldBe typeOf<Char?>()
df["double"].type() shouldBe typeOf<Double?>()
df["number"].type() shouldBe typeOf<Double>()
df["time"].type() shouldBe typeOf<LocalDateTime>()
df.print(columnTypes = true, borders = true, title = true)
}
@Test
fun `readCsv different charset`() {
val df = DataFrame.readCsv(simpleCsv)
DataFrame.readCsv(simpleCsvUtf16le) shouldBe df
DataFrame.readCsv(simpleCsvUtf16le, charset = Charsets.UTF_16LE) shouldBe df
DataFrame.readCsv(simpleCsvUtf16le, charset = Charsets.UTF_16BE) shouldNotBe df
DataFrame.readCsv(simpleCsvUtf16le, charset = Charsets.UTF_8) shouldNotBe df
}
@Test
fun `readCsv gz compressed different charset`() {
val df = DataFrame.readCsv(simpleCsv)
DataFrame.readCsv(simpleCsvUtf16leGz) shouldBe df
DataFrame.readCsv(simpleCsvUtf16leGz, charset = Charsets.UTF_16LE) shouldBe df
DataFrame.readCsv(simpleCsvUtf16leGz, charset = Charsets.UTF_16BE) shouldNotBe df
DataFrame.readCsv(simpleCsvUtf16leGz, charset = Charsets.UTF_8) shouldNotBe df
}
@Test
fun `readCsv zip compressed different charset`() {
val df = DataFrame.readCsv(simpleCsv)
DataFrame.readCsv(simpleCsvUtf16leZip) shouldBe df
DataFrame.readCsv(simpleCsvUtf16leZip, charset = Charsets.UTF_16LE) shouldBe df
DataFrame.readCsv(simpleCsvUtf16leZip, charset = Charsets.UTF_16BE) shouldNotBe df
DataFrame.readCsv(simpleCsvUtf16leZip, charset = Charsets.UTF_8) shouldNotBe df
}
@Test
fun `read ZIP Csv`() {
DataFrame.readCsv(simpleCsvZip) shouldBe DataFrame.readCsv(simpleCsv)
shouldThrow<IllegalStateException> {
DataFrame.readCsv(notCsv)
}
}
@Test
fun `read GZ Csv`() {
DataFrame.readCsv(simpleCsvGz) shouldBe DataFrame.readCsv(simpleCsv)
}
@Test
fun `read custom compression Csv`() {
DataFrame.readCsv(
simpleCsvGz,
compression = Compression(::GZIPInputStream),
) shouldBe DataFrame.readCsv(simpleCsv)
}
@Test
fun `read 2 compressed Csv`() {
shouldThrow<IllegalArgumentException> { DataFrame.readCsv(twoCsvsZip) }
}
@Test
fun readCsvWithFrenchLocaleAndAlternativeDelimiter() {
val df = DataFrame.readCsv(
url = csvWithFrenchLocale,
delimiter = ';',
parserOptions = ParserOptions(locale = Locale.FRENCH),
)
df.columnsCount() shouldBe 11
df.rowsCount() shouldBe 5
df.columnNames()[5] shouldBe "duplicate1"
df.columnNames()[6] shouldBe "duplicate11"
df["duplicate1"].type() shouldBe typeOf<Char?>()
df["double"].type() shouldBe typeOf<Double?>()
df["number"].type() shouldBe typeOf<Double>()
df["time"].type() shouldBe typeOf<LocalDateTime>()
println(df)
}
/**
 * Asserts that [schema] contains a column called [columnName]
 * and that the classifier of that column's type is [kClass].
 */
private fun assertColumnType(columnName: String, kClass: KClass<*>, schema: DataFrameSchema) {
    val column = schema.columns[columnName].shouldNotBeNull()
    column.type.classifier shouldBe kClass
}
@Test
fun readCsvWithFloats() {
val df = DataFrame.readCsv(wineCsv, delimiter = ';')
val schema = df.schema()
assertColumnType("citric acid", Double::class, schema)
assertColumnType("alcohol", Double::class, schema)
assertColumnType("quality", Int::class, schema)
}
@Test
fun `read standard CSV with floats when user has alternative locale`() {
val currentLocale = Locale.getDefault()
try {
Locale.setDefault(Locale.forLanguageTag("ru-RU"))
val df = DataFrame.readCsv(wineCsv, delimiter = ';')
val schema = df.schema()
assertColumnType("citric acid", Double::class, schema)
assertColumnType("alcohol", Double::class, schema)
assertColumnType("quality", Int::class, schema)
} finally {
Locale.setDefault(currentLocale)
}
}
@Test
fun `read with custom header`() {
val header = ('A'..'K').map { it.toString() }
val df = DataFrame.readCsv(simpleCsv, header = header, skipLines = 1)
df.columnNames() shouldBe header
df["B"].type() shouldBe typeOf<Int>()
val headerShort = ('A'..'E').map { it.toString() }
val dfShort = DataFrame.readCsv(simpleCsv, header = headerShort, skipLines = 1)
dfShort.columnsCount() shouldBe 5
dfShort.columnNames() shouldBe headerShort
}
@Test
fun `read first rows`() {
val expected =
listOf(
"untitled",
"user_id",
"name",
"duplicate",
"username",
"duplicate1",
"duplicate11",
"double",
"number",
"time",
"empty",
)
val dfHeader = DataFrame.readCsv(simpleCsv, readLines = 0)
dfHeader.rowsCount() shouldBe 0
dfHeader.columnNames() shouldBe expected
val dfThree = DataFrame.readCsv(simpleCsv, readLines = 3)
dfThree.rowsCount() shouldBe 3
val dfFull = DataFrame.readCsv(simpleCsv, readLines = 10)
dfFull.rowsCount() shouldBe 5
}
@Test
fun `if string starts with a number, it should be parsed as a string anyway`() {
    // The annotation sits on the String literal (not on the resulting DataFrame),
    // matching how the other tests in this class declare their CSV input.
    @Language("CSV")
    val csv =
        """
        duration,floatDuration
        12 min,1.0
        15,12.98 sec
        1 Season,0.9 parsec
        """.trimIndent()
    val df = DataFrame.readCsvStr(csv)

    // values that merely start with a number must not be inferred as numeric columns
    df["duration"].type() shouldBe typeOf<String>()
    df["floatDuration"].type() shouldBe typeOf<String>()
}
@Test
fun `if record has fewer columns than header then pad it with nulls`() {
@Language("CSV")
val csvContent =
"""
col1,col2,col3
568,801,587
780,588
""".trimIndent()
val df = shouldNotThrowAny {
DataFrame.readCsvStr(csvContent)
}
df shouldBe dataFrameOf("col1", "col2", "col3")(
568, 801, 587,
780, 588, null,
)
}
@Test
fun `write and read frame column`() {
val df = dataFrameOf("a", "b", "c")(
1, 2, 3,
1, 3, 2,
2, 1, 3,
)
val grouped = df.groupBy("a").into("g")
val str = grouped.toCsvStr(escapeChar = null)
val res = DataFrame.readCsvStr(str, quote = '"')
res shouldBe grouped
}
@Test
fun `write and read column group`() {
val df = dataFrameOf("a", "b", "c")(
1, 2, 3,
1, 3, 2,
)
val grouped = df.group("b", "c").into("d")
val str = grouped.toCsvStr()
val res = DataFrame.readCsvStr(str)
res shouldBe grouped
}
@Test
fun `CSV String of saved dataframe starts with column name`() {
val df = dataFrameOf("a")(1)
df.toCsvStr().first() shouldBe 'a'
}
@Test
fun `guess tsv`() {
val df = DataFrame.read(testResource("abc.tsv"))
df.columnsCount() shouldBe 3
df.rowsCount() shouldBe 2
}
@Test
fun `write csv without header produce correct file`() {
    val df = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    )
    val path = "src/test/resources/without_header.csv"
    val producedFile = File(path)
    try {
        df.writeCsv(
            path = path,
            includeHeader = false,
            recordSeparator = "\r\n",
        )
        producedFile.exists() shouldBe true
        producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n"
    } finally {
        // always clean up, so a failing assertion does not leave the file in the source tree
        producedFile.delete()
    }
}
@Test
fun `check integrity of example data`() {
shouldThrow<IllegalStateException> {
// cannot read file with blank line at the start
DataFrame.readCsv("../data/jetbrains repositories.csv")
}
shouldThrow<IllegalStateException> {
// ignoreEmptyLines only ignores intermediate empty lines
DataFrame.readCsv("../data/jetbrains repositories.csv", ignoreEmptyLines = true)
}
val df = DataFrame.readCsv(
"../data/jetbrains repositories.csv",
skipLines = 1, // we need to skip the empty lines manually
)
df.columnNames() shouldBe listOf("full_name", "html_url", "stargazers_count", "topics", "watchers")
df.columnTypes() shouldBe listOf(
typeOf<String>(),
typeOf<URL>(),
typeOf<Int>(),
typeOf<String>(),
typeOf<Int>(),
)
// same file without empty line at the beginning
df shouldBe DataFrame.readCsv("../data/jetbrains_repositories.csv")
}
@Test
fun `readCsvStr delimiter`() {
@Language("TSV")
val tsv =
"""
a b c
1 2 3
""".trimIndent()
val df = DataFrame.readCsvStr(tsv, '\t')
df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
@Test
fun `file with BOM`() {
val df = DataFrame.readCsv(withBomCsv, delimiter = ';')
df.columnNames() shouldBe listOf("Column1", "Column2")
}
@Test
fun `read empty CSV`() {
    // Creates an empty temp file named "empty<random>.<extension>" that is removed when the JVM exits.
    // (createTempFile uses the suffix verbatim, so the dot has to be supplied explicitly.)
    fun emptyTempFile(extension: String): File =
        File.createTempFile("empty", ".$extension").apply { deleteOnExit() }

    val emptyDelimStr = DataFrame.readCsvStr("")
    emptyDelimStr shouldBe DataFrame.empty()

    val emptyWidthStr = DataFrame.readCsvStr("", hasFixedWidthColumns = true)
    emptyWidthStr shouldBe DataFrame.empty()

    val emptyCsvFile = DataFrame.readCsv(emptyTempFile("csv"))
    emptyCsvFile shouldBe DataFrame.empty()

    // a manually supplied header yields the columns, but no rows
    val emptyCsvFileManualHeader = DataFrame.readCsv(
        file = emptyTempFile("csv"),
        header = listOf("a", "b", "c"),
    )
    emptyCsvFileManualHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    // a file containing only a header line behaves the same way
    val emptyCsvFileWithHeader = DataFrame.readCsv(
        file = emptyTempFile("csv").also { it.writeText("a,b,c") },
    )
    emptyCsvFileWithHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    val emptyTsvStr = DataFrame.readTsv(emptyTempFile("tsv"))
    emptyTsvStr shouldBe DataFrame.empty()
}
@Test
fun `read Csv with comments`() {
@Language("CSV")
val csv =
"""
# This is a comment
a,b,c
1,2,3
""".trimIndent()
val df = DataFrame.readCsvStr(csv, skipLines = 1L)
df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
@Test
fun `csv with empty lines`() {
@Language("CSV")
val csv =
"""
a,b,c
1,2,3
4,5,6
""".trimIndent()
val df1 = DataFrame.readCsvStr(csv)
df1 shouldBe dataFrameOf("a", "b", "c")(
1, 2, 3,
null, null, null,
4, 5, 6,
)
val df2 = DataFrame.readCsvStr(csv, ignoreEmptyLines = true)
df2 shouldBe dataFrameOf("a", "b", "c")(
1, 2, 3,
4, 5, 6,
)
shouldThrow<IllegalStateException> { DataFrame.readCsvStr(csv, allowMissingColumns = false) }
}
@Test
fun `don't read folder`() {
shouldThrow<IllegalArgumentException> { DataFrame.readCsv("") }
shouldThrow<IllegalArgumentException> { DataFrame.readCsv("NON EXISTENT FILE") }
}
@Test
fun `cannot auto-parse specific date string`() {
@Language("csv")
val frenchCsv =
"""
name; price; date;
a;12,45; 05/06/2021;
b;-13,35;14/07/2025;
c;100 123,35;;
d;-204 235,23;;
e;NaN;;
f;null;;
""".trimIndent()
val dfDeephaven = DataFrame.readCsvStr(
text = frenchCsv,
delimiter = ';',
)
// could not parse, remains String
dfDeephaven["date"].type() shouldBe typeOf<String?>()
val dfDataFrame = DataFrame.readCsvStr(
text = frenchCsv,
delimiter = ';',
// setting any locale skips deephaven's date parsing
parserOptions = ParserOptions(locale = Locale.ROOT),
)
// could not parse, remains String
dfDataFrame["date"].type() shouldBe typeOf<String?>()
}
@Test
fun `parse with other locales`() {
@Language("csv")
val frenchCsv =
"""
name; price; date;
a;12,45; 05/06/2021;
b;-13,35;14/07/2025;
c;100 123,35;;
d;-204 235,23;;
e;NaN;;
f;null;;
""".trimIndent()
val frenchDf = DataFrame.readCsvStr(
text = frenchCsv,
delimiter = ';',
parserOptions = ParserOptions(
dateTimePattern = "dd/MM/yyyy",
locale = Locale.FRENCH,
),
)
frenchDf["price"].type() shouldBe typeOf<Double?>()
frenchDf["date"].type() shouldBe typeOf<LocalDate?>()
@Language("csv")
val dutchCsv =
"""
name; price;
a;12,45;
b;-13,35;
c;100.123,35;
d;-204.235,23;
e;NaN;
f;null;
""".trimIndent()
val dutchDf = DataFrame.readCsvStr(
text = dutchCsv,
delimiter = ';',
parserOptions = ParserOptions(
locale = Locale.forLanguageTag("nl-NL"),
),
)
dutchDf["price"].type() shouldBe typeOf<Double?>()
// skipping this test on windows due to lack of support for Arabic locales
if (!System.getProperty("os.name").startsWith("Windows")) {
// while negative numbers in RTL languages cannot be parsed thanks to Java, others work
@Language("csv")
val arabicCsv =
"""
الاسم; السعر;
أ;١٢٫٤٥;
ب;١٣٫٣٥;
ج;١٠٠٫١٢٣;
د;٢٠٤٫٢٣٥;
هـ;ليس رقم;
و;null;
""".trimIndent()
val easternArabicDf = DataFrame.readCsvStr(
arabicCsv,
delimiter = ';',
parserOptions = ParserOptions(
locale = Locale.forLanguageTag("ar-001"),
),
)
easternArabicDf["السعر"].type() shouldBe typeOf<Double?>()
easternArabicDf["الاسم"].type() shouldBe typeOf<String>() // apparently not a char
}
}
@Test
fun `handle slightly mixed locales`() {
    @Language("csv")
    val estonianWrongMinus =
        """
        name; price;
        a;12,45;
        b;-13,35;
        c;100 123,35;
        d;-204 235,23;
        e;NaN;
        f;null;
        """.trimIndent()

    val estonianDf1 = DataFrame.readCsvStr(
        text = estonianWrongMinus,
        delimiter = ';',
        parserOptions = ParserOptions(
            locale = Locale.forLanguageTag("et-EE"),
        ),
    )

    estonianDf1["price"].type() shouldBe typeOf<Double?>()

    // also test the global setting
    try {
        DataFrame.parser.locale = Locale.forLanguageTag("et-EE")

        val estonianDf2 = DataFrame.readCsvStr(
            text = estonianWrongMinus,
            delimiter = ';',
        )
        estonianDf2 shouldBe estonianDf1
    } finally {
        // reset even when the assertion fails, so the global locale cannot leak into other tests
        DataFrame.parser.resetToDefault()
    }
}
@Test
fun `NA and custom null string in double column`() {
    val df1 = DataFrame.readCsv(
        msleepCsv,
        parserOptions = ParserOptions(
            nullStrings = DEFAULT_DELIM_NULL_STRINGS + "nothing",
        ),
    )

    df1["name"].type() shouldBe typeOf<String>()
    df1["genus"].type() shouldBe typeOf<String>()
    df1["vore"].type() shouldBe typeOf<String?>()
    df1["order"].type() shouldBe typeOf<String>()
    df1["conservation"].type() shouldBe typeOf<String?>()
    df1["sleep_total"].type() shouldBe typeOf<Double>()
    df1["sleep_rem"].type() shouldBe typeOf<Double?>()
    df1["sleep_cycle"].type() shouldBe typeOf<Double?>()
    df1["awake"].type() shouldBe typeOf<Double>()
    df1["brainwt"].type() shouldBe typeOf<Double?>()
    df1["bodywt"].type() shouldBe typeOf<Double?>()

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("nothing")
        DEFAULT_DELIM_NULL_STRINGS.forEach {
            DataFrame.parser.addNullString(it)
        }

        val df2 = DataFrame.readCsv(msleepCsv)
        df2 shouldBe df1
    } finally {
        // reset even when the assertion fails, so the extra null strings cannot leak into other tests
        DataFrame.parser.resetToDefault()
    }
}
@Test
fun `multiple spaces as delimiter`() {
@Language("csv")
val csv =
"""
NAME STATUS AGE NUMBER LABELS
argo-events Active 2y77d 1234 app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events
argo-workflows Active 2y77d 1234 app.kubernetes.io/instance=argo-workflows,kubernetes.io/metadata.name=argo-workflows
argocd Active 5y18d 1234 kubernetes.io/metadata.name=argocd
beta Active 4y235d 1234 kubernetes.io/metadata.name=beta
""".trimIndent()
val df1 = DataFrame.readCsvStr(
text = csv,
hasFixedWidthColumns = true,
)
df1["NAME"].type() shouldBe typeOf<String>()
df1["STATUS"].type() shouldBe typeOf<String>()
df1["AGE"].type() shouldBe typeOf<String>()
df1["NUMBER"].type() shouldBe typeOf<Int>()
df1["LABELS"].type() shouldBe typeOf<String>()
val df2 = DataFrame.readCsvStr(
text = csv,
hasFixedWidthColumns = true,
fixedColumnWidths = listOf(25, 9, 9, 9, 100),
skipLines = 1,
header = listOf("name", "status", "age", "number", "labels"),
)
df2["name"].type() shouldBe typeOf<String>()
df2["status"].type() shouldBe typeOf<String>()
df2["age"].type() shouldBe typeOf<String>()
df2["number"].type() shouldBe typeOf<Int>()
df2["labels"].type() shouldBe typeOf<String>()
}
@Test
fun `handle default coltype with other parameters`() {
val df = DataFrame.readCsv(
simpleCsv,
header = listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"),
skipLines = 2,
colTypes = mapOf(
"a" to ColType.Int,
"b" to ColType.Double,
ColType.DEFAULT to ColType.String,
),
)
df.columnTypes().shouldContainInOrder(
typeOf<Int>(),
typeOf<Double>(),
typeOf<String>(),
typeOf<String?>(),
typeOf<String>(),
typeOf<String?>(),
typeOf<String?>(),
typeOf<String?>(),
typeOf<String>(),
typeOf<String>(),
typeOf<String?>(),
)
df.rowsCount() shouldBe 4
}
@Test
fun `skipping types`() {
    // Double is skipped during inference, but an explicit colType for a column still applies
    val df1 = DataFrame.readCsv(
        irisDataset,
        colTypes = mapOf("sepal.length" to ColType.Double),
        parserOptions = ParserOptions(
            skipTypes = setOf(typeOf<Double>()),
        ),
    )

    df1["sepal.length"].type() shouldBe typeOf<Double>()
    df1["sepal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.length"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["variety"].type() shouldBe typeOf<String>()

    // Also test the global setting
    try {
        DataFrame.parser.addSkipType(typeOf<Double>())

        val df2 = DataFrame.readCsv(
            irisDataset,
            colTypes = mapOf("sepal.length" to ColType.Double),
        )
        df2 shouldBe df1
    } finally {
        // reset even when the assertion fails, so the skipped type cannot leak into other tests
        DataFrame.parser.resetToDefault()
    }
}
// Issue #921
@Test
fun `read csv with custom null strings and given type`() {
    @Language("CSV")
    val csv =
        """
        a,b
        noppes,2
        1.2,
        3,45
        ,noppes
        1.3,1
        """.trimIndent()

    val df1 = DataFrame.readCsvStr(
        csv,
        parserOptions = ParserOptions(
            nullStrings = setOf("noppes", ""),
        ),
        colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int),
    )
    df1 shouldBe dataFrameOf("a", "b")(
        null, 2,
        1.2, null,
        3.0, 45,
        null, null,
        1.3, 1,
    )

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("noppes")
        DataFrame.parser.addNullString("")

        val df2 = DataFrame.readCsvStr(
            csv,
            colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int),
        )
        df2 shouldBe df1
    } finally {
        // reset even when the assertion fails, so the extra null strings cannot leak into other tests
        DataFrame.parser.resetToDefault()
    }
}
// Issue #1047
@Test
fun `Only use Deephaven datetime parser with custom csv specs`() {
@Language("csv")
val csvContent =
"""
with_timezone_offset,without_timezone_offset
2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
""".trimIndent()
// use DFs parsers by default for datetime-like columns
val df1 = DataFrame.readCsvStr(csvContent)
df1["with_timezone_offset"].let {
it.type() shouldBe typeOf<StdlibInstant>()
it[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")
}
df1["without_timezone_offset"].let {
it.type() shouldBe typeOf<LocalDateTime>()
it[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")
}
// enable fast datetime parser for the first column with adjustCsvSpecs
val df2 = DataFrame.readCsv(
inputStream = csvContent.byteInputStream(),
adjustCsvSpecs = {
putParserForName("with_timezone_offset", Parsers.DATETIME)
},
)
df2["with_timezone_offset"].let {
it.type() shouldBe typeOf<LocalDateTime>()
it[0] shouldBe LocalDateTime.parse("2024-12-12T12:00:00")
}
df2["without_timezone_offset"].let {
it.type() shouldBe typeOf<LocalDateTime>()
it[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")
}
}
@Test
fun `test parsing kotlin-time-Instant`() {
    @Language("csv")
    val csvContent =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    try {
        DataFrame.parser.parseExperimentalInstant = true

        // use DFs parsers by default for datetime-like columns
        val df1 = DataFrame.readCsvStr(csvContent)
        df1["with_timezone_offset"].let {
            it.type() shouldBe typeOf<StdlibInstant>()
            it[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")
        }
    } finally {
        // reset even when an assertion fails, so the global flag cannot leak into other tests
        DataFrame.parser.resetToDefault()
    }
}
@Test
fun `json dependency test`() {
val df = dataFrameOf("firstName", "lastName")(
"John", "Doe",
"Jane", "Doe",
).group { "firstName" and "lastName" }.into { "name" }
df.toCsvStr(quote = '\'') shouldBe
"""
name
'{"firstName":"John","lastName":"Doe"}'
'{"firstName":"Jane","lastName":"Doe"}'
""".trimIndent()
}
companion object {
private val irisDataset = testCsv("irisDataset")
private val simpleCsv = testCsv("testCSV")
private val simpleCsvUtf16le = testCsv("testCSV-utf-16-le-bom")
private val simpleCsvUtf16leGz = testResource("testCSV-utf16le-bom.csv.gz")
private val simpleCsvUtf16leZip = testResource("testCSV-utf-16-le-bom.zip")
private val simpleCsvZip = testResource("testCSV.zip")
private val twoCsvsZip = testResource("two csvs.zip")
private val simpleCsvGz = testResource("testCSV.csv.gz")
private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale")
private val wineCsv = testCsv("wine")
private val withBomCsv = testCsv("with-bom")
private val msleepCsv = testCsv("msleep")
private val notCsv = testResource("not-csv.zip")
}
}
/**
 * Looks up [resourcePath] through the class loader of [DelimCsvTsvTests].
 *
 * @throws IllegalArgumentException if no resource with that path is found,
 *   naming the missing path instead of failing with a bare null-pointer error.
 */
fun testResource(resourcePath: String): URL =
    requireNotNull(DelimCsvTsvTests::class.java.classLoader.getResource(resourcePath)) {
        "Test resource not found on the classpath: $resourcePath"
    }
/** Resolves the test resource whose path is [csvName] with ".csv" appended. */
fun testCsv(csvName: String): URL {
    val resourcePath = csvName + ".csv"
    return testResource(resourcePath)
}
@@ -0,0 +1,134 @@
@file:ExcludeFromSources
package org.jetbrains.kotlinx.dataframe.documentationCsv
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
import org.jetbrains.kotlinx.dataframe.io.ColType
import java.io.File
import java.io.InputStream
import java.net.URL
/**
* ### Read $[FILE_TYPE_TITLE] $[DATA_TITLE] to [DataFrame]
*
* Reads any $[FILE_TYPE] $[DATA] to a [DataFrame][DataFrame].
*
* Parameters you can use to customize the reading process include, for instance, \[delimiter\],
* \[header\], \[colTypes\], \[readLines\], and \[parserOptions\].
* See the param list below for all settings.
*
* The integration is built upon {@include [DocumentationUrls.Deephaven]}.
*
* ##### Similar Functions
* With the overloads of $[FUNCTION_LINK]`()`, you can read any $[FILE_TYPE] by [File][File],
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
* Reading by file path or URL can also be done by passing a [String].
*
* For example, $[FUNCTION_LINK]`("input.$[CommonReadDelimDocs.FILE_EXTENSION]")` or with some options:
*
* $[FUNCTION_LINK]`(`
*
* {@include [Indent]}`file = `[File][File]`("input.$[CommonReadDelimDocs.FILE_EXTENSION]"),`
*
* {@include [Indent]}`parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
*
* {@include [Indent]}`colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
*
* {@include [Indent]}`readLines = 1000L,`
*
* `)`
*
* ZIP (.zip) or GZIP (.gz) files are supported by default. \[compression\] is automatically detected.
*
* You can also read "raw" $[FILE_TYPE] data from a [String] like this:
*
* $[STR_FUNCTION_LINK]`("a,b,c", delimiter = ',')`
*
* @comment Some helper arguments for the function links
* @set [FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}\]\[${[FUNCTION_NAME]}\]
* @set [STR_FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}Str\]\[${[FUNCTION_NAME]}Str\]
* @set [OLD_FUNCTION_LINK] \[DataFrame.${[OLD_FUNCTION_NAME]}\]\[org.jetbrains.kotlinx.dataframe.io.${[OLD_FUNCTION_NAME]}\]
*/
@Suppress("ClassName")
internal interface CommonReadDelimDocs {
/**
* @include [CommonReadDelimDocs]
* @set [FILE_TYPE_TITLE] CSV
* @set [FILE_TYPE] CSV
* @set [FILE_EXTENSION] csv
* @set [FUNCTION_NAME] readCsv
* @set [OLD_FUNCTION_NAME] readCSV
*/
typealias CsvDocs = Nothing
/**
* @include [CommonReadDelimDocs]
* @set [FILE_TYPE_TITLE] TSV
* @set [FILE_TYPE] TSV
* @set [FILE_EXTENSION] tsv
* @set [FUNCTION_NAME] readTsv
* @set [OLD_FUNCTION_NAME] readTSV
*/
typealias TsvDocs = Nothing
/**
* @include [CommonReadDelimDocs]
* @set [FILE_TYPE_TITLE] Delimiter-Separated Text
* @set [FILE_TYPE] delimiter-separated text
* @set [FILE_EXTENSION] txt
* @set [FUNCTION_NAME] readDelim
* @set [OLD_FUNCTION_NAME] readDelim{@comment cannot differentiate between old and new}
*/
typealias DelimDocs = Nothing
/**
* @include [CHARSET]
* @include [DelimParams.HEADER]
* @include [DelimParams.HAS_FIXED_WIDTH_COLUMNS]
* @include [DelimParams.FIXED_COLUMN_WIDTHS]
* @include [DelimParams.COL_TYPES]
* @include [DelimParams.SKIP_LINES]
* @include [DelimParams.READ_LINES]
* @include [DelimParams.PARSER_OPTIONS]
* @include [DelimParams.IGNORE_EMPTY_LINES]
* @include [DelimParams.ALLOW_MISSING_COLUMNS]
* @include [DelimParams.IGNORE_EXCESS_COLUMNS]
* @include [DelimParams.QUOTE]
* @include [DelimParams.IGNORE_SURROUNDING_SPACES]
* @include [DelimParams.TRIM_INSIDE_QUOTED]
* @include [DelimParams.PARSE_PARALLEL]
*/
typealias CommonReadParams = Nothing
// something like "File" or "File/URL"
typealias DATA_TITLE = Nothing
// something like "file" or "file or url"
typealias DATA = Nothing
// Like "CSV" or "TSV", capitalized
typealias FILE_TYPE_TITLE = Nothing
// Like "CSV" or "TSV"
typealias FILE_TYPE = Nothing
// like "csv" or "txt"
typealias FILE_EXTENSION = Nothing
// Function name, like "readCsv"
typealias FUNCTION_NAME = Nothing
// Old function name, like "readCSV"
typealias OLD_FUNCTION_NAME = Nothing
// A link to the main function, set by ReadDelim itself
typealias FUNCTION_LINK = Nothing
// A link to the str function, set by ReadDelim itself
typealias STR_FUNCTION_LINK = Nothing
// A link to the old function, set by ReadDelim itself
typealias OLD_FUNCTION_LINK = Nothing
}
@@ -0,0 +1,109 @@
@file:ExcludeFromSources
package org.jetbrains.kotlinx.dataframe.documentationCsv
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL
import java.io.File
/**
* ### $[WRITE_OR_CONVERT] [DataFrame] to $[FILE_TYPE_TITLE] $[DATA_TITLE]
*
* ${[WRITE_OR_CONVERT]}s \[this\]\[this\] [DataFrame][DataFrame] to a $[FILE_TYPE] $[DATA].
*
* Parameters you can use to customize the process include, for instance, \[delimiter\],
* \[includeHeader\], \[quoteMode\], and \[headerComments\].
* See the param list below for all settings.
*
* The integration is built upon {@include [DocumentationUrls.ApacheCsv]}.
*
* ##### Similar Functions
* With overloads of $[FUNCTION_LINK]`()`, you can write $[FILE_TYPE] to [File][File], [Path][java.nio.file.Path],
* [Appendable], or [String].
*
* For example, $[FUNCTION_LINK]`("output.$[CommonWriteDelimDocs.FILE_EXTENSION]")`
*
* or $[FUNCTION_LINK]`(`[File][File]`("output.$[CommonWriteDelimDocs.FILE_EXTENSION]"), quoteMode = `[QuoteMode.ALL][ALL]`)`
*
* Converting to a [String] can be done like this:
*
* $[TO_STR_FUNCTION_LINK]`(delimiter = ',')`
*
* @comment Some helper arguments for the function links
* @set [FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}\]\[${[FUNCTION_NAME]}\]
* @set [TO_STR_FUNCTION_LINK] \[DataFrame.${[TO_STR_FUNCTION_NAME]}\]\[${[TO_STR_FUNCTION_NAME]}\]
*/
@Suppress("ClassName")
internal interface CommonWriteDelimDocs {
/**
* @include [CommonWriteDelimDocs]
* @set [FILE_TYPE_TITLE] CSV
* @set [FILE_TYPE] CSV
* @set [FILE_EXTENSION] csv
* @set [FUNCTION_NAME] writeCsv
* @set [TO_STR_FUNCTION_NAME] toCsvStr
*/
typealias CsvDocs = Nothing
/**
* @include [CommonWriteDelimDocs]
* @set [FILE_TYPE_TITLE] TSV
* @set [FILE_TYPE] TSV
* @set [FILE_EXTENSION] tsv
* @set [FUNCTION_NAME] writeTsv
* @set [TO_STR_FUNCTION_NAME] toTsvStr
*/
typealias TsvDocs = Nothing
/**
* @include [CommonWriteDelimDocs]
* @set [FILE_TYPE_TITLE] Delimiter-Separated Text
* @set [FILE_TYPE] delimiter-separated text
* @set [FILE_EXTENSION] txt
* @set [FUNCTION_NAME] writeDelim
* @set [TO_STR_FUNCTION_NAME] toDelimStr
*/
typealias DelimDocs = Nothing
/**
* @include [DelimParams.INCLUDE_HEADER]
* @include [DelimParams.QUOTE]
* @include [DelimParams.QUOTE_MODE]
* @include [DelimParams.ESCAPE_CHAR]
* @include [DelimParams.COMMENT_CHAR]
* @include [DelimParams.HEADER_COMMENTS]
* @include [DelimParams.RECORD_SEPARATOR]
*/
typealias CommonWriteParams = Nothing
// something like "Write" or "Convert"
typealias WRITE_OR_CONVERT = Nothing
// Like "CSV" or "TSV", capitalized
typealias FILE_TYPE_TITLE = Nothing
// something like "File" or "String"
typealias DATA_TITLE = Nothing
// something like "file" or "text"
typealias DATA = Nothing
// Like "CSV" or "TSV"
typealias FILE_TYPE = Nothing
// like "csv" or "txt"
typealias FILE_EXTENSION = Nothing
// Function name, like "writeCsv"
typealias FUNCTION_NAME = Nothing
// Function name, like "toCsvStr"
typealias TO_STR_FUNCTION_NAME = Nothing
// A link to the main function, set by WriteDelim itself
typealias FUNCTION_LINK = Nothing
// A link to the str function, set by WriteDelim itself
typealias TO_STR_FUNCTION_LINK = Nothing
}
@@ -0,0 +1,274 @@
package org.jetbrains.kotlinx.dataframe.documentationCsv
import io.deephaven.csv.CsvSpecs
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
import org.jetbrains.kotlinx.dataframe.io.ColType
import org.jetbrains.kotlinx.dataframe.io.Compression
import org.jetbrains.kotlinx.dataframe.io.DefaultNullStringsContentLink
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
import java.nio.charset.Charset
/**
* Contains both the default values of csv/tsv parameters and the parameter KDocs.
*/
@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference")
internal object DelimParams {
// Input/output targets. These typealiases to Nothing hold no value;
// they exist only to carry the @param KDoc written above them.
/**
* @param path The file path to read.
* Use [charset\] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias PATH_READ = Nothing
/**
* @param file The file to read.
* Use [charset\] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias FILE_READ = Nothing
/**
* @param url The URL from which to fetch the data.
* Use [charset\] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias URL_READ = Nothing
/**
* @param fileOrUrl The file path or URL to read the data from.
* Use [charset\] to specify the encoding.
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
*/
typealias FILE_OR_URL_READ = Nothing
/**
* @param inputStream Represents the file to read.
* Use [charset\] to specify the encoding.
*/
typealias INPUT_STREAM_READ = Nothing
/** @param text The raw data to read in the form of a [String]. */
typealias TEXT_READ = Nothing
/** @param file The file to write to. */
typealias FILE_WRITE = Nothing
/** @param path The path pointing to a file to write to. */
typealias PATH_WRITE = Nothing
/** @param writer The [Appendable] to write to. */
typealias WRITER_WRITE = Nothing
// Parameters that have a default: the constant holds the default value,
// the KDoc above it documents the parameter.
/**
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
* Default: `null`
*
* If `null`, the Charset will be read from the BOM of the provided input,
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
*/
val CHARSET: Charset? = null
/**
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if \[hasFixedWidthColumns\] is `true`.
*/
const val CSV_DELIMITER: Char = ','
/**
* @param delimiter The field delimiter character. Default: '\\t'.
*
* Ignored if \[hasFixedWidthColumns\] is `true`.
*/
const val TSV_DELIMITER: Char = '\t'
/**
* @param delimiter The field delimiter character. Default: ','.
*
* Ignored if \[hasFixedWidthColumns\] is `true`.
*/
const val DELIM_DELIMITER: Char = ','
/**
* @param header Optional column titles. Default: empty list.
*
* If non-empty, the data will be read with \[header\] as the column titles
* (use \[skipLines\] if there's a header in the data).
* If empty (default), the header will be read from the data.
*/
val HEADER: List<String> = emptyList()
/**
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
* Default: `false`.
*
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
* Column widths are determined by the header in the data (if present), or manually by setting
* \[fixedColumnWidths\].
*/
const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false
/**
* @param fixedColumnWidths The fixed column widths. Default: empty list.
*
* Requires \[hasFixedWidthColumns\]. If empty, the column widths will be determined by the header in the data
* (if present), else, this manually sets the column widths.
* The number of widths should match the number of columns.
*/
val FIXED_COLUMN_WIDTHS: List<Int> = emptyList()
/**
* @param compression The compression of the data.
* Default: [Compression.None], unless detected otherwise from the input file or url.
*/
val COMPRESSION: Compression<*> = Compression.None
/**
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
*
* If supplied for a certain column name (inferred from data or given by \[header\]),
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
*
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
* to set a _default_ column type, like [ColType.String].
*/
val COL_TYPES: Map<String, ColType> = emptyMap()
/**
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
*
* Useful for files with metadata, or comments at the beginning, or to give a custom \[header\].
*/
const val SKIP_LINES: Long = 0L
/**
* @param readLines The maximum number of lines to read from the data. Default: `null`.
*
* If `null`, all lines will be read.
*/
val READ_LINES: Long? = null
/**
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
* Default: `null`.
*
* Can configure locale, date format, double parsing, skipping types, etc.
*
* If [parserOptions\] or any of the arguments are `null`, the global parser configuration
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + {@include [DefaultNullStringsContentLink]}.
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
* the given types or the global setting.
*/
val PARSER_OPTIONS: ParserOptions? = null
/**
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
*
* If `false`, empty lines will be interpreted as having _empty_ values if \[allowMissingColumns\].
*/
const val IGNORE_EMPTY_LINES: Boolean = false
/**
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
*
* If `true`, rows that are too short will be interpreted as _empty_ values.
*/
const val ALLOW_MISSING_COLUMNS: Boolean = true
/**
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
*
* If `true`, rows that are too long will have those columns dropped.
*/
const val IGNORE_EXCESS_COLUMNS: Boolean = true
/**
* @param quote The quote character. Default: `"`.
*
* Used when field- or line delimiters should be interpreted as literal text.
*
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
*/
const val QUOTE: Char = '"'
/**
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
* Default: `true`.
*/
const val IGNORE_SURROUNDING_SPACES: Boolean = true
/**
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
* Default: `false`.
*/
const val TRIM_INSIDE_QUOTED: Boolean = false
/**
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
*
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
* This is usually faster but can be turned off for debugging.
*/
const val PARSE_PARALLEL: Boolean = true
/**
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
*
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
* This will allow you to configure/overwrite any CSV / TSV parsing options.
*/
val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it }
// Parameters of the write path (cf. the parameters of writeDelimImpl).
/** @param includeHeader Whether to include the header in the output. Default: `true`. */
const val INCLUDE_HEADER: Boolean = true
/**
* @param quoteMode The [QuoteMode] to use when writing CSV / TSV files.
* Default: [QuoteMode.MINIMAL].
*/
val QUOTE_MODE: QuoteMode = QuoteMode.MINIMAL
/**
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE].
* Default: `null`. This will double-quote the value.
*/
val ESCAPE_CHAR: Char? = null
/**
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
* Default: `'#'`.
*/
const val COMMENT_CHAR: Char = '#'
/**
* @param recordSeparator The string that separates records in a CSV / TSV file.
* Default: `"\\n"`, a Unix-newline.
*/
const val RECORD_SEPARATOR: String = "\n"
/**
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
* Default: empty list.
*/
val HEADER_COMMENTS: List<String> = emptyList()
/**
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
*
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
* This will allow you to configure/overwrite any CSV / TSV writing options.
*/
val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it }
}
@@ -0,0 +1,12 @@
@file:ExcludeFromSources
package org.jetbrains.kotlinx.dataframe.documentationCsv
/**
* Holder for KDoc-only typealiases; each one carries a Markdown link to the home page
* of an external CSV library in its KDoc. The aliases point to [Nothing] and hold no value.
*/
internal interface DocumentationUrls {
/** [Deephaven CSV](https://github.com/deephaven/deephaven-csv) */
typealias Deephaven = Nothing
/** [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) */
typealias ApacheCsv = Nothing
}
@@ -0,0 +1,101 @@
package org.jetbrains.kotlinx.dataframe.documentationCsv
import kotlin.annotation.AnnotationTarget.ANNOTATION_CLASS
import kotlin.annotation.AnnotationTarget.CLASS
import kotlin.annotation.AnnotationTarget.CONSTRUCTOR
import kotlin.annotation.AnnotationTarget.FIELD
import kotlin.annotation.AnnotationTarget.FILE
import kotlin.annotation.AnnotationTarget.FUNCTION
import kotlin.annotation.AnnotationTarget.LOCAL_VARIABLE
import kotlin.annotation.AnnotationTarget.PROPERTY
import kotlin.annotation.AnnotationTarget.PROPERTY_GETTER
import kotlin.annotation.AnnotationTarget.PROPERTY_SETTER
import kotlin.annotation.AnnotationTarget.TYPE
import kotlin.annotation.AnnotationTarget.TYPEALIAS
import kotlin.annotation.AnnotationTarget.VALUE_PARAMETER
// KDoc-only snippets: every typealias below points to Nothing and exists solely to carry the KDoc
// written above it (non-breaking spaces of increasing width, or a blank-line-padded indent).
// NOTE(review): presumably these are pulled into other KDocs via `{@include [...]}`, the way
// LineBreak includes Indent - so the KDoc bodies are content and must not be "tidied up".
/**
*
* {@include [Indent]}
*
*/
@ExcludeFromSources
internal typealias LineBreak = Nothing
/** &nbsp; */
@ExcludeFromSources
internal typealias QuarterIndent = Nothing
/** &nbsp;&nbsp; */
@ExcludeFromSources
internal typealias HalfIndent = Nothing
/** &nbsp;&nbsp;&nbsp;&nbsp; */
@ExcludeFromSources
internal typealias Indent = Nothing
/** &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; */
@ExcludeFromSources
internal typealias DoubleIndent = Nothing
/** &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; */
@ExcludeFromSources
internal typealias TripleIndent = Nothing
/** &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; */
@ExcludeFromSources
internal typealias QuadrupleIndent = Nothing
/**
* Any `Documentable` annotated with this annotation will be excluded from the generated sources by
* the documentation processor.
*
* The target list includes `FILE` and `TYPEALIAS`, so it can mark a whole file
* (`@file:ExcludeFromSources`) as well as individual KDoc-carrying typealiases.
*
* **NOTE: DO NOT RENAME!**
*/
@Target(
CLASS,
ANNOTATION_CLASS,
PROPERTY,
FIELD,
LOCAL_VARIABLE,
VALUE_PARAMETER,
CONSTRUCTOR,
FUNCTION,
PROPERTY_GETTER,
PROPERTY_SETTER,
TYPE,
TYPEALIAS,
FILE,
)
internal annotation class ExcludeFromSources
/**
* Any `Documentable` annotated with this annotation will be exported to HTML by the documentation
* processor.
*
* You can use `@exportAsHtmlStart` and `@exportAsHtmlEnd` inside the KDoc to specify the range
* of the doc to export to HTML.
*
* **NOTE: DO NOT RENAME!**
*
* @param theme Whether to include a simple theme in the HTML file. Default is `true`.
* @param stripReferences Whether to strip `[references]` from the HTML file. Default is `true`.
* This is useful when you want to include the HTML file in a website, where the references are not
* needed or would break.
*/
@Target(
CLASS,
ANNOTATION_CLASS,
PROPERTY,
FIELD,
LOCAL_VARIABLE,
VALUE_PARAMETER,
CONSTRUCTOR,
FUNCTION,
PROPERTY_GETTER,
PROPERTY_SETTER,
TYPE,
TYPEALIAS,
FILE,
)
internal annotation class ExportAsHtml(val theme: Boolean = true, val stripReferences: Boolean = true)
@@ -0,0 +1,24 @@
package org.jetbrains.kotlinx.dataframe.impl.io
import io.deephaven.csv.containers.ByteSlice
import io.deephaven.csv.tokenization.Tokenizer.CustomDoubleParser
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
/**
 * Adapter exposing [FastDoubleParser] through Deephaven's [CustomDoubleParser] interface.
 *
 * Both overloads report an unparsable value by throwing a [NumberFormatException].
 *
 * @param parserOptions Forwarded to the wrapped [FastDoubleParser]; may be `null`.
 */
internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions? = null) : CustomDoubleParser {

    private val fastDoubleParser = FastDoubleParser(parserOptions)

    /**
     * Parses the bytes described by [bs].
     * A `null` result, as well as any [Exception] raised while parsing, becomes a [NumberFormatException].
     */
    override fun parse(bs: ByteSlice): Double {
        val parsed = try {
            fastDoubleParser.parseOrNull(bs.data(), bs.begin(), bs.size())
        } catch (_: Exception) {
            null
        }
        return parsed ?: throw NumberFormatException()
    }

    /** Parses the textual content of [cs]; a `null` result becomes a [NumberFormatException]. */
    override fun parse(cs: CharSequence): Double {
        val parsed = fastDoubleParser.parseOrNull(cs.toString())
        if (parsed == null) throw NumberFormatException()
        return parsed
    }
}
@@ -0,0 +1,204 @@
package org.jetbrains.kotlinx.dataframe.impl.io
import io.deephaven.csv.parsers.DataType
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
import io.deephaven.csv.parsers.DataType.BYTE
import io.deephaven.csv.parsers.DataType.CHAR
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
import io.deephaven.csv.parsers.DataType.DOUBLE
import io.deephaven.csv.parsers.DataType.FLOAT
import io.deephaven.csv.parsers.DataType.INT
import io.deephaven.csv.parsers.DataType.LONG
import io.deephaven.csv.parsers.DataType.SHORT
import io.deephaven.csv.parsers.DataType.STRING
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
import io.deephaven.csv.sinks.Sink
import io.deephaven.csv.sinks.SinkFactory
import io.deephaven.csv.sinks.Source
import kotlinx.datetime.toKotlinLocalDateTime
import java.time.LocalDateTime
import java.time.ZoneOffset
import kotlin.time.Duration.Companion.nanoseconds
/**
* Combines Deephaven's [Sink] and [Source] into a single type,
* so one object can both receive written chunks and be read back from.
*/
internal interface SinkSource<T : Any> :
Sink<T>,
Source<T>
/**
 * Implementation of Deephaven's [Sink] and [Source] that stores data in an [ArrayList].
 *
 * The implementation is based on [Writing Your Own Data Sinks](https://github.com/deephaven/deephaven-csv/blob/main/ADVANCED.md).
 *
 * If we ever store column data unboxed / primitively, this needs to be modified.
 *
 * @param columnIndex The value handed to the sink supplier by the [SinkFactory].
 * @param dataType The Deephaven [DataType] of this sink; it decides how the raw chunk arrays
 *   passed to [write] and [read] are cast and converted.
 */
@Suppress("UNCHECKED_CAST")
internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSource<Any> {

    @Suppress("ktlint:standard:comment-wrapping", "ktlint:standard:no-consecutive-comments")
    companion object {
        val SINK_FACTORY: SinkFactory = SinkFactory.of(
            // unused in Parsers.DEFAULT:
            /* byteSinkSupplier = */ { ListSink(it, BYTE) as SinkSource<ByteArray> },
            /* shortSinkSupplier = */ { ListSink(it, SHORT) as SinkSource<ShortArray> },
            /* intSinkSupplier = */ { ListSink(it, INT) as SinkSource<IntArray> },
            /* longSinkSupplier = */ { ListSink(it, LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* floatSinkSupplier = */ { ListSink(it, FLOAT) as SinkSource<FloatArray> },
            /* doubleSinkSupplier = */ { ListSink(it, DOUBLE) as SinkSource<DoubleArray> },
            /* booleanAsByteSinkSupplier = */ { ListSink(it, BOOLEAN_AS_BYTE) as SinkSource<ByteArray> },
            /* charSinkSupplier = */ { ListSink(it, CHAR) as SinkSource<CharArray> },
            /* stringSinkSupplier = */ { ListSink(it, STRING) as SinkSource<Array<String>> },
            /* dateTimeAsLongSinkSupplier = */ { ListSink(it, DATETIME_AS_LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* timestampAsLongSinkSupplier = */ { ListSink(it, TIMESTAMP_AS_LONG) as SinkSource<LongArray> },
        )
    }

    private val _data: MutableList<Any?> = ArrayList(1000)

    /** Read-only view of the values stored so far; `null` marks a missing value. */
    val data: List<Any?>
        get() = _data

    /** Becomes `true` as soon as a `null` is stored, either from the input or as padding while appending. */
    var hasNulls: Boolean = false
        private set

    /**
     * Converts nanoseconds since the epoch (UTC) to a kotlinx-datetime `LocalDateTime`.
     *
     * For negative durations `toComponents` yields a negative nanosecond component, which
     * `LocalDateTime.ofEpochSecond` rejects; in that case one second is borrowed so the
     * nano-of-second part lands in `0..999_999_999`.
     */
    private fun epochNanosToLocalDateTime(epochNanos: Long) =
        epochNanos.nanoseconds
            .toComponents { seconds, nanoseconds ->
                if (nanoseconds < 0) {
                    LocalDateTime.ofEpochSecond(seconds - 1, nanoseconds + 1_000_000_000, ZoneOffset.UTC)
                } else {
                    LocalDateTime.ofEpochSecond(seconds, nanoseconds, ZoneOffset.UTC)
                }
            }.toKotlinLocalDateTime()

    /**
     * Extracts the boxed value at [srcIndex] of the chunk array [src], interpreting [src] according to [dataType].
     * Returns `null` (and flags [hasNulls]) when [isNull] marks the position as missing.
     */
    private fun getValue(src: Any, srcIndex: Int, isNull: BooleanArray): Any? =
        if (isNull[srcIndex]) {
            hasNulls = true
            null
        } else {
            when (dataType) {
                BOOLEAN_AS_BYTE -> (src as ByteArray)[srcIndex] == 1.toByte()

                // unused in Parsers.DEFAULT
                BYTE -> (src as ByteArray)[srcIndex]

                // unused in Parsers.DEFAULT
                SHORT -> (src as ShortArray)[srcIndex]

                INT -> (src as IntArray)[srcIndex]

                LONG -> (src as LongArray)[srcIndex]

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                FLOAT -> (src as FloatArray)[srcIndex]

                DOUBLE -> (src as DoubleArray)[srcIndex]

                CHAR -> (src as CharArray)[srcIndex]

                STRING -> (src as Array<String>)[srcIndex]

                DATETIME_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                TIMESTAMP_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                else -> error("unsupported parser")
            }
        }

    /**
     * Appends the first `destEnd - destBegin` values of [src] to the list.
     * If the list is shorter than [destBegin], the gap is padded with `null`s first.
     */
    private fun writeAppending(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        while (data.size < destBegin) {
            _data += null
            hasNulls = true
        }
        // the chunk is always indexed from 0, regardless of where it lands in the destination
        for (srcIndex in 0..<(destEnd - destBegin)) {
            _data += getValue(src, srcIndex, isNull)
        }
    }

    /** Overwrites positions `[destBegin, destEnd)` of the list with the first values of [src]. */
    private fun writeReplacing(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        for ((srcIndex, destIndex) in (destBegin..<destEnd).withIndex()) {
            _data[destIndex] = getValue(src, srcIndex, isNull)
        }
    }

    /**
     * Stores the chunk [src] (indexed from 0) at positions `[destBegin, destEnd)`,
     * either by appending or by replacing existing entries, depending on [appending].
     */
    override fun write(
        src: Any,
        isNull: BooleanArray,
        destBegin: Long,
        destEnd: Long,
        appending: Boolean,
    ) {
        if (destBegin == destEnd) return
        val destBeginAsInt = destBegin.toInt()
        val destEndAsInt = destEnd.toInt()
        if (appending) {
            writeAppending(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        } else {
            writeReplacing(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        }
    }

    /**
     * Copies stored values `[srcBegin, srcEnd)` into a chunk: the value at `srcBegin + i`
     * is handed to [store] with destination index `i`, and `isNull[i]` is set accordingly.
     */
    private inline fun <reified T : Any> readChunk(
        srcBegin: Int,
        srcEnd: Int,
        isNull: BooleanArray,
        store: (destIndex: Int, value: T) -> Unit,
    ) {
        // withIndex() yields (position in range, range value) == (destination index, source index)
        for ((destIndex, srcIndex) in (srcBegin..<srcEnd).withIndex()) {
            val value = data[srcIndex] as T?
            if (value != null) store(destIndex, value)
            isNull[destIndex] = value == null
        }
    }

    /**
     * Reads the stored values `[srcBegin, srcEnd)` back into the chunk array [dest], starting at index 0
     * of [dest] and [isNull].
     *
     * Only [BYTE], [SHORT], [INT] and [LONG] sinks can be read back; any other [dataType] fails with
     * an [IllegalStateException].
     */
    override fun read(
        dest: Any,
        isNull: BooleanArray,
        srcBegin: Long,
        srcEnd: Long,
    ) {
        if (srcBegin == srcEnd) return
        val srcBeginAsInt = srcBegin.toInt()
        val srcEndAsInt = srcEnd.toInt()

        when (dataType) {
            BYTE -> {
                val bytes = dest as ByteArray
                readChunk<Byte>(srcBeginAsInt, srcEndAsInt, isNull) { destIndex, value -> bytes[destIndex] = value }
            }

            SHORT -> {
                val shorts = dest as ShortArray
                readChunk<Short>(srcBeginAsInt, srcEndAsInt, isNull) { destIndex, value -> shorts[destIndex] = value }
            }

            INT -> {
                val ints = dest as IntArray
                readChunk<Int>(srcBeginAsInt, srcEndAsInt, isNull) { destIndex, value -> ints[destIndex] = value }
            }

            LONG -> {
                val longs = dest as LongArray
                readChunk<Long>(srcBeginAsInt, srcEndAsInt, isNull) { destIndex, value -> longs[destIndex] = value }
            }

            // Deephaven's fast path for numeric type inference supports only byte, short, int, and long
            // so this should never be reached
            else -> error("unsupported sink state")
        }
    }

    override fun getUnderlying(): ListSink = this
}
@@ -0,0 +1,387 @@
@file:JvmName("ReadDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.impl.io
import io.deephaven.csv.CsvSpecs
import io.deephaven.csv.parsers.DataType
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
import io.deephaven.csv.parsers.DataType.BYTE
import io.deephaven.csv.parsers.DataType.CHAR
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
import io.deephaven.csv.parsers.DataType.DOUBLE
import io.deephaven.csv.parsers.DataType.FLOAT
import io.deephaven.csv.parsers.DataType.INT
import io.deephaven.csv.parsers.DataType.LONG
import io.deephaven.csv.parsers.DataType.SHORT
import io.deephaven.csv.parsers.DataType.STRING
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
import io.deephaven.csv.parsers.Parser
import io.deephaven.csv.parsers.Parsers
import io.deephaven.csv.reading.CsvReader
import io.deephaven.csv.util.CsvReaderException
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import kotlinx.datetime.LocalTime
import org.apache.commons.io.input.BOMInputStream
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.convertTo
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.parse
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.tryParse
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
import org.jetbrains.kotlinx.dataframe.io.ColType
import org.jetbrains.kotlinx.dataframe.io.Compression
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS
import org.jetbrains.kotlinx.dataframe.io.skippingBomCharacters
import org.jetbrains.kotlinx.dataframe.io.toKType
import org.jetbrains.kotlinx.dataframe.io.useDecompressed
import java.io.InputStream
import java.math.BigDecimal
import java.math.BigInteger
import java.net.URL
import java.nio.charset.Charset
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf
import kotlin.time.Duration
import kotlin.time.Instant as StdlibInstant
import kotlinx.datetime.Instant as DeprecatedInstant
/**
 * Reads delimiter-separated data from an [InputStream] using the Deephaven CSV library
 * and turns the result into a [DataFrame].
 *
 * @include [INPUT_STREAM_READ]
 * @include [CHARSET]
 * @param delimiter The field delimiter character. The default is ',' for CSV, '\t' for TSV.
 * @include [HEADER]
 * @include [COL_TYPES]
 * @include [SKIP_LINES]
 * @include [READ_LINES]
 * @include [HAS_FIXED_WIDTH_COLUMNS]
 * @include [FIXED_COLUMN_WIDTHS]
 * @include [PARSER_OPTIONS]
 * @include [IGNORE_EMPTY_LINES]
 * @include [ALLOW_MISSING_COLUMNS]
 * @include [IGNORE_EXCESS_COLUMNS]
 * @include [QUOTE]
 * @include [IGNORE_SURROUNDING_SPACES]
 * @include [TRIM_INSIDE_QUOTED]
 * @include [PARSE_PARALLEL]
 * @include [COMPRESSION]
 * @include [ADJUST_CSV_SPECS]
 */
internal fun readDelimImpl(
    inputStream: InputStream,
    delimiter: Char,
    header: List<String>,
    charset: Charset?,
    hasFixedWidthColumns: Boolean,
    fixedColumnWidths: List<Int>,
    colTypes: Map<String, ColType>,
    skipLines: Long,
    readLines: Long?,
    parserOptions: ParserOptions?,
    ignoreEmptyLines: Boolean,
    allowMissingColumns: Boolean,
    ignoreExcessColumns: Boolean,
    quote: Char,
    ignoreSurroundingSpaces: Boolean,
    trimInsideQuoted: Boolean,
    parseParallel: Boolean,
    compression: Compression<*>,
    adjustCsvSpecs: AdjustCsvSpecs,
): DataFrame<*> {
    // configure the Deephaven specs; the caller-supplied adjustment runs last so it can overwrite anything
    val csvSpecs = CsvSpecs.builder()
        .apply {
            customDoubleParser(DataFrameCustomDoubleParser(parserOptions))

            // explicitly given nullStrings win; otherwise the global ones plus some extras are used
            val nullLiterals = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS)
            nullValueLiterals(nullLiterals)
            headerLegalizer(::legalizeHeader)
            numRows(readLines ?: Long.MAX_VALUE)
            ignoreEmptyLines(ignoreEmptyLines)
            allowMissingColumns(allowMissingColumns)
            ignoreExcessColumns(ignoreExcessColumns)
            if (!hasFixedWidthColumns) delimiter(delimiter)
            quote(quote)
            ignoreSurroundingSpaces(ignoreSurroundingSpaces)
            trim(trimInsideQuoted)
            concurrent(parseParallel)
            header(header)
            hasFixedWidthColumns(hasFixedWidthColumns)
            if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) fixedColumnWidths(fixedColumnWidths)
            skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines)
            parsers(parserOptions, colTypes)
        }
        .let { builder -> adjustCsvSpecs(builder, builder) }
        .build()

    val parsedColumns = inputStream.useDecompressed(compression) { decompressed ->
        try {
            val bomFreeStream = decompressed.skippingBomCharacters()

            // charset priority: explicitly provided, then the one announced by the BOM, then UTF-8
            val effectiveCharset = charset
                ?: (bomFreeStream as? BOMInputStream)?.bom?.let { Charset.forName(it.charsetName) }
                ?: Charsets.UTF_8

            @Suppress("ktlint:standard:comment-wrapping")
            CsvReader.read(
                /* specs = */ csvSpecs,
                /* stream = */ bomFreeStream,
                /* streamCharset = */ effectiveCharset,
                /* sinkFactory = */ ListSink.SINK_FACTORY,
            )
        } catch (e: CsvReaderException) {
            // an empty input whose header should have been inferred simply yields an empty DataFrame
            val emptyInputMessage =
                "Can't proceed because hasHeaderRow is set but input file is empty or shorter than skipHeaderRows"
            if (e.message == emptyInputMessage) {
                return@readDelimImpl DataFrame.empty()
            }
            throw IllegalStateException(
                "Could not read delimiter-separated data: CsvReaderException: ${e.message}: ${e.cause?.message ?: ""}",
                e,
            )
        }
    }

    // turn every Deephaven result column into a DataColumn, honoring per-column and default col types
    val fallbackColType = colTypes[ColType.DEFAULT]
    val columns = parsedColumns.map { resultColumn ->
        resultColumn.toDataColumn(
            parserOptions = parserOptions,
            desiredColType = colTypes[resultColumn.name()] ?: fallbackColType,
        )
    }
    return dataFrameOf(columns)
}
/**
 * Turns a Deephaven result column, backed by a [ListSink], into a [DataColumn].
 *
 * Columns Deephaven already parsed to a non-String type are returned as they are.
 * String columns get a second parsing pass: a conversion to [desiredColType] if given,
 * otherwise `tryParse` with the types Deephaven already handles added to the skip types.
 */
@Suppress("UNCHECKED_CAST")
private fun CsvReader.ResultColumn.toDataColumn(
    parserOptions: ParserOptions?,
    desiredColType: ColType?,
): DataColumn<*> {
    val sink = data()!! as ListSink
    val rawColumn = DataColumn.createValueColumn(
        name = name(),
        values = sink.data,
        type = dataType().toKType().withNullability(sink.hasNulls),
    )
    if (sink.dataType != STRING) return rawColumn

    // only String columns can be parsed further; the column stays String if that fails
    val stringColumn = rawColumn as ValueColumn<String?>
    if (desiredColType != null) {
        return stringColumn.convertTo(
            newType = desiredColType.toKType().withNullability(true),
            parserOptions = parserOptions,
        )
    }

    // types Deephaven already tried do not need to be tried again
    val requestedSkipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    val effectiveOptions = (parserOptions ?: ParserOptions())
        .copy(skipTypes = requestedSkipTypes + typesDeephavenAlreadyParses)
    return stringColumn.tryParse(effectiveOptions)
}
/**
 * Maps a Deephaven [DataType] to the [KType] of the values [ListSink] stores for it.
 * `CUSTOM` and `null` are not supported and fail with an [IllegalStateException].
 */
private fun DataType?.toKType(): KType {
    return when (this) {
        null -> error("null data type")

        DataType.CUSTOM -> error("custom data type")

        STRING -> typeOf<String>()

        CHAR -> typeOf<Char>()

        BOOLEAN_AS_BYTE -> typeOf<Boolean>()

        // unused in Parsers.DEFAULT
        BYTE -> typeOf<Byte>()

        // unused in Parsers.DEFAULT
        SHORT -> typeOf<Short>()

        INT -> typeOf<Int>()

        LONG -> typeOf<Long>()

        // unused in Parsers.COMPLETE and Parsers.DEFAULT
        FLOAT -> typeOf<Float>()

        DOUBLE -> typeOf<Double>()

        // TIMESTAMP_AS_LONG is unused in Parsers.COMPLETE and Parsers.DEFAULT
        DATETIME_AS_LONG, TIMESTAMP_AS_LONG -> typeOf<LocalDateTime>()
    }
}
/** Passes every name of [header], in order, through one shared [ColumnNameGenerator] and returns the results. */
private fun legalizeHeader(header: Array<String>): Array<String> {
    val uniqueNames = ColumnNameGenerator()
    return Array(header.size) { index -> uniqueNames.addUnique(header[index]) }
}
/**
 * Skips [skipLines] lines at the start of the input: via `skipHeaderRows` when the header is
 * taken from the data ([takeHeaderFromCsv]), via `skipRows` otherwise.
 */
private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Long): CsvSpecs.Builder =
    when {
        takeHeaderFromCsv -> skipHeaderRows(skipLines)
        else -> skipRows(skipLines)
    }
/**
 * Sets the correct parsers for the csv, based on [colTypes] and [ParserOptions.skipTypes].
 * If [ColType.DEFAULT] is present, it sets the default parser.
 *
 * Logic overview:
 *
 * - if no [colTypes] are given
 *   - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *   - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 * - if [colTypes] are supplied
 *   - if [ColType.DEFAULT] is among the values
 *     - set the parser for each supplied column+colType
 *     - let deephaven use _only_ the parser given as [ColType.DEFAULT] type
 *   - if [ColType.DEFAULT] is not among the values
 *     - set the parser for each supplied column+coltype
 *     - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *     - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 *
 * We will not use [Deephaven's DateTime parser][Parsers.DATETIME].
 * This is done to avoid different behavior compared to [DataFrame.parse];
 * Deephaven parses [Instant] as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047)
 *
 * Note that `skipTypes` will never skip a type explicitly set by `colTypes`.
 * This is intended.
 *
 * [Parsers.STRING] is never removed by `skipTypes`: it is the fallback for every column no other
 * parser accepts, and [KType.toColType] maps all types it does not know to [ColType.String],
 * so skipping such a type must not take the String parser with it.
 */
private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map<String, ColType>): CsvSpecs.Builder {
    for ((colName, colType) in colTypes) {
        if (colName == ColType.DEFAULT) continue
        putParserForName(colName, colType.toCsvParser())
    }

    // BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING
    val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME
    val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    val defaultColType = colTypes[ColType.DEFAULT]

    val parsersToUse = when {
        defaultColType != null ->
            listOf(defaultColType.toCsvParser(), Parsers.STRING)

        skipTypes.isNotEmpty() -> {
            // keep STRING out of the skip set, see the KDoc above
            val parsersToSkip = skipTypes
                .mapNotNull { it.toColType().toCsvParserOrNull() }
                .toSet() - Parsers.STRING
            defaultParsers.toSet() - parsersToSkip
        }

        else -> defaultParsers
    }
    parsers(parsersToUse)
    return this
}
/**
 * Configures where column names come from: an empty [header] makes Deephaven read the header row
 * from the data, a non-empty one disables the header row and supplies the given names.
 */
private fun CsvSpecs.Builder.header(header: List<String>): CsvSpecs.Builder {
    val takeHeaderFromCsv = header.isEmpty()
    return when {
        takeHeaderFromCsv -> hasHeaderRow(true)
        else -> hasHeaderRow(false).headers(header)
    }
}
/**
 * Looks up the Deephaven CSV [Parser] that directly corresponds to this [ColType].
 *
 * @return the matching parser, or `null` when Deephaven has no direct parser for this type.
 */
internal fun ColType.toCsvParserOrNull(): Parser<*>? {
    return when (this) {
        ColType.String -> Parsers.STRING
        ColType.Char -> Parsers.CHAR
        ColType.Boolean -> Parsers.BOOLEAN
        ColType.Int -> Parsers.INT
        ColType.Long -> Parsers.LONG
        ColType.Double -> Parsers.DOUBLE
        else -> null
    }
}
/**
 * Looks up the Deephaven CSV [Parser] for this [ColType].
 * Types without a direct [Parser] fall back to [Parsers.STRING], leaving the actual parsing to [DataFrame.parse].
 */
internal fun ColType.toCsvParser(): Parser<*> {
    val directParser = toCsvParserOrNull()
    return directParser ?: Parsers.STRING
}
/**
 * Maps this [KType], ignoring its nullability, to the corresponding [ColType].
 * Types without a dedicated [ColType] are mapped to [ColType.String].
 */
internal fun KType.toColType(): ColType {
    val nonNullType = withNullability(false)
    return when (nonNullType) {
        // text
        typeOf<String>() -> ColType.String
        typeOf<Char>() -> ColType.Char

        // booleans and numbers
        typeOf<Boolean>() -> ColType.Boolean
        typeOf<Int>() -> ColType.Int
        typeOf<Long>() -> ColType.Long
        typeOf<Double>() -> ColType.Double
        typeOf<BigDecimal>() -> ColType.BigDecimal
        typeOf<BigInteger>() -> ColType.BigInteger

        // date and time
        typeOf<LocalDate>() -> ColType.LocalDate
        typeOf<LocalTime>() -> ColType.LocalTime
        typeOf<LocalDateTime>() -> ColType.LocalDateTime
        typeOf<DeprecatedInstant>() -> ColType.DeprecatedInstant
        typeOf<StdlibInstant>() -> ColType.StdlibInstant
        typeOf<Duration>() -> ColType.Duration

        // other
        typeOf<URL>() -> ColType.Url
        typeOf<DataFrame<*>>() -> ColType.JsonArray
        typeOf<DataRow<*>>() -> ColType.JsonObject

        else -> ColType.String
    }
}
/**
 * The types Deephaven parses by itself. They are added to the skip types when
 * falling back to DataFrame's String parsers, so they are not attempted a second time.
 *
 * [LocalDateTime] and [java.time.LocalDateTime] are left out on purpose,
 * because Deephaven cannot recognize all of their formats.
 */
internal val typesDeephavenAlreadyParses: Set<KType> =
    buildSet {
        add(typeOf<Int>())
        add(typeOf<Long>())
        add(typeOf<Double>())
        add(typeOf<Char>())
        add(typeOf<Boolean>())
    }
@@ -0,0 +1,92 @@
@file:JvmName("WriteDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.impl.io
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
import org.jetbrains.kotlinx.dataframe.io.toJson
import org.apache.commons.csv.QuoteMode as ApacheQuoteMode
/**
 * Writes [df] to [writer] in a delimiter-separated format, using Apache Commons CSV.
 *
 * @param df The data to write.
 * @include [WRITER_WRITE]
 * @include [CSV_DELIMITER]
 * @include [CommonWriteDelimDocs.CommonWriteParams]
 * @include [ADJUST_CSV_FORMAT]
 */
internal fun writeDelimImpl(
    df: AnyFrame,
    writer: Appendable,
    delimiter: Char,
    includeHeader: Boolean,
    quote: Char?,
    quoteMode: QuoteMode,
    escapeChar: Char?,
    commentChar: Char?,
    headerComments: List<String>,
    recordSeparator: String,
    adjustCsvFormat: AdjustCSVFormat,
) {
    // configure the format; the caller-supplied adjustment runs last so it can overwrite anything
    val formatBuilder = CSVFormat.Builder.create(CSVFormat.DEFAULT).apply {
        setDelimiter(delimiter)
        setQuote(quote)
        setSkipHeaderRecord(!includeHeader)
        setQuoteMode(quoteMode.toApache())
        setRecordSeparator(recordSeparator)
        setEscape(escapeChar)
        setCommentMarker(commentChar)
        setHeaderComments(*headerComments.toTypedArray())
    }
    val format = adjustCsvFormat(formatBuilder, formatBuilder).get()

    // the format does the actual writing; only nested rows and frames are converted (to JSON) beforehand
    format.print(writer).use { printer ->
        if (includeHeader) {
            printer.printRecord(df.columnNames())
        }
        df.forEach { row ->
            val cells = row.values().map { cell ->
                when (cell) {
                    // toJson lives in the optional 'dataframe-json' module, hence the NoClassDefFoundError guard
                    is AnyRow -> try {
                        cell.toJson()
                    } catch (_: NoClassDefFoundError) {
                        error(
                            "Encountered a DataRow value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                        )
                    }

                    is AnyFrame -> try {
                        cell.toJson()
                    } catch (_: NoClassDefFoundError) {
                        error(
                            "Encountered a DataFrame value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                        )
                    }

                    else -> cell
                }
            }
            printer.printRecord(cells)
        }
    }
}
/** Maps this DataFrame [QuoteMode] onto the Apache Commons CSV quote mode of the same name. */
internal fun QuoteMode.toApache(): ApacheQuoteMode {
    // Deliberately exhaustive (no `else`): a newly added QuoteMode entry fails compilation here.
    return when (this) {
        QuoteMode.ALL -> ApacheQuoteMode.ALL
        QuoteMode.ALL_NON_NULL -> ApacheQuoteMode.ALL_NON_NULL
        QuoteMode.MINIMAL -> ApacheQuoteMode.MINIMAL
        QuoteMode.NON_NUMERIC -> ApacheQuoteMode.NON_NUMERIC
        QuoteMode.NONE -> ApacheQuoteMode.NONE
    }
}
@@ -0,0 +1,26 @@
package org.jetbrains.kotlinx.dataframe.io
/**
 * Defines quoting behavior for the fields of delimiter-separated output.
 *
 * Each entry has an equally named counterpart in Apache Commons CSV's `QuoteMode`.
 */
public enum class QuoteMode {
/** Quotes all fields. */
ALL,
/** Quotes all non-null fields. */
ALL_NON_NULL,
/**
* Quotes fields that contain special characters such as a field delimiter, quote character, or any of the
* characters in the line separator string.
*/
MINIMAL,
/** Quotes all non-numeric fields. */
NON_NUMERIC,
/**
* Never quotes fields. When the delimiter occurs in data, the printer prefixes it with the escape character. If the
* escape character is not set, format validation throws an exception.
*/
NONE,
}
@@ -0,0 +1,39 @@
@file:JvmName("CsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import java.io.File
import java.io.InputStream
import java.nio.file.Path
import kotlin.reflect.typeOf
/**
 * [SupportedDataFrameFormat] implementation that reads CSV data through `DataFrame.readCsv`.
 *
 * @param delimiter the field delimiter forwarded to every `readCsv` call and to the default read method
 *   created by [createDefaultReadMethod].
 */
public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat {

    override val testOrder: Int = 20_000

    /** Accepts exactly the (case-sensitive) extension `csv`. */
    override fun acceptsExtension(ext: String): Boolean = ext == "csv"

    override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough

    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> =
        DataFrame.readCsv(inputStream = stream, delimiter = delimiter, header = header)

    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> =
        DataFrame.readCsv(file = file, delimiter = delimiter, header = header)

    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> =
        DataFrame.readCsv(path = path, delimiter = delimiter, header = header)

    /** Builds the default read method for [pathRepresentation], passing [delimiter] as its only argument. */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadCsvMethod(
            path = pathRepresentation,
            arguments = MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter),
        )
}
// Method name handed to AbstractDefaultReadMethod below.
// NOTE(review): presumably the reader function name used by generated code — confirm against AbstractDefaultReadMethod.
private const val READ_CSV = "readCsv"
/** Default read method for CSV: forwards [path] and [arguments] to [AbstractDefaultReadMethod] with [READ_CSV]. */
internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
AbstractDefaultReadMethod(path, arguments, READ_CSV)
@@ -0,0 +1,516 @@
@file:JvmName("ReadCsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_OR_URL_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_BINARY_COMPATIBILITY
import java.io.File
import java.io.FileInputStream
import java.io.InputStream
import java.net.URL
import java.nio.charset.Charset
import java.nio.file.Path
import kotlin.io.path.inputStream
/**
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] File
* @set [CommonReadDelimDocs.DATA] file
* @include [PATH_READ]
* @include [CSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> {
    // The stream is opened here, so `use` makes sure it is closed again once parsing returns or throws.
    return path.inputStream().use { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] File
* @set [CommonReadDelimDocs.DATA] file
* @include [FILE_READ]
* @include [CSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> {
    // The stream is opened here, so `use` makes sure it is closed again once parsing returns or throws.
    return FileInputStream(file).use { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] Url
* @set [CommonReadDelimDocs.DATA] url
* @include [DelimParams.URL_READ]
* @include [CSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readCsv(
    url: URL,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> {
    // `catchHttpResponse` hands over the input stream for `url`; its error handling is defined elsewhere.
    return catchHttpResponse(url) { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] File or URL
* @set [CommonReadDelimDocs.DATA] file or url
* @include [FILE_OR_URL_READ]
* @include [CSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readCsv(
    fileOrUrl: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> {
    // Both local paths and URLs are funnelled through `asUrl`, so a single reading path is enough.
    val location = asUrl(fileOrUrl = fileOrUrl)
    return catchHttpResponse(location) { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
* {@comment the only one with adjustCsvSpecs}
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] InputStream
* @set [CommonReadDelimDocs.DATA] input stream
* @include [INPUT_STREAM_READ]
* @include [CSV_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
* @include [ADJUST_CSV_SPECS]
*/
public fun DataFrame.Companion.readCsv(
    inputStream: InputStream,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> {
    // Pure pass-through: this function itself neither wraps nor closes `inputStream`.
    return readDelimImpl(
        inputStream = inputStream,
        charset = charset,
        compression = compression,
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        adjustCsvSpecs = adjustCsvSpecs,
    )
}
// region deprecations
// Hidden binary-compatibility overload: it has no `charset` parameter and forwards to the current
// Path overload with the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    readCsv(
        path = path,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden binary-compatibility overload: it has no `charset` parameter and forwards to the current
// File overload with the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    readCsv(
        file = file,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden binary-compatibility overload: it has no `charset` parameter and forwards to the current
// URL overload with the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    url: URL,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    readCsv(
        url = url,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden binary-compatibility overload: it has no `charset` parameter and forwards to the current
// file-or-URL overload with the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    fileOrUrl: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    readCsv(
        fileOrUrl = fileOrUrl,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden binary-compatibility overload: it has no `charset` parameter and forwards to the current
// InputStream overload with the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    inputStream: InputStream,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    readCsv(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
// endregion
@@ -0,0 +1,71 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
/**
* @include [CommonReadDelimDocs.CsvDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] String
* @set [CommonReadDelimDocs.DATA] [String]
* @include [TEXT_READ]
* @include [CSV_DELIMITER]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readCsvStr(
    text: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The text is encoded as UTF-8 bytes and decoded with the very same charset by the reader.
    val utf8 = Charsets.UTF_8
    return readDelimImpl(
        inputStream = text.byteInputStream(utf8),
        charset = utf8,
        compression = Compression.None, // of course
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // This function has no `adjustCsvSpecs` parameter, so the default is forwarded.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
@@ -0,0 +1,522 @@
@file:JvmName("ReadDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_OR_URL_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_BINARY_COMPATIBILITY
import java.io.File
import java.io.FileInputStream
import java.io.InputStream
import java.net.URL
import java.nio.charset.Charset
import java.nio.file.Path
import kotlin.io.path.inputStream
/*
* TODO these currently clash with :core's readDelim(Str) functions.
* When those are deprecated, we can let users fall back to these.
* They do the same as readCsv(Str).
*/
/**
* @include [CommonReadDelimDocs.DelimDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] File
* @set [CommonReadDelimDocs.DATA] file
* @include [PATH_READ]
* @include [DELIM_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> {
    // The stream is opened here, so `use` makes sure it is closed again once parsing returns or throws.
    return path.inputStream().use { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
* @include [CommonReadDelimDocs.DelimDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] File
* @set [CommonReadDelimDocs.DATA] file
* @include [FILE_READ]
* @include [DELIM_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> {
    // The stream is opened here, so `use` makes sure it is closed again once parsing returns or throws.
    return FileInputStream(file).use { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
* @include [CommonReadDelimDocs.DelimDocs]
* @set [CommonReadDelimDocs.DATA_TITLE] Url
* @set [CommonReadDelimDocs.DATA] url
* @include [DelimParams.URL_READ]
* @include [DELIM_DELIMITER]
* @include [COMPRESSION]
* @include [CommonReadDelimDocs.CommonReadParams]
*/
public fun DataFrame.Companion.readDelim(
    url: URL,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> {
    // `catchHttpResponse` hands over the input stream for `url`; its error handling is defined elsewhere.
    return catchHttpResponse(url) { stream ->
        readDelimImpl(
            inputStream = stream,
            charset = charset,
            compression = compression,
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // This overload has no `adjustCsvSpecs` parameter, so the default is forwarded.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File or URL
 * @set [CommonReadDelimDocs.DATA] file or url
 * @include [FILE_OR_URL_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelim(
    fileOrUrl: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    // The string is first converted with asUrl; the stream that catchHttpResponse
    // hands to the lambda is then used as the data source.
    catchHttpResponse(asUrl(fileOrUrl = fileOrUrl)) { stream ->
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload has no adjustCsvSpecs parameter, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
/**
 * {@comment the only one with adjustCsvSpecs}
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] InputStream
 * @set [CommonReadDelimDocs.DATA] input stream
 * @include [INPUT_STREAM_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 * @include [ADJUST_CSV_SPECS]
 */
public fun DataFrame.Companion.readDelim(
    inputStream: InputStream,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    // Straight pass-through: the caller's stream and all options, including the
    // caller-supplied adjustCsvSpecs, go to readDelimImpl as given. The stream is
    // not wrapped in `use` here.
    readDelimImpl(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = charset,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
// region deprecations
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readDelim` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    readDelim(
        path = path,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readDelim` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    readDelim(
        file = file,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readDelim` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    url: URL,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    readDelim(
        url = url,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readDelim` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    fileOrUrl: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    readDelim(
        fileOrUrl = fileOrUrl,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument (including adjustCsvSpecs) to the `readDelim` overload that accepts a
// charset, passing `CHARSET` for it.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    inputStream: InputStream,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    readDelim(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
// endregion
@@ -0,0 +1,71 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] String
 * @set [CommonReadDelimDocs.DATA] [String]
 * @include [TEXT_READ]
 * @include [DELIM_DELIMITER]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelimStr(
    text: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> =
    readDelimImpl(
        // The text is exposed as a byte stream (byteInputStream encodes as UTF-8 by
        // default), so the same charset is passed for decoding.
        inputStream = text.byteInputStream(),
        delimiter = delimiter,
        header = header,
        charset = Charsets.UTF_8,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // The input is an in-memory String, so no compression is configured.
        compression = Compression.None,
        // This function has no adjustCsvSpecs parameter, so the default is always used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
@@ -0,0 +1,516 @@
@file:JvmName("ReadTsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_OR_URL_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_BINARY_COMPATIBILITY
import java.io.File
import java.io.FileInputStream
import java.io.InputStream
import java.net.URL
import java.nio.charset.Charset
import java.nio.file.Path
import kotlin.io.path.inputStream
/**
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File
 * @set [CommonReadDelimDocs.DATA] file
 * @include [PATH_READ]
 * @include [TSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    // `use` closes the stream opened on `path` once reading finishes or fails.
    path.inputStream().use { stream ->
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload has no adjustCsvSpecs parameter, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
/**
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File
 * @set [CommonReadDelimDocs.DATA] file
 * @include [FILE_READ]
 * @include [TSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    // `use` closes the FileInputStream once reading finishes or fails.
    FileInputStream(file).use { stream ->
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload has no adjustCsvSpecs parameter, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
/**
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] Url
 * @set [CommonReadDelimDocs.DATA] url
 * @include [DelimParams.URL_READ]
 * @include [TSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readTsv(
    url: URL,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    // The stream that catchHttpResponse hands to the lambda is used as the data source;
    // every caller-supplied option is forwarded unchanged to readDelimImpl.
    catchHttpResponse(url) { stream ->
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload has no adjustCsvSpecs parameter, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
/**
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File or URL
 * @set [CommonReadDelimDocs.DATA] file or url
 * @include [FILE_OR_URL_READ]
 * @include [TSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readTsv(
    fileOrUrl: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    // The string is first converted with asUrl; the stream that catchHttpResponse
    // hands to the lambda is then used as the data source.
    catchHttpResponse(asUrl(fileOrUrl = fileOrUrl)) { stream ->
        readDelimImpl(
            inputStream = stream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload has no adjustCsvSpecs parameter, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
/**
 * {@comment the only one with adjustCsvSpecs}
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] InputStream
 * @set [CommonReadDelimDocs.DATA] input stream
 * @include [INPUT_STREAM_READ]
 * @include [TSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 * @include [ADJUST_CSV_SPECS]
 */
public fun DataFrame.Companion.readTsv(
    inputStream: InputStream,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    // Straight pass-through: the caller's stream and all options, including the
    // caller-supplied adjustCsvSpecs, go to readDelimImpl as given. The stream is
    // not wrapped in `use` here.
    readDelimImpl(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = charset,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
// region deprecations
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readTsv` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    readTsv(
        path = path,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readTsv` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    readTsv(
        file = file,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readTsv` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    url: URL,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    readTsv(
        url = url,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument to the `readTsv` overload that accepts a charset, passing `CHARSET` for it.
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    fileOrUrl: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    readTsv(
        fileOrUrl = fileOrUrl,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
// Hidden overload whose signature has no `charset` parameter. It forwards every
// argument (including adjustCsvSpecs) to the `readTsv` overload that accepts a
// charset, passing `CHARSET` for it.
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    inputStream: InputStream,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    readTsv(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
// endregion
@@ -0,0 +1,71 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
/**
 * @include [CommonReadDelimDocs.TsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] String
 * @set [CommonReadDelimDocs.DATA] [String]
 * @include [TEXT_READ]
 * @include [TSV_DELIMITER]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readTsvStr(
    text: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> =
    readDelimImpl(
        // The text is exposed as a byte stream (byteInputStream encodes as UTF-8 by
        // default), so the same charset is passed for decoding.
        inputStream = text.byteInputStream(),
        delimiter = delimiter,
        header = header,
        charset = Charsets.UTF_8,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // The input is an in-memory String, so no compression is configured.
        compression = Compression.None,
        // This function has no adjustCsvSpecs parameter, so the default is always used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
@@ -0,0 +1,48 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
/**
* @include [CommonWriteDelimDocs.CsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Convert
* @set [CommonWriteDelimDocs.DATA_TITLE] String
* @set [CommonWriteDelimDocs.DATA] [String]
* @include [CSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.toCsvStr(
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Render into an in-memory buffer and return its contents.
    val buffer = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = buffer,
        // separators
        delimiter = delimiter,
        recordSeparator = recordSeparator,
        // quoting / escaping
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        // header and comments
        includeHeader = includeHeader,
        commentChar = commentChar,
        headerComments = headerComments,
        // this overload does not expose the format hook, so the default is always used
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return buffer.toString()
}
@@ -0,0 +1,48 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
/**
* @include [CommonWriteDelimDocs.DelimDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Convert
* @set [CommonWriteDelimDocs.DATA_TITLE] String
* @set [CommonWriteDelimDocs.DATA] [String]
* @include [DELIM_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.toDelimStr(
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Render into an in-memory buffer and return its contents.
    val buffer = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = buffer,
        // separators
        delimiter = delimiter,
        recordSeparator = recordSeparator,
        // quoting / escaping
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        // header and comments
        includeHeader = includeHeader,
        commentChar = commentChar,
        headerComments = headerComments,
        // this overload does not expose the format hook, so the default is always used
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return buffer.toString()
}
@@ -0,0 +1,48 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
/**
* @include [CommonWriteDelimDocs.TsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Convert
* @set [CommonWriteDelimDocs.DATA_TITLE] String
* @set [CommonWriteDelimDocs.DATA] [String]
* @include [TSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
// NOTE(review): here `includeHeader` comes before `delimiter`, whereas `toCsvStr` and `toDelimStr`
// declare `delimiter` first. The order is kept as-is because swapping it would break positional
// callers — confirm whether the difference is intentional.
public fun AnyFrame.toTsvStr(
    includeHeader: Boolean = INCLUDE_HEADER,
    delimiter: Char = TSV_DELIMITER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Render into an in-memory buffer and return its contents.
    val buffer = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = buffer,
        // separators
        delimiter = delimiter,
        recordSeparator = recordSeparator,
        // quoting / escaping
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        // header and comments
        includeHeader = includeHeader,
        commentChar = commentChar,
        headerComments = headerComments,
        // this overload does not expose the format hook, so the default is always used
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return buffer.toString()
}
@@ -0,0 +1,39 @@
@file:JvmName("TsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
import java.io.File
import java.io.InputStream
import java.nio.file.Path
import kotlin.reflect.typeOf
/**
 * [SupportedDataFrameFormat] for `.tsv` files.
 *
 * Every read is forwarded to [DataFrame.readTsv] with the [delimiter] given at construction time.
 *
 * @param delimiter the column separator passed to each read call and to the generated default read method.
 */
public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat {

    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(inputStream = stream, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(file = file, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(path = path, header = header, delimiter = delimiter)
    }

    /** Only the exact, lower-case extension `tsv` is accepted. */
    override fun acceptsExtension(ext: String): Boolean {
        return "tsv" == ext
    }

    /** The sample is never inspected: the extension check alone decides. */
    override fun acceptsSample(sample: SupportedFormatSample): Boolean {
        return true
    }

    override val testOrder: Int = 30_000

    /** Builds the default read method, carrying [delimiter] as a `Char` literal argument. */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadTsvMethod(
            pathRepresentation,
            MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter),
        )
}
// Method name handed to AbstractDefaultReadMethod by DefaultReadTsvMethod below.
private const val READ_TSV = "readTsv"
// Default read method for TSV: forwards `path` and `arguments` unchanged and fixes the method name to READ_TSV.
internal class DefaultReadTsvMethod(path: String?, arguments: MethodArguments) :
    AbstractDefaultReadMethod(path, arguments, READ_TSV)
@@ -0,0 +1,29 @@
package org.jetbrains.kotlinx.dataframe.io
import io.deephaven.csv.CsvSpecs
import org.apache.commons.csv.CSVFormat
import org.jetbrains.kotlinx.dataframe.documentationCsv.ExcludeFromSources
// Documentation-only holder: the KDoc on this alias is pulled into the KDoc of
// DEFAULT_DELIM_NULL_STRINGS (below) through `@include`. Keep its text in sync with the set's contents.
/** [\["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"\]][DEFAULT_DELIM_NULL_STRINGS] */
@ExcludeFromSources
internal typealias DefaultNullStringsContentLink = Nothing
/**
 * Default strings that are considered null when reading CSV / TSV / delim files:
 *
 * @include [DefaultNullStringsContentLink]
 */
public val DEFAULT_DELIM_NULL_STRINGS: Set<String> =
    setOf("", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil")
/**
 * Typealias for `CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder`.
 * A lambda where you can overwrite or adjust any of the CSV specs.
 *
 * The lambda gets a [CsvSpecs.Builder] both as receiver and as its single argument,
 * and has to return a [CsvSpecs.Builder].
 */
public typealias AdjustCsvSpecs = CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder
/**
 * Typealias for `CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder`.
 * A lambda where you can overwrite or adjust any of the CSV format options.
 *
 * The lambda gets a [CSVFormat.Builder] both as receiver and as its single argument,
 * and has to return a [CSVFormat.Builder].
 */
public typealias AdjustCSVFormat = CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder
@@ -0,0 +1,162 @@
@file:JvmName("WriteCsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
import java.io.File
import java.io.FileWriter
import java.nio.file.Path
import kotlin.io.path.writer
/**
* @include [CommonWriteDelimDocs.CsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [PATH_WRITE]
* @include [CSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // The writer is opened here, so it is released here as well: `use` closes the file handle
    // even if `writeDelimImpl` throws. Closing an already-closed writer has no effect, so this
    // stays correct should the implementation close the writer on its own.
    path.writer().use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* @include [CommonWriteDelimDocs.CsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [FILE_WRITE]
* @include [CSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `FileWriter(file)` encodes with the JVM's platform-default charset, while the `Path`
    // overload writes UTF-8. UTF-8 is used here too, so every overload produces the same bytes.
    // `use` releases the file handle even if `writeDelimImpl` throws; closing an already-closed
    // writer has no effect, so this is safe should the implementation close it on its own.
    file.writer(Charsets.UTF_8).use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* @include [CommonWriteDelimDocs.CsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [PATH_WRITE]
* @include [CSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeCsv(
    path: String,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `FileWriter(path)` encodes with the JVM's platform-default charset, while the `Path`
    // overload writes UTF-8. UTF-8 is used here too, so every overload produces the same bytes.
    // `use` releases the file handle even if `writeDelimImpl` throws; closing an already-closed
    // writer has no effect, so this is safe should the implementation close it on its own.
    File(path).writer(Charsets.UTF_8).use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* {@comment only one with adjustCsvFormat}
* @include [CommonWriteDelimDocs.CsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] Appendable
* @set [CommonWriteDelimDocs.DATA] [Appendable]
* @include [WRITER_WRITE]
* @include [CSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
* @include [ADJUST_CSV_FORMAT]
*/
public fun AnyFrame.writeCsv(
    writer: Appendable,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
): Unit {
    // Straight pass-through: this overload neither opens nor closes [writer] itself.
    writeDelimImpl(
        df = this,
        writer = writer,
        // separators
        delimiter = delimiter,
        recordSeparator = recordSeparator,
        // quoting / escaping
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        // header and comments
        includeHeader = includeHeader,
        commentChar = commentChar,
        headerComments = headerComments,
        // caller-supplied format hook
        adjustCsvFormat = adjustCsvFormat,
    )
}
@@ -0,0 +1,162 @@
@file:JvmName("WriteDelimDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
import java.io.File
import java.io.FileWriter
import java.nio.file.Path
import kotlin.io.path.writer
/**
* @include [CommonWriteDelimDocs.DelimDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [PATH_WRITE]
* @include [DELIM_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // The writer is opened here, so it is released here as well: `use` closes the file handle
    // even if `writeDelimImpl` throws. Closing an already-closed writer has no effect, so this
    // stays correct should the implementation close the writer on its own.
    path.writer().use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* @include [CommonWriteDelimDocs.DelimDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [FILE_WRITE]
* @include [DELIM_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `FileWriter(file)` encodes with the JVM's platform-default charset, while the `Path`
    // overload writes UTF-8. UTF-8 is used here too, so every overload produces the same bytes.
    // `use` releases the file handle even if `writeDelimImpl` throws; closing an already-closed
    // writer has no effect, so this is safe should the implementation close it on its own.
    file.writer(Charsets.UTF_8).use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* @include [CommonWriteDelimDocs.DelimDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [PATH_WRITE]
* @include [DELIM_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeDelim(
    path: String,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `FileWriter(path)` encodes with the JVM's platform-default charset, while the `Path`
    // overload writes UTF-8. UTF-8 is used here too, so every overload produces the same bytes.
    // `use` releases the file handle even if `writeDelimImpl` throws; closing an already-closed
    // writer has no effect, so this is safe should the implementation close it on its own.
    File(path).writer(Charsets.UTF_8).use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* {@comment only one with adjustCsvFormat}
* @include [CommonWriteDelimDocs.DelimDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] Appendable
* @set [CommonWriteDelimDocs.DATA] [Appendable]
* @include [WRITER_WRITE]
* @include [DELIM_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
* @include [ADJUST_CSV_FORMAT]
*/
public fun AnyFrame.writeDelim(
    writer: Appendable,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
): Unit {
    // Straight pass-through: this overload neither opens nor closes [writer] itself.
    writeDelimImpl(
        df = this,
        writer = writer,
        // separators
        delimiter = delimiter,
        recordSeparator = recordSeparator,
        // quoting / escaping
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        // header and comments
        includeHeader = includeHeader,
        commentChar = commentChar,
        headerComments = headerComments,
        // caller-supplied format hook
        adjustCsvFormat = adjustCsvFormat,
    )
}
@@ -0,0 +1,162 @@
@file:JvmName("WriteTsvDeephavenKt")
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
import java.io.File
import java.io.FileWriter
import java.nio.file.Path
import kotlin.io.path.writer
/**
* @include [CommonWriteDelimDocs.TsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [PATH_WRITE]
* @include [TSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // The writer is opened here, so it is released here as well: `use` closes the file handle
    // even if `writeDelimImpl` throws. Closing an already-closed writer has no effect, so this
    // stays correct should the implementation close the writer on its own.
    path.writer().use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* @include [CommonWriteDelimDocs.TsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [FILE_WRITE]
* @include [TSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `FileWriter(file)` encodes with the JVM's platform-default charset, while the `Path`
    // overload writes UTF-8. UTF-8 is used here too, so every overload produces the same bytes.
    // `use` releases the file handle even if `writeDelimImpl` throws; closing an already-closed
    // writer has no effect, so this is safe should the implementation close it on its own.
    file.writer(Charsets.UTF_8).use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* @include [CommonWriteDelimDocs.TsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] File
* @set [CommonWriteDelimDocs.DATA] file
* @include [PATH_WRITE]
* @include [TSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
*/
public fun AnyFrame.writeTsv(
    path: String,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    // `FileWriter(path)` encodes with the JVM's platform-default charset, while the `Path`
    // overload writes UTF-8. UTF-8 is used here too, so every overload produces the same bytes.
    // `use` releases the file handle even if `writeDelimImpl` throws; closing an already-closed
    // writer has no effect, so this is safe should the implementation close it on its own.
    File(path).writer(Charsets.UTF_8).use { fileWriter ->
        writeDelimImpl(
            df = this,
            writer = fileWriter,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // this overload does not expose the format hook, so the default is always used
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
    }
/**
* {@comment only one with adjustCsvFormat}
* @include [CommonWriteDelimDocs.TsvDocs]
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
* @set [CommonWriteDelimDocs.DATA_TITLE] Appendable
* @set [CommonWriteDelimDocs.DATA] [Appendable]
* @include [WRITER_WRITE]
* @include [TSV_DELIMITER]
* @include [CommonWriteDelimDocs.CommonWriteParams]
* @include [ADJUST_CSV_FORMAT]
*/
public fun AnyFrame.writeTsv(
    writer: Appendable,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
): Unit {
    // Straight pass-through: this overload neither opens nor closes [writer] itself.
    writeDelimImpl(
        df = this,
        writer = writer,
        // separators
        delimiter = delimiter,
        recordSeparator = recordSeparator,
        // quoting / escaping
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        // header and comments
        includeHeader = includeHeader,
        commentChar = commentChar,
        headerComments = headerComments,
        // caller-supplied format hook
        adjustCsvFormat = adjustCsvFormat,
    )
}
@@ -0,0 +1,30 @@
@file:JvmName("CsvDeprecationMessagesKt")
package org.jetbrains.kotlinx.dataframe.util
/*
 * This file contains deprecation messages for the whole `dataframe-csv` module.
 * After each release, all messages should be reviewed and updated:
 * Level.WARNING -> Level.ERROR
 * Level.ERROR -> Remove
 */
// region WARNING in 0.15, ERROR in 1.0
// NOTE(review): not referenced by any constant visible in this file — confirm it is still needed.
private const val MESSAGE_1_0 = "Will be ERROR in 1.0."
// One constant per reader family (CSV / TSV / delim); all three currently carry the same text.
internal const val READ_CSV_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
internal const val READ_TSV_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
internal const val READ_DELIM_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
// endregion
// region WARNING in 1.0, ERROR in 1.1
// NOTE(review): not referenced by any constant visible in this file — confirm it is still needed.
private const val MESSAGE_1_1 = "Will be ERROR in 1.1."
// endregion
// region keep across releases
// endregion
@@ -0,0 +1,2 @@
org.jetbrains.kotlinx.dataframe.io.CsvDeephaven
org.jetbrains.kotlinx.dataframe.io.TsvDeephaven
@@ -0,0 +1,54 @@
package org.jetbrains.kotlinx.dataframe.io
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.BenchmarkMode
import org.openjdk.jmh.annotations.Measurement
import org.openjdk.jmh.annotations.Mode
import org.openjdk.jmh.annotations.Param
import org.openjdk.jmh.annotations.Scope
import org.openjdk.jmh.annotations.Setup
import org.openjdk.jmh.annotations.State
import org.openjdk.jmh.annotations.TearDown
import org.openjdk.jmh.annotations.Warmup
import java.io.File
import java.util.concurrent.TimeUnit
/**
 * JMH benchmark that reads the same CSV resource once through [DataFrame.readCSV] and once
 * through [DataFrame.readCsv], for three input sizes selected by [type].
 */
@BenchmarkMode(Mode.SingleShotTime)
@Warmup(iterations = 10, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
open class BenchmarkTest {

    /** Input size label; one of the `@Param` values. Selects the resource file in [setup]. */
    @Param("small", "medium", "large")
    var type = ""

    /** Input file of the current run; assigned in [setup], cleared again in [tearDown]. */
    var file: File? = null

    @Setup
    fun setup() {
        System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info")

        // map the size label onto the matching test resource
        val resourceName = when (type) {
            "small" -> "testCSV.csv"
            "medium" -> "gross-domestic-product-june-2024-quarter.csv"
            "large" -> "largeCsv.csv.gz"
            else -> throw IllegalArgumentException("Invalid type")
        }
        file = File("src/test/resources/$resourceName")
    }

    @TearDown
    fun tearDown() {
        file = null
    }

    /** Benchmark of `DataFrame.readCSV`. */
    @Benchmark
    fun apache() {
        val input = file!!
        DataFrame.readCSV(input)
    }

    /** Benchmark of `DataFrame.readCsv`. */
    @Benchmark
    fun deephaven() {
        val input = file!!
        DataFrame.readCsv(input)
    }
}
@@ -0,0 +1,887 @@
package org.jetbrains.kotlinx.dataframe.io
import io.deephaven.csv.parsers.Parsers
import io.kotest.assertions.throwables.shouldNotThrowAny
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.collections.shouldContainInOrder
import io.kotest.matchers.nulls.shouldNotBeNull
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import org.intellij.lang.annotations.Language
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.allNulls
import org.jetbrains.kotlinx.dataframe.api.convert
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.group
import org.jetbrains.kotlinx.dataframe.api.groupBy
import org.jetbrains.kotlinx.dataframe.api.into
import org.jetbrains.kotlinx.dataframe.api.isEmpty
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.print
import org.jetbrains.kotlinx.dataframe.api.schema
import org.jetbrains.kotlinx.dataframe.api.toStr
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
import org.junit.After
import org.junit.Before
import org.junit.Test
import java.io.File
import java.io.StringWriter
import java.math.BigDecimal
import java.net.URL
import java.util.Locale
import java.util.zip.GZIPInputStream
import kotlin.reflect.KClass
import kotlin.reflect.typeOf
import kotlin.time.Instant as StdlibInstant
import kotlinx.datetime.Instant as DeprecatedInstant
// Can be enabled for showing logs for these tests:
// when true, setLogger() switches the FastDoubleParser logger to "trace" before each test.
private const val SHOW_LOGS = false
@Suppress("ktlint:standard:argument-list-wrapping")
class DelimCsvTsvTests {
// System property key holding the slf4j simple-logger level for the FastDoubleParser class.
private val logLevel = "org.slf4j.simpleLogger.log.${FastDoubleParser::class.qualifiedName}"
// Value of that property as captured by setLogger(); stays null when it was unset or never captured.
private var loggerBefore: String? = null
@Before
fun setLogger() {
    // Only touch the logging configuration when logs were explicitly requested.
    if (SHOW_LOGS) {
        // remember the current value so restoreLogger() can put it back
        loggerBefore = System.getProperty(logLevel)
        System.setProperty(logLevel, "trace")
    }
}
@After
fun restoreLogger() {
    if (!SHOW_LOGS) return

    // Undo what setLogger() did. If the property had no value before, it must be removed again;
    // otherwise the "trace" override would stay active for every test that runs afterwards.
    val previous = loggerBefore
    if (previous != null) {
        System.setProperty(logLevel, previous)
    } else {
        System.clearProperty(logLevel)
    }
}
@Test
fun readNulls() {
    // two data rows; nothing follows the first value on either row
    @Language("CSV")
    val csvText =
        """
        first,second
        2,,
        3,,
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(csvText)

    parsed.rowsCount() shouldBe 2
    parsed.columnsCount() shouldBe 2
    parsed["first"].type() shouldBe typeOf<Int>()

    // the all-empty column is expected to come back as a nullable String column of nulls
    val secondColumn = parsed["second"]
    secondColumn.allNulls() shouldBe true
    secondColumn.type() shouldBe typeOf<String?>()
}
@Test
fun write() {
    val original = dataFrameOf("col1", "col2")(
        1, null,
        2, null,
    ).convert("col2").toStr()

    // round trip: write to an in-memory writer, read the text back, compare with the source frame
    val sink = StringWriter()
    original.writeCsv(sink)
    val readBack = DataFrame.readCsvStr(sink.toString())

    readBack shouldBe original
}
@Test
fun readCsv() {
    val frame = DataFrame.read(simpleCsv)

    frame.columnsCount() shouldBe 11
    frame.rowsCount() shouldBe 5

    // names expected at positions 5 and 6
    val names = frame.columnNames()
    names[5] shouldBe "duplicate1"
    names[6] shouldBe "duplicate11"

    // expected inferred column types
    val expectedTypes = listOf(
        "duplicate1" to typeOf<Char?>(),
        "double" to typeOf<Double?>(),
        "number" to typeOf<Double>(),
        "time" to typeOf<LocalDateTime>(),
    )
    for ((columnName, expectedType) in expectedTypes) {
        frame[columnName].type() shouldBe expectedType
    }

    frame.print(columnTypes = true, borders = true, title = true)
}
@Test
fun `readCsv different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)

    // without an explicit charset, and with the matching one, the reference frame must come back
    DataFrame.readCsv(simpleCsvUtf16le) shouldBe expected
    DataFrame.readCsv(simpleCsvUtf16le, charset = Charsets.UTF_16LE) shouldBe expected

    // with a mismatching charset it must not
    for (wrongCharset in listOf(Charsets.UTF_16BE, Charsets.UTF_8)) {
        DataFrame.readCsv(simpleCsvUtf16le, charset = wrongCharset) shouldNotBe expected
    }
}
@Test
fun `readCsv gz compressed different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)

    // without an explicit charset, and with the matching one, the reference frame must come back
    DataFrame.readCsv(simpleCsvUtf16leGz) shouldBe expected
    DataFrame.readCsv(simpleCsvUtf16leGz, charset = Charsets.UTF_16LE) shouldBe expected

    // with a mismatching charset it must not
    for (wrongCharset in listOf(Charsets.UTF_16BE, Charsets.UTF_8)) {
        DataFrame.readCsv(simpleCsvUtf16leGz, charset = wrongCharset) shouldNotBe expected
    }
}
@Test
fun `readCsv zip compressed different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)

    // without an explicit charset, and with the matching one, the reference frame must come back
    DataFrame.readCsv(simpleCsvUtf16leZip) shouldBe expected
    DataFrame.readCsv(simpleCsvUtf16leZip, charset = Charsets.UTF_16LE) shouldBe expected

    // with a mismatching charset it must not
    for (wrongCharset in listOf(Charsets.UTF_16BE, Charsets.UTF_8)) {
        DataFrame.readCsv(simpleCsvUtf16leZip, charset = wrongCharset) shouldNotBe expected
    }
}
@Test
fun `read ZIP Csv`() {
    // the zipped file must yield the same frame as the plain one
    val fromZip = DataFrame.readCsv(simpleCsvZip)
    val fromPlain = DataFrame.readCsv(simpleCsv)
    fromZip shouldBe fromPlain

    // reading the `notCsv` resource is expected to fail
    shouldThrow<IllegalStateException> { DataFrame.readCsv(notCsv) }
}
@Test
fun `read GZ Csv`() {
    // the gzipped file must yield the same frame as the plain one
    val fromGz = DataFrame.readCsv(simpleCsvGz)
    val fromPlain = DataFrame.readCsv(simpleCsv)
    fromGz shouldBe fromPlain
}
@Test
fun `read custom compression Csv`() {
    // a user-supplied compression built from the GZIPInputStream constructor
    val viaCustomCompression = DataFrame.readCsv(
        simpleCsvGz,
        compression = Compression(::GZIPInputStream),
    )
    val fromPlain = DataFrame.readCsv(simpleCsv)

    viaCustomCompression shouldBe fromPlain
}
@Test
fun `read 2 compressed Csv`() {
    // reading the `twoCsvsZip` resource is expected to be rejected
    shouldThrow<IllegalArgumentException> {
        DataFrame.readCsv(twoCsvsZip)
    }
}
@Test
fun readCsvWithFrenchLocaleAndAlternativeDelimiter() {
    val frenchParsing = ParserOptions(locale = Locale.FRENCH)
    val frame = DataFrame.readCsv(
        url = csvWithFrenchLocale,
        delimiter = ';',
        parserOptions = frenchParsing,
    )

    frame.columnsCount() shouldBe 11
    frame.rowsCount() shouldBe 5

    // names expected at positions 5 and 6
    val names = frame.columnNames()
    names[5] shouldBe "duplicate1"
    names[6] shouldBe "duplicate11"

    // expected inferred column types
    val expectedTypes = listOf(
        "duplicate1" to typeOf<Char?>(),
        "double" to typeOf<Double?>(),
        "number" to typeOf<Double>(),
        "time" to typeOf<LocalDateTime>(),
    )
    for ((columnName, expectedType) in expectedTypes) {
        frame[columnName].type() shouldBe expectedType
    }

    println(frame)
}
/**
 * Asserts that [schema] contains a column called [columnName] and that the classifier
 * of that column's type is [kClass].
 */
private fun assertColumnType(columnName: String, kClass: KClass<*>, schema: DataFrameSchema) {
    val columnSchema = schema.columns[columnName].shouldNotBeNull()
    columnSchema.type.classifier shouldBe kClass
}
/** Reads the wine fixture (`;` delimited) and checks the classifiers of three of its columns. */
@Test
fun readCsvWithFloats() {
    val schema = DataFrame.readCsv(wineCsv, delimiter = ';').schema()

    val expectedClassifiers = listOf(
        "citric acid" to Double::class,
        "alcohol" to Double::class,
        "quality" to Int::class,
    )
    for ((columnName, expectedClass) in expectedClassifiers) {
        assertColumnType(columnName, expectedClass, schema)
    }
}
/**
 * Repeats the wine-fixture type checks while the JVM default locale is switched to `ru-RU`.
 * The previous default locale is restored afterwards, even when an assertion fails.
 */
@Test
fun `read standard CSV with floats when user has alternative locale`() {
    val previousDefault = Locale.getDefault()
    try {
        Locale.setDefault(Locale.forLanguageTag("ru-RU"))

        val schema = DataFrame.readCsv(wineCsv, delimiter = ';').schema()
        val expectedClassifiers = listOf(
            "citric acid" to Double::class,
            "alcohol" to Double::class,
            "quality" to Int::class,
        )
        for ((columnName, expectedClass) in expectedClassifiers) {
            assertColumnType(columnName, expectedClass, schema)
        }
    } finally {
        Locale.setDefault(previousDefault)
    }
}
/**
 * Replaces the header of [simpleCsv] (its first line is skipped) with a custom one:
 * once with eleven names (`A`..`K`) and once with only five (`A`..`E`).
 */
@Test
fun `read with custom header`() {
    val fullHeader = ('A'..'K').map(Char::toString)
    DataFrame.readCsv(simpleCsv, header = fullHeader, skipLines = 1).let { full ->
        full.columnNames() shouldBe fullHeader
        full["B"].type() shouldBe typeOf<Int>()
    }

    val shortHeader = ('A'..'E').map(Char::toString)
    DataFrame.readCsv(simpleCsv, header = shortHeader, skipLines = 1).let { short ->
        short.columnsCount() shouldBe 5
        short.columnNames() shouldBe shortHeader
    }
}
/**
 * Exercises the `readLines` limit on [simpleCsv]: `0` keeps only the header,
 * `3` yields three rows, and `10` yields the five rows the test expects in total.
 */
@Test
fun `read first rows`() {
    val expectedNames = listOf(
        "untitled",
        "user_id",
        "name",
        "duplicate",
        "username",
        "duplicate1",
        "duplicate11",
        "double",
        "number",
        "time",
        "empty",
    )

    DataFrame.readCsv(simpleCsv, readLines = 0).let { headerOnly ->
        headerOnly.rowsCount() shouldBe 0
        headerOnly.columnNames() shouldBe expectedNames
    }

    DataFrame.readCsv(simpleCsv, readLines = 3).rowsCount() shouldBe 3

    DataFrame.readCsv(simpleCsv, readLines = 10).rowsCount() shouldBe 5
}
/**
 * Columns mixing plain numbers with values that merely start with a number
 * (`12 min`, `12.98 sec`, ...) must be typed as [String].
 */
@Test
fun `if string starts with a number, it should be parsed as a string anyway`() {
    @Language("CSV")
    val csv =
        """
        duration,floatDuration
        12 min,1.0
        15,12.98 sec
        1 Season,0.9 parsec
        """.trimIndent()

    val df = DataFrame.readCsvStr(csv)

    for (columnName in listOf("duration", "floatDuration")) {
        df[columnName].type() shouldBe typeOf<String>()
    }
}
/**
 * A record with fewer cells than the header must be read without throwing,
 * with the missing trailing cell filled with `null`.
 */
@Test
fun `if record has fewer columns than header then pad it with nulls`() {
    @Language("CSV")
    val shortRecordCsv =
        """
        col1,col2,col3
        568,801,587
        780,588
        """.trimIndent()

    val parsed = shouldNotThrowAny { DataFrame.readCsvStr(shortRecordCsv) }

    val expected = dataFrameOf("col1", "col2", "col3")(
        568, 801, 587,
        780, 588, null,
    )
    parsed shouldBe expected
}
/**
 * Round trip of a frame with a frame column: group by `a` into `g`, write it to a CSV
 * string without an escape char, read it back with `"` as quote and expect equality.
 */
@Test
fun `write and read frame column`() {
    val grouped = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
        2, 1, 3,
    ).groupBy("a").into("g")

    val csvText = grouped.toCsvStr(escapeChar = null)
    val roundTripped = DataFrame.readCsvStr(csvText, quote = '"')

    roundTripped shouldBe grouped
}
/**
 * Round trip of a frame with a column group: columns `b` and `c` are grouped into `d`,
 * written to a CSV string, read back and compared with the original.
 */
@Test
fun `write and read column group`() {
    val grouped = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    ).group("b", "c").into("d")

    val roundTripped = DataFrame.readCsvStr(grouped.toCsvStr())

    roundTripped shouldBe grouped
}
/** The very first character of the CSV output must be the first character of the column name. */
@Test
fun `CSV String of saved dataframe starts with column name`() {
    val csvText = dataFrameOf("a")(1).toCsvStr()
    csvText.first() shouldBe 'a'
}
/** The generic `DataFrame.read` must turn the `abc.tsv` resource into a 3-column, 2-row frame. */
@Test
fun `guess tsv`() {
    val tsvResource = testResource("abc.tsv")
    DataFrame.read(tsvResource).let { guessed ->
        guessed.columnsCount() shouldBe 3
        guessed.rowsCount() shouldBe 2
    }
}
/**
 * Writes a small frame without header and with `\r\n` record separators,
 * then checks that the file exists and holds exactly the two data records.
 *
 * The produced file lives inside the test resources directory, so it is removed in a
 * `finally` block: a failing assertion must not leave a stray file in the source tree.
 */
@Test
fun `write csv without header produce correct file`() {
    val df = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    )
    val outputPath = "src/test/resources/without_header.csv"
    val producedFile = File(outputPath)
    try {
        df.writeCsv(
            path = outputPath,
            includeHeader = false,
            recordSeparator = "\r\n",
        )
        producedFile.exists() shouldBe true
        producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n"
    } finally {
        // Always clean up, whether or not the assertions above passed.
        producedFile.delete()
    }
}
/**
 * Checks the two example repository CSVs: the variant with a blank first line cannot be read
 * directly (not even with `ignoreEmptyLines`), but after skipping one line it has the expected
 * names and types and equals the variant without the blank line.
 */
@Test
fun `check integrity of example data`() {
    val withLeadingBlankLine = "../data/jetbrains repositories.csv"
    val withoutLeadingBlankLine = "../data/jetbrains_repositories.csv"

    // cannot read file with blank line at the start
    shouldThrow<IllegalStateException> { DataFrame.readCsv(withLeadingBlankLine) }

    // ignoreEmptyLines only ignores intermediate empty lines
    shouldThrow<IllegalStateException> {
        DataFrame.readCsv(withLeadingBlankLine, ignoreEmptyLines = true)
    }

    // we need to skip the empty lines manually
    val df = DataFrame.readCsv(withLeadingBlankLine, skipLines = 1)

    val expectedNames = listOf("full_name", "html_url", "stargazers_count", "topics", "watchers")
    val expectedTypes = listOf(
        typeOf<String>(),
        typeOf<URL>(),
        typeOf<Int>(),
        typeOf<String>(),
        typeOf<Int>(),
    )
    df.columnNames() shouldBe expectedNames
    df.columnTypes() shouldBe expectedTypes

    // same file without empty line at the beginning
    df shouldBe DataFrame.readCsv(withoutLeadingBlankLine)
}
/**
 * Passing `'\t'` as delimiter to `readCsvStr` must split a tab-separated text into columns.
 *
 * The tabs are written as `\t` escapes on purpose: literal tab characters inside a raw string
 * are invisible and are silently turned into spaces by many editors and formatters, which
 * would collapse the input into a single column and break this test.
 */
@Test
fun `readCsvStr delimiter`() {
    @Language("TSV")
    val tsv = "a\tb\tc\n1\t2\t3"

    val df = DataFrame.readCsvStr(tsv, '\t')

    df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
/** The [withBomCsv] fixture (`;` delimited) must yield exactly the names `Column1` and `Column2`. */
@Test
fun `file with BOM`() {
    val columnNames = DataFrame.readCsv(withBomCsv, delimiter = ';').columnNames()
    columnNames shouldBe listOf("Column1", "Column2")
}
/**
 * Empty inputs must produce empty frames: an empty string (delimited and fixed-width),
 * an empty CSV/TSV file, an empty file with a manually supplied header, and a file that
 * contains only a header line. In the two header cases the columns must exist and be
 * typed as [String].
 */
@Test
fun `read empty CSV`() {
    // Creates a fresh, empty temp file per call. The suffix carries its leading dot (the
    // JDK does not add one), and the file is registered for deletion on JVM exit so
    // repeated test runs do not pile up files in the temp directory.
    fun emptyTempFile(suffix: String): File =
        File.createTempFile("empty", suffix).apply { deleteOnExit() }

    val emptyDelimStr = DataFrame.readCsvStr("")
    emptyDelimStr shouldBe DataFrame.empty()

    val emptyWidthStr = DataFrame.readCsvStr("", hasFixedWidthColumns = true)
    emptyWidthStr shouldBe DataFrame.empty()

    val emptyCsvFile = DataFrame.readCsv(emptyTempFile(".csv"))
    emptyCsvFile shouldBe DataFrame.empty()

    val emptyCsvFileManualHeader = DataFrame.readCsv(
        file = emptyTempFile(".csv"),
        header = listOf("a", "b", "c"),
    )
    emptyCsvFileManualHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    val emptyCsvFileWithHeader = DataFrame.readCsv(
        file = emptyTempFile(".csv").also { it.writeText("a,b,c") },
    )
    emptyCsvFileWithHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    val emptyTsvStr = DataFrame.readTsv(emptyTempFile(".tsv"))
    emptyTsvStr shouldBe DataFrame.empty()
}
/** A leading comment line can be dropped with `skipLines = 1`, after which the header follows. */
@Test
fun `read Csv with comments`() {
    @Language("CSV")
    val commentedCsv =
        """
        # This is a comment
        a,b,c
        1,2,3
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(commentedCsv, skipLines = 1L)

    parsed shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
/**
 * Checks how an empty line between two records is treated:
 * by default it becomes a row of nulls, with `ignoreEmptyLines = true` it is dropped,
 * and with `allowMissingColumns = false` reading fails with an [IllegalStateException].
 *
 * The input is assembled from explicit lines so that the empty record between `1,2,3` and
 * `4,5,6` is unmistakable. As a blank line inside a raw string it is easily lost to
 * whitespace clean-up, and without it none of the three expectations below can hold.
 */
@Test
fun `csv with empty lines`() {
    @Language("CSV")
    val csv = listOf(
        "a,b,c",
        "1,2,3",
        "", // the empty line under test
        "4,5,6",
    ).joinToString(separator = "\n")

    val df1 = DataFrame.readCsvStr(csv)
    df1 shouldBe dataFrameOf("a", "b", "c")(
        1, 2, 3,
        null, null, null,
        4, 5, 6,
    )

    val df2 = DataFrame.readCsvStr(csv, ignoreEmptyLines = true)
    df2 shouldBe dataFrameOf("a", "b", "c")(
        1, 2, 3,
        4, 5, 6,
    )

    shouldThrow<IllegalStateException> { DataFrame.readCsvStr(csv, allowMissingColumns = false) }
}
/** Both an empty path and the path `NON EXISTENT FILE` must be rejected with [IllegalArgumentException]. */
@Test
fun `don't read folder`() {
    for (invalidPath in listOf("", "NON EXISTENT FILE")) {
        shouldThrow<IllegalArgumentException> { DataFrame.readCsv(invalidPath) }
    }
}
/**
 * Without a matching date pattern the `dd/MM/yyyy` values of the `date` column cannot be
 * parsed, so the column must stay a nullable [String]: both with default parser options
 * and with [Locale.ROOT] set explicitly.
 */
@Test
fun `cannot auto-parse specific date string`() {
    @Language("csv")
    val frenchCsv =
        """
        name; price; date;
        a;12,45; 05/06/2021;
        b;-13,35;14/07/2025;
        c;100 123,35;;
        d;-204 235,23;;
        e;NaN;;
        f;null;;
        """.trimIndent()

    // could not parse, remains String
    DataFrame.readCsvStr(text = frenchCsv, delimiter = ';')
        .let { dfDeephaven -> dfDeephaven["date"].type() shouldBe typeOf<String?>() }

    // setting any locale skips deephaven's date parsing
    val rootLocaleOptions = ParserOptions(locale = Locale.ROOT)

    // could not parse, remains String
    DataFrame.readCsvStr(text = frenchCsv, delimiter = ';', parserOptions = rootLocaleOptions)
        .let { dfDataFrame -> dfDataFrame["date"].type() shouldBe typeOf<String?>() }
}
/**
 * Parses locale-specific number (and date) notations by passing a locale through [ParserOptions]:
 * French (with an explicit `dd/MM/yyyy` date pattern), Dutch (`nl-NL`) and, except on Windows,
 * Eastern Arabic (`ar-001`).
 *
 * NOTE(review): the CSV literals below contain locale-specific characters (separators inside
 * numbers, Eastern Arabic digits, right-to-left text). Their exact code points matter to the
 * assertions, so edit them only with an editor that preserves such characters.
 */
@Test
fun `parse with other locales`() {
// French sample: comma decimals, a separator inside larger numbers, day-first dates.
@Language("csv")
val frenchCsv =
"""
name; price; date;
a;12,45; 05/06/2021;
b;-13,35;14/07/2025;
c;100 123,35;;
d;-204 235,23;;
e;NaN;;
f;null;;
""".trimIndent()
val frenchDf = DataFrame.readCsvStr(
text = frenchCsv,
delimiter = ';',
parserOptions = ParserOptions(
dateTimePattern = "dd/MM/yyyy",
locale = Locale.FRENCH,
),
)
// With the locale and the pattern supplied, both columns get a parsed (nullable) type.
frenchDf["price"].type() shouldBe typeOf<Double?>()
frenchDf["date"].type() shouldBe typeOf<LocalDate?>()
// Dutch sample: comma decimals with '.' inside larger numbers.
@Language("csv")
val dutchCsv =
"""
name; price;
a;12,45;
b;-13,35;
c;100.123,35;
d;-204.235,23;
e;NaN;
f;null;
""".trimIndent()
val dutchDf = DataFrame.readCsvStr(
text = dutchCsv,
delimiter = ';',
parserOptions = ParserOptions(
locale = Locale.forLanguageTag("nl-NL"),
),
)
dutchDf["price"].type() shouldBe typeOf<Double?>()
// skipping this test on windows due to lack of support for Arabic locales
if (!System.getProperty("os.name").startsWith("Windows")) {
// while negative numbers in RTL languages cannot be parsed thanks to Java, others work
@Language("csv")
val arabicCsv =
"""
الاسم; السعر;
أ;١٢٫٤٥;
ب;١٣٫٣٥;
ج;١٠٠٫١٢٣;
د;٢٠٤٫٢٣٥;
هـ;ليس رقم;
و;null;
""".trimIndent()
val easternArabicDf = DataFrame.readCsvStr(
arabicCsv,
delimiter = ';',
parserOptions = ParserOptions(
locale = Locale.forLanguageTag("ar-001"),
),
)
// The second header cell names the numeric column, the first one the name column.
easternArabicDf["السعر"].type() shouldBe typeOf<Double?>()
easternArabicDf["الاسم"].type() shouldBe typeOf<String>() // apparently not a char
}
}
/**
 * Reads a `;`-delimited sample with the Estonian (`et-EE`) locale and expects the `price`
 * column to be parsed as nullable [Double]: once with the locale passed through
 * [ParserOptions], once with the locale set on the global `DataFrame.parser`.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the Estonian locale into other tests.
 */
@Test
fun `handle slightly mixed locales`() {
    @Language("csv")
    val estonianWrongMinus =
        """
        name; price;
        a;12,45;
        b;-13,35;
        c;100 123,35;
        d;-204 235,23;
        e;NaN;
        f;null;
        """.trimIndent()

    val estonianDf1 = DataFrame.readCsvStr(
        text = estonianWrongMinus,
        delimiter = ';',
        parserOptions = ParserOptions(
            locale = Locale.forLanguageTag("et-EE"),
        ),
    )
    estonianDf1["price"].type() shouldBe typeOf<Double?>()

    // also test the global setting
    try {
        DataFrame.parser.locale = Locale.forLanguageTag("et-EE")
        val estonianDf2 = DataFrame.readCsvStr(
            text = estonianWrongMinus,
            delimiter = ';',
        )
        estonianDf2 shouldBe estonianDf1
    } finally {
        // Restore the shared parser configuration regardless of the outcome above.
        DataFrame.parser.resetToDefault()
    }
}
/**
 * Reads the msleep fixture with the default delimited-text null strings plus the custom
 * null string `nothing`, checks the resulting column types, and verifies that registering
 * the same null strings on the global `DataFrame.parser` gives an identical frame.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the extra null strings into other tests.
 */
@Test
fun `NA and custom null string in double column`() {
    val df1 = DataFrame.readCsv(
        msleepCsv,
        parserOptions = ParserOptions(
            nullStrings = DEFAULT_DELIM_NULL_STRINGS + "nothing",
        ),
    )
    df1["name"].type() shouldBe typeOf<String>()
    df1["genus"].type() shouldBe typeOf<String>()
    df1["vore"].type() shouldBe typeOf<String?>()
    df1["order"].type() shouldBe typeOf<String>()
    df1["conservation"].type() shouldBe typeOf<String?>()
    df1["sleep_total"].type() shouldBe typeOf<Double>()
    df1["sleep_rem"].type() shouldBe typeOf<Double?>()
    df1["sleep_cycle"].type() shouldBe typeOf<Double?>()
    df1["awake"].type() shouldBe typeOf<Double>()
    df1["brainwt"].type() shouldBe typeOf<Double?>()
    df1["bodywt"].type() shouldBe typeOf<Double?>()

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("nothing")
        DEFAULT_DELIM_NULL_STRINGS.forEach {
            DataFrame.parser.addNullString(it)
        }
        val df2 = DataFrame.readCsv(msleepCsv)
        df2 shouldBe df1
    } finally {
        // Restore the shared parser configuration regardless of the outcome above.
        DataFrame.parser.resetToDefault()
    }
}
/**
 * Reads a fixed-width table twice: once letting the reader work out the columns
 * (`hasFixedWidthColumns = true` only), and once with explicit `fixedColumnWidths`,
 * a skipped header line and replacement column names.
 *
 * The table is built with explicit padding instead of a hand-aligned raw string. The
 * alignment *is* the data here, so it must agree with `fixedColumnWidths` and must survive
 * any tool that collapses runs of spaces.
 */
@Test
fun `multiple spaces as delimiter`() {
    // Widths of all columns but the last one; the final column takes the rest of the line.
    val leadingWidths = listOf(25, 9, 9, 9)

    // Pads every cell that has a declared width and leaves the last cell untouched.
    fun fixedWidthLine(vararg cells: String): String =
        cells.withIndex().joinToString(separator = "") { (index, cell) ->
            leadingWidths.getOrNull(index)?.let { width -> cell.padEnd(width) } ?: cell
        }

    @Language("csv")
    val csv = listOf(
        fixedWidthLine("NAME", "STATUS", "AGE", "NUMBER", "LABELS"),
        fixedWidthLine(
            "argo-events",
            "Active",
            "2y77d",
            "1234",
            "app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events",
        ),
        fixedWidthLine(
            "argo-workflows",
            "Active",
            "2y77d",
            "1234",
            "app.kubernetes.io/instance=argo-workflows,kubernetes.io/metadata.name=argo-workflows",
        ),
        fixedWidthLine("argocd", "Active", "5y18d", "1234", "kubernetes.io/metadata.name=argocd"),
        fixedWidthLine("beta", "Active", "4y235d", "1234", "kubernetes.io/metadata.name=beta"),
    ).joinToString(separator = "\n")

    val df1 = DataFrame.readCsvStr(
        text = csv,
        hasFixedWidthColumns = true,
    )
    df1["NAME"].type() shouldBe typeOf<String>()
    df1["STATUS"].type() shouldBe typeOf<String>()
    df1["AGE"].type() shouldBe typeOf<String>()
    df1["NUMBER"].type() shouldBe typeOf<Int>()
    df1["LABELS"].type() shouldBe typeOf<String>()

    val df2 = DataFrame.readCsvStr(
        text = csv,
        hasFixedWidthColumns = true,
        // 25, 9, 9, 9 for the padded columns plus 100 for the trailing LABELS column
        fixedColumnWidths = leadingWidths + 100,
        skipLines = 1,
        header = listOf("name", "status", "age", "number", "labels"),
    )
    df2["name"].type() shouldBe typeOf<String>()
    df2["status"].type() shouldBe typeOf<String>()
    df2["age"].type() shouldBe typeOf<String>()
    df2["number"].type() shouldBe typeOf<Int>()
    df2["labels"].type() shouldBe typeOf<String>()
}
/**
 * Combines a custom header, two skipped lines and `colTypes` with a [ColType.DEFAULT] entry:
 * `a` is forced to Int, `b` to Double and every other column falls back to String.
 */
@Test
fun `handle default coltype with other parameters`() {
    val customHeader = ('a'..'k').map { it.toString() }
    val requestedTypes = mapOf(
        "a" to ColType.Int,
        "b" to ColType.Double,
        ColType.DEFAULT to ColType.String,
    )

    val df = DataFrame.readCsv(
        simpleCsv,
        header = customHeader,
        skipLines = 2,
        colTypes = requestedTypes,
    )

    df.columnTypes().shouldContainInOrder(
        typeOf<Int>(),
        typeOf<Double>(),
        typeOf<String>(),
        typeOf<String?>(),
        typeOf<String>(),
        typeOf<String?>(),
        typeOf<String?>(),
        typeOf<String?>(),
        typeOf<String>(),
        typeOf<String>(),
        typeOf<String?>(),
    )
    df.rowsCount() shouldBe 4
}
/**
 * With [Double] listed in `skipTypes`, only the column explicitly typed through `colTypes`
 * stays [Double]; the other numeric columns of the iris fixture become [BigDecimal].
 * The same must hold when the skip type is registered on the global `DataFrame.parser`.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the skip type into other tests.
 */
@Test
fun `skipping types`() {
    val df1 = DataFrame.readCsv(
        irisDataset,
        colTypes = mapOf("sepal.length" to ColType.Double),
        parserOptions = ParserOptions(
            skipTypes = setOf(typeOf<Double>()),
        ),
    )
    df1["sepal.length"].type() shouldBe typeOf<Double>()
    df1["sepal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.length"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["variety"].type() shouldBe typeOf<String>()

    // Also test the global setting
    try {
        DataFrame.parser.addSkipType(typeOf<Double>())
        val df2 = DataFrame.readCsv(
            irisDataset,
            colTypes = mapOf("sepal.length" to ColType.Double),
        )
        df2 shouldBe df1
    } finally {
        // Restore the shared parser configuration regardless of the outcome above.
        DataFrame.parser.resetToDefault()
    }
}
// Issue #921
/**
 * Custom null strings (`noppes` and the empty string) must be honoured for columns whose
 * type is forced through `colTypes`, both when supplied via [ParserOptions] and when
 * registered on the global `DataFrame.parser`.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the extra null strings into other tests.
 */
@Test
fun `read csv with custom null strings and given type`() {
    @Language("CSV")
    val csv =
        """
        a,b
        noppes,2
        1.2,
        3,45
        ,noppes
        1.3,1
        """.trimIndent()
    val forcedTypes = mapOf("a" to ColType.Double, "b" to ColType.Int)

    val df1 = DataFrame.readCsvStr(
        csv,
        parserOptions = ParserOptions(
            nullStrings = setOf("noppes", ""),
        ),
        colTypes = forcedTypes,
    )
    df1 shouldBe dataFrameOf("a", "b")(
        null, 2,
        1.2, null,
        3.0, 45,
        null, null,
        1.3, 1,
    )

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("noppes")
        DataFrame.parser.addNullString("")
        val df2 = DataFrame.readCsvStr(
            csv,
            colTypes = forcedTypes,
        )
        df2 shouldBe df1
    } finally {
        // Restore the shared parser configuration regardless of the outcome above.
        DataFrame.parser.resetToDefault()
    }
}
// Issue #1047
/**
 * By default datetime-like columns go through DataFrame's own parsers; the Deephaven
 * datetime parser is only used for a column when it is requested through `adjustCsvSpecs`.
 */
@Test
fun `Only use Deephaven datetime parser with custom csv specs`() {
    @Language("csv")
    val csvContent =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    // use DFs parsers by default for datetime-like columns
    val byDefault = DataFrame.readCsvStr(csvContent)

    val defaultWithOffset = byDefault["with_timezone_offset"]
    defaultWithOffset.type() shouldBe typeOf<StdlibInstant>()
    defaultWithOffset[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")

    val defaultWithoutOffset = byDefault["without_timezone_offset"]
    defaultWithoutOffset.type() shouldBe typeOf<LocalDateTime>()
    defaultWithoutOffset[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")

    // enable fast datetime parser for the first column with adjustCsvSpecs
    val withFastParser = DataFrame.readCsv(
        inputStream = csvContent.byteInputStream(),
        adjustCsvSpecs = { putParserForName("with_timezone_offset", Parsers.DATETIME) },
    )

    // the expected value is one hour earlier than the wall-clock time written in the input
    val fastWithOffset = withFastParser["with_timezone_offset"]
    fastWithOffset.type() shouldBe typeOf<LocalDateTime>()
    fastWithOffset[0] shouldBe LocalDateTime.parse("2024-12-12T12:00:00")

    val fastWithoutOffset = withFastParser["without_timezone_offset"]
    fastWithoutOffset.type() shouldBe typeOf<LocalDateTime>()
    fastWithoutOffset[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")
}
/**
 * With `parseExperimentalInstant` enabled on the global `DataFrame.parser`, a column of
 * timestamps with a timezone offset must be parsed as [StdlibInstant].
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the flag into other tests.
 */
@Test
fun `test parsing kotlin-time-Instant`() {
    @Language("csv")
    val csvContent =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    try {
        DataFrame.parser.parseExperimentalInstant = true

        // use DFs parsers by default for datetime-like columns
        val df1 = DataFrame.readCsvStr(csvContent)
        df1["with_timezone_offset"].let {
            it.type() shouldBe typeOf<StdlibInstant>()
            it[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")
        }
    } finally {
        // Restore the shared parser configuration regardless of the outcome above.
        DataFrame.parser.resetToDefault()
    }
}
/**
 * Writing a frame with a column group to CSV must render each group row as a JSON object;
 * with `'` as quote character the JSON's own double quotes stay as they are.
 */
@Test
fun `json dependency test`() {
    val people = dataFrameOf("firstName", "lastName")(
        "John", "Doe",
        "Jane", "Doe",
    )
    val df = people.group { "firstName" and "lastName" }.into { "name" }

    val actualCsv = df.toCsvStr(quote = '\'')

    val expectedCsv =
        """
        name
        '{"firstName":"John","lastName":"Doe"}'
        '{"firstName":"Jane","lastName":"Doe"}'
        """.trimIndent()
    actualCsv shouldBe expectedCsv
}
/** Classpath locations of the fixture files shared by the tests of this class. */
companion object {
    // Resolved with testCsv (".csv" is appended) or testResource (full resource name).
    private val irisDataset: URL = testCsv("irisDataset")
    private val simpleCsv: URL = testCsv("testCSV")
    private val simpleCsvUtf16le: URL = testCsv("testCSV-utf-16-le-bom")
    private val simpleCsvUtf16leGz: URL = testResource("testCSV-utf16le-bom.csv.gz")
    private val simpleCsvUtf16leZip: URL = testResource("testCSV-utf-16-le-bom.zip")
    private val simpleCsvZip: URL = testResource("testCSV.zip")
    private val twoCsvsZip: URL = testResource("two csvs.zip")
    private val simpleCsvGz: URL = testResource("testCSV.csv.gz")
    private val csvWithFrenchLocale: URL = testCsv("testCSVwithFrenchLocale")
    private val wineCsv: URL = testCsv("wine")
    private val withBomCsv: URL = testCsv("with-bom")
    private val msleepCsv: URL = testCsv("msleep")
    private val notCsv: URL = testResource("not-csv.zip")
}
}
/**
 * Looks up [resourcePath] through the class loader of [DelimCsvTsvTests].
 *
 * @param resourcePath resource name as understood by `ClassLoader.getResource`.
 * @return the URL of the resource.
 * @throws IllegalArgumentException if the class loader finds no such resource. The message
 *   names the missing path, which a bare `!!` (a message-less NullPointerException) did not.
 */
fun testResource(resourcePath: String): URL =
    requireNotNull(DelimCsvTsvTests::class.java.classLoader.getResource(resourcePath)) {
        "Test resource not found on the classpath: $resourcePath"
    }
/** Resolves the test resource named [csvName] with the `.csv` extension appended. */
fun testCsv(csvName: String): URL {
    val resourcePath = "$csvName.csv"
    return testResource(resourcePath)
}
+3
View File
@@ -0,0 +1,3 @@
A B C
1 2 3
4 5 6
1 A B C
2 1 2 3
3 4 5 6
@@ -0,0 +1,608 @@
,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
0,2020,MI,FT,Data Scientist,70000,EUR,79833,DE,0,DE,L
1,2020,SE,FT,Machine Learning Scientist,260000,USD,260000,JP,0,JP,S
2,2020,SE,FT,Big Data Engineer,85000,GBP,109024,GB,50,GB,M
3,2020,MI,FT,Product Data Analyst,20000,USD,20000,HN,0,HN,S
4,2020,SE,FT,Machine Learning Engineer,150000,USD,150000,US,50,US,L
5,2020,EN,FT,Data Analyst,72000,USD,72000,US,100,US,L
6,2020,SE,FT,Lead Data Scientist,190000,USD,190000,US,100,US,S
7,2020,MI,FT,Data Scientist,11000000,HUF,35735,HU,50,HU,L
8,2020,MI,FT,Business Data Analyst,135000,USD,135000,US,100,US,L
9,2020,SE,FT,Lead Data Engineer,125000,USD,125000,NZ,50,NZ,S
10,2020,EN,FT,Data Scientist,45000,EUR,51321,FR,0,FR,S
11,2020,MI,FT,Data Scientist,3000000,INR,40481,IN,0,IN,L
12,2020,EN,FT,Data Scientist,35000,EUR,39916,FR,0,FR,M
13,2020,MI,FT,Lead Data Analyst,87000,USD,87000,US,100,US,L
14,2020,MI,FT,Data Analyst,85000,USD,85000,US,100,US,L
15,2020,MI,FT,Data Analyst,8000,USD,8000,PK,50,PK,L
16,2020,EN,FT,Data Engineer,4450000,JPY,41689,JP,100,JP,S
17,2020,SE,FT,Big Data Engineer,100000,EUR,114047,PL,100,GB,S
18,2020,EN,FT,Data Science Consultant,423000,INR,5707,IN,50,IN,M
19,2020,MI,FT,Lead Data Engineer,56000,USD,56000,PT,100,US,M
20,2020,MI,FT,Machine Learning Engineer,299000,CNY,43331,CN,0,CN,M
21,2020,MI,FT,Product Data Analyst,450000,INR,6072,IN,100,IN,L
22,2020,SE,FT,Data Engineer,42000,EUR,47899,GR,50,GR,L
23,2020,MI,FT,BI Data Analyst,98000,USD,98000,US,0,US,M
24,2020,MI,FT,Lead Data Scientist,115000,USD,115000,AE,0,AE,L
25,2020,EX,FT,Director of Data Science,325000,USD,325000,US,100,US,L
26,2020,EN,FT,Research Scientist,42000,USD,42000,NL,50,NL,L
27,2020,SE,FT,Data Engineer,720000,MXN,33511,MX,0,MX,S
28,2020,EN,CT,Business Data Analyst,100000,USD,100000,US,100,US,L
29,2020,SE,FT,Machine Learning Manager,157000,CAD,117104,CA,50,CA,L
30,2020,MI,FT,Data Engineering Manager,51999,EUR,59303,DE,100,DE,S
31,2020,EN,FT,Big Data Engineer,70000,USD,70000,US,100,US,L
32,2020,SE,FT,Data Scientist,60000,EUR,68428,GR,100,US,L
33,2020,MI,FT,Research Scientist,450000,USD,450000,US,0,US,M
34,2020,MI,FT,Data Analyst,41000,EUR,46759,FR,50,FR,L
35,2020,MI,FT,Data Engineer,65000,EUR,74130,AT,50,AT,L
36,2020,MI,FT,Data Science Consultant,103000,USD,103000,US,100,US,L
37,2020,EN,FT,Machine Learning Engineer,250000,USD,250000,US,50,US,L
38,2020,EN,FT,Data Analyst,10000,USD,10000,NG,100,NG,S
39,2020,EN,FT,Machine Learning Engineer,138000,USD,138000,US,100,US,S
40,2020,MI,FT,Data Scientist,45760,USD,45760,PH,100,US,S
41,2020,EX,FT,Data Engineering Manager,70000,EUR,79833,ES,50,ES,L
42,2020,MI,FT,Machine Learning Infrastructure Engineer,44000,EUR,50180,PT,0,PT,M
43,2020,MI,FT,Data Engineer,106000,USD,106000,US,100,US,L
44,2020,MI,FT,Data Engineer,88000,GBP,112872,GB,50,GB,L
45,2020,EN,PT,ML Engineer,14000,EUR,15966,DE,100,DE,S
46,2020,MI,FT,Data Scientist,60000,GBP,76958,GB,100,GB,S
47,2020,SE,FT,Data Engineer,188000,USD,188000,US,100,US,L
48,2020,MI,FT,Data Scientist,105000,USD,105000,US,100,US,L
49,2020,MI,FT,Data Engineer,61500,EUR,70139,FR,50,FR,L
50,2020,EN,FT,Data Analyst,450000,INR,6072,IN,0,IN,S
51,2020,EN,FT,Data Analyst,91000,USD,91000,US,100,US,L
52,2020,EN,FT,AI Scientist,300000,DKK,45896,DK,50,DK,S
53,2020,EN,FT,Data Engineer,48000,EUR,54742,PK,100,DE,L
54,2020,SE,FL,Computer Vision Engineer,60000,USD,60000,RU,100,US,S
55,2020,SE,FT,Principal Data Scientist,130000,EUR,148261,DE,100,DE,M
56,2020,MI,FT,Data Scientist,34000,EUR,38776,ES,100,ES,M
57,2020,MI,FT,Data Scientist,118000,USD,118000,US,100,US,M
58,2020,SE,FT,Data Scientist,120000,USD,120000,US,50,US,L
59,2020,MI,FT,Data Scientist,138350,USD,138350,US,100,US,M
60,2020,MI,FT,Data Engineer,110000,USD,110000,US,100,US,L
61,2020,MI,FT,Data Engineer,130800,USD,130800,ES,100,US,M
62,2020,EN,PT,Data Scientist,19000,EUR,21669,IT,50,IT,S
63,2020,SE,FT,Data Scientist,412000,USD,412000,US,100,US,L
64,2020,SE,FT,Machine Learning Engineer,40000,EUR,45618,HR,100,HR,S
65,2020,EN,FT,Data Scientist,55000,EUR,62726,DE,50,DE,S
66,2020,EN,FT,Data Scientist,43200,EUR,49268,DE,0,DE,S
67,2020,SE,FT,Data Science Manager,190200,USD,190200,US,100,US,M
68,2020,EN,FT,Data Scientist,105000,USD,105000,US,100,US,S
69,2020,SE,FT,Data Scientist,80000,EUR,91237,AT,0,AT,S
70,2020,MI,FT,Data Scientist,55000,EUR,62726,FR,50,LU,S
71,2020,MI,FT,Data Scientist,37000,EUR,42197,FR,50,FR,S
72,2021,EN,FT,Research Scientist,60000,GBP,82528,GB,50,GB,L
73,2021,EX,FT,BI Data Analyst,150000,USD,150000,IN,100,US,L
74,2021,EX,FT,Head of Data,235000,USD,235000,US,100,US,L
75,2021,SE,FT,Data Scientist,45000,EUR,53192,FR,50,FR,L
76,2021,MI,FT,BI Data Analyst,100000,USD,100000,US,100,US,M
77,2021,MI,PT,3D Computer Vision Researcher,400000,INR,5409,IN,50,IN,M
78,2021,MI,CT,ML Engineer,270000,USD,270000,US,100,US,L
79,2021,EN,FT,Data Analyst,80000,USD,80000,US,100,US,M
80,2021,SE,FT,Data Analytics Engineer,67000,EUR,79197,DE,100,DE,L
81,2021,MI,FT,Data Engineer,140000,USD,140000,US,100,US,L
82,2021,MI,FT,Applied Data Scientist,68000,CAD,54238,GB,50,CA,L
83,2021,MI,FT,Machine Learning Engineer,40000,EUR,47282,ES,100,ES,S
84,2021,EX,FT,Director of Data Science,130000,EUR,153667,IT,100,PL,L
85,2021,MI,FT,Data Engineer,110000,PLN,28476,PL,100,PL,L
86,2021,EN,FT,Data Analyst,50000,EUR,59102,FR,50,FR,M
87,2021,MI,FT,Data Analytics Engineer,110000,USD,110000,US,100,US,L
88,2021,SE,FT,Lead Data Analyst,170000,USD,170000,US,100,US,L
89,2021,SE,FT,Data Analyst,80000,USD,80000,BG,100,US,S
90,2021,SE,FT,Marketing Data Analyst,75000,EUR,88654,GR,100,DK,L
91,2021,EN,FT,Data Science Consultant,65000,EUR,76833,DE,100,DE,S
92,2021,MI,FT,Lead Data Analyst,1450000,INR,19609,IN,100,IN,L
93,2021,SE,FT,Lead Data Engineer,276000,USD,276000,US,0,US,L
94,2021,EN,FT,Data Scientist,2200000,INR,29751,IN,50,IN,L
95,2021,MI,FT,Cloud Data Engineer,120000,SGD,89294,SG,50,SG,L
96,2021,EN,PT,AI Scientist,12000,USD,12000,BR,100,US,S
97,2021,MI,FT,Financial Data Analyst,450000,USD,450000,US,100,US,L
98,2021,EN,FT,Computer Vision Software Engineer,70000,USD,70000,US,100,US,M
99,2021,MI,FT,Computer Vision Software Engineer,81000,EUR,95746,DE,100,US,S
100,2021,MI,FT,Data Analyst,75000,USD,75000,US,0,US,L
101,2021,SE,FT,Data Engineer,150000,USD,150000,US,100,US,L
102,2021,MI,FT,BI Data Analyst,11000000,HUF,36259,HU,50,US,L
103,2021,MI,FT,Data Analyst,62000,USD,62000,US,0,US,L
104,2021,MI,FT,Data Scientist,73000,USD,73000,US,0,US,L
105,2021,MI,FT,Data Analyst,37456,GBP,51519,GB,50,GB,L
106,2021,MI,FT,Research Scientist,235000,CAD,187442,CA,100,CA,L
107,2021,SE,FT,Data Engineer,115000,USD,115000,US,100,US,S
108,2021,SE,FT,Data Engineer,150000,USD,150000,US,100,US,M
109,2021,EN,FT,Data Engineer,2250000,INR,30428,IN,100,IN,L
110,2021,SE,FT,Machine Learning Engineer,80000,EUR,94564,DE,50,DE,L
111,2021,SE,FT,Director of Data Engineering,82500,GBP,113476,GB,100,GB,M
112,2021,SE,FT,Lead Data Engineer,75000,GBP,103160,GB,100,GB,S
113,2021,EN,PT,AI Scientist,12000,USD,12000,PK,100,US,M
114,2021,MI,FT,Data Engineer,38400,EUR,45391,NL,100,NL,L
115,2021,EN,FT,Machine Learning Scientist,225000,USD,225000,US,100,US,L
116,2021,MI,FT,Data Scientist,50000,USD,50000,NG,100,NG,L
117,2021,MI,FT,Data Science Engineer,34000,EUR,40189,GR,100,GR,M
118,2021,EN,FT,Data Analyst,90000,USD,90000,US,100,US,S
119,2021,MI,FT,Data Engineer,200000,USD,200000,US,100,US,L
120,2021,MI,FT,Big Data Engineer,60000,USD,60000,ES,50,RO,M
121,2021,SE,FT,Principal Data Engineer,200000,USD,200000,US,100,US,M
122,2021,EN,FT,Data Analyst,50000,USD,50000,US,100,US,M
123,2021,EN,FT,Applied Data Scientist,80000,GBP,110037,GB,0,GB,L
124,2021,EN,PT,Data Analyst,8760,EUR,10354,ES,50,ES,M
125,2021,MI,FT,Principal Data Scientist,151000,USD,151000,US,100,US,L
126,2021,SE,FT,Machine Learning Scientist,120000,USD,120000,US,50,US,S
127,2021,MI,FT,Data Scientist,700000,INR,9466,IN,0,IN,S
128,2021,EN,FT,Machine Learning Engineer,20000,USD,20000,IN,100,IN,S
129,2021,SE,FT,Lead Data Scientist,3000000,INR,40570,IN,50,IN,L
130,2021,EN,FT,Machine Learning Developer,100000,USD,100000,IQ,50,IQ,S
131,2021,EN,FT,Data Scientist,42000,EUR,49646,FR,50,FR,M
132,2021,MI,FT,Applied Machine Learning Scientist,38400,USD,38400,VN,100,US,M
133,2021,SE,FT,Computer Vision Engineer,24000,USD,24000,BR,100,BR,M
134,2021,EN,FT,Data Scientist,100000,USD,100000,US,0,US,S
135,2021,MI,FT,Data Analyst,90000,USD,90000,US,100,US,M
136,2021,MI,FT,ML Engineer,7000000,JPY,63711,JP,50,JP,S
137,2021,MI,FT,ML Engineer,8500000,JPY,77364,JP,50,JP,S
138,2021,SE,FT,Principal Data Scientist,220000,USD,220000,US,0,US,L
139,2021,EN,FT,Data Scientist,80000,USD,80000,US,100,US,M
140,2021,MI,FT,Data Analyst,135000,USD,135000,US,100,US,L
141,2021,SE,FT,Data Science Manager,240000,USD,240000,US,0,US,L
142,2021,SE,FT,Data Engineering Manager,150000,USD,150000,US,0,US,L
143,2021,MI,FT,Data Scientist,82500,USD,82500,US,100,US,S
144,2021,MI,FT,Data Engineer,100000,USD,100000,US,100,US,L
145,2021,SE,FT,Machine Learning Engineer,70000,EUR,82744,BE,50,BE,M
146,2021,MI,FT,Research Scientist,53000,EUR,62649,FR,50,FR,M
147,2021,MI,FT,Data Engineer,90000,USD,90000,US,100,US,L
148,2021,SE,FT,Data Engineering Manager,153000,USD,153000,US,100,US,L
149,2021,SE,FT,Cloud Data Engineer,160000,USD,160000,BR,100,US,S
150,2021,SE,FT,Director of Data Science,168000,USD,168000,JP,0,JP,S
151,2021,MI,FT,Data Scientist,150000,USD,150000,US,100,US,M
152,2021,MI,FT,Data Scientist,95000,CAD,75774,CA,100,CA,L
153,2021,EN,FT,Data Scientist,13400,USD,13400,UA,100,UA,L
154,2021,SE,FT,Data Science Manager,144000,USD,144000,US,100,US,L
155,2021,SE,FT,Data Science Engineer,159500,CAD,127221,CA,50,CA,L
156,2021,MI,FT,Data Scientist,160000,SGD,119059,SG,100,IL,M
157,2021,MI,FT,Applied Machine Learning Scientist,423000,USD,423000,US,50,US,L
158,2021,SE,FT,Data Analytics Manager,120000,USD,120000,US,100,US,M
159,2021,EN,FT,Machine Learning Engineer,125000,USD,125000,US,100,US,S
160,2021,EX,FT,Head of Data,230000,USD,230000,RU,50,RU,L
161,2021,EX,FT,Head of Data Science,85000,USD,85000,RU,0,RU,M
162,2021,MI,FT,Data Engineer,24000,EUR,28369,MT,50,MT,L
163,2021,EN,FT,Data Science Consultant,54000,EUR,63831,DE,50,DE,L
164,2021,EX,FT,Director of Data Science,110000,EUR,130026,DE,50,DE,M
165,2021,SE,FT,Data Specialist,165000,USD,165000,US,100,US,L
166,2021,EN,FT,Data Engineer,80000,USD,80000,US,100,US,L
167,2021,EX,FT,Director of Data Science,250000,USD,250000,US,0,US,L
168,2021,EN,FT,BI Data Analyst,55000,USD,55000,US,50,US,S
169,2021,MI,FT,Data Architect,150000,USD,150000,US,100,US,L
170,2021,MI,FT,Data Architect,170000,USD,170000,US,100,US,L
171,2021,MI,FT,Data Engineer,60000,GBP,82528,GB,100,GB,L
172,2021,EN,FT,Data Analyst,60000,USD,60000,US,100,US,S
173,2021,SE,FT,Principal Data Scientist,235000,USD,235000,US,100,US,L
174,2021,SE,FT,Research Scientist,51400,EUR,60757,PT,50,PT,L
175,2021,SE,FT,Data Engineering Manager,174000,USD,174000,US,100,US,L
176,2021,MI,FT,Data Scientist,58000,MXN,2859,MX,0,MX,S
177,2021,MI,FT,Data Scientist,30400000,CLP,40038,CL,100,CL,L
178,2021,EN,FT,Machine Learning Engineer,81000,USD,81000,US,50,US,S
179,2021,MI,FT,Data Scientist,420000,INR,5679,IN,100,US,S
180,2021,MI,FT,Big Data Engineer,1672000,INR,22611,IN,0,IN,L
181,2021,MI,FT,Data Scientist,76760,EUR,90734,DE,50,DE,L
182,2021,MI,FT,Data Engineer,22000,EUR,26005,RO,0,US,L
183,2021,SE,FT,Finance Data Analyst,45000,GBP,61896,GB,50,GB,L
184,2021,MI,FL,Machine Learning Scientist,12000,USD,12000,PK,50,PK,M
185,2021,MI,FT,Data Engineer,4000,USD,4000,IR,100,IR,M
186,2021,SE,FT,Data Analytics Engineer,50000,USD,50000,VN,100,GB,M
187,2021,EX,FT,Data Science Consultant,59000,EUR,69741,FR,100,ES,S
188,2021,SE,FT,Data Engineer,65000,EUR,76833,RO,50,GB,S
189,2021,MI,FT,Machine Learning Engineer,74000,USD,74000,JP,50,JP,S
190,2021,SE,FT,Data Science Manager,152000,USD,152000,US,100,FR,L
191,2021,EN,FT,Machine Learning Engineer,21844,USD,21844,CO,50,CO,M
192,2021,MI,FT,Big Data Engineer,18000,USD,18000,MD,0,MD,S
193,2021,SE,FT,Data Science Manager,174000,USD,174000,US,100,US,L
194,2021,SE,FT,Research Scientist,120500,CAD,96113,CA,50,CA,L
195,2021,MI,FT,Data Scientist,147000,USD,147000,US,50,US,L
196,2021,EN,FT,BI Data Analyst,9272,USD,9272,KE,100,KE,S
197,2021,SE,FT,Machine Learning Engineer,1799997,INR,24342,IN,100,IN,L
198,2021,SE,FT,Data Science Manager,4000000,INR,54094,IN,50,US,L
199,2021,EN,FT,Data Science Consultant,90000,USD,90000,US,100,US,S
200,2021,MI,FT,Data Scientist,52000,EUR,61467,DE,50,AT,M
201,2021,SE,FT,Machine Learning Infrastructure Engineer,195000,USD,195000,US,100,US,M
202,2021,MI,FT,Data Scientist,32000,EUR,37825,ES,100,ES,L
203,2021,SE,FT,Research Scientist,50000,USD,50000,FR,100,US,S
204,2021,MI,FT,Data Scientist,160000,USD,160000,US,100,US,L
205,2021,MI,FT,Data Scientist,69600,BRL,12901,BR,0,BR,S
206,2021,SE,FT,Machine Learning Engineer,200000,USD,200000,US,100,US,L
207,2021,SE,FT,Data Engineer,165000,USD,165000,US,0,US,M
208,2021,MI,FL,Data Engineer,20000,USD,20000,IT,0,US,L
209,2021,SE,FT,Data Analytics Manager,120000,USD,120000,US,0,US,L
210,2021,MI,FT,Machine Learning Engineer,21000,EUR,24823,SI,50,SI,L
211,2021,MI,FT,Research Scientist,48000,EUR,56738,FR,50,FR,S
212,2021,MI,FT,Data Engineer,48000,GBP,66022,HK,50,GB,S
213,2021,EN,FT,Big Data Engineer,435000,INR,5882,IN,0,CH,L
214,2021,EN,FT,Machine Learning Engineer,21000,EUR,24823,DE,50,DE,M
215,2021,SE,FT,Principal Data Engineer,185000,USD,185000,US,100,US,L
216,2021,EN,PT,Computer Vision Engineer,180000,DKK,28609,DK,50,DK,S
217,2021,MI,FT,Data Scientist,76760,EUR,90734,DE,50,DE,L
218,2021,MI,FT,Machine Learning Engineer,75000,EUR,88654,BE,100,BE,M
219,2021,SE,FT,Data Analytics Manager,140000,USD,140000,US,100,US,L
220,2021,MI,FT,Machine Learning Engineer,180000,PLN,46597,PL,100,PL,L
221,2021,MI,FT,Data Scientist,85000,GBP,116914,GB,50,GB,L
222,2021,MI,FT,Data Scientist,2500000,INR,33808,IN,0,IN,M
223,2021,MI,FT,Data Scientist,40900,GBP,56256,GB,50,GB,L
224,2021,SE,FT,Machine Learning Scientist,225000,USD,225000,US,100,CA,L
225,2021,EX,CT,Principal Data Scientist,416000,USD,416000,US,100,US,S
226,2021,SE,FT,Data Scientist,110000,CAD,87738,CA,100,CA,S
227,2021,MI,FT,Data Scientist,75000,EUR,88654,DE,50,DE,L
228,2021,SE,FT,Data Scientist,135000,USD,135000,US,0,US,L
229,2021,SE,FT,Data Analyst,90000,CAD,71786,CA,100,CA,M
230,2021,EN,FT,Big Data Engineer,1200000,INR,16228,IN,100,IN,L
231,2021,SE,FT,ML Engineer,256000,USD,256000,US,100,US,S
232,2021,SE,FT,Director of Data Engineering,200000,USD,200000,US,100,US,L
233,2021,SE,FT,Data Analyst,200000,USD,200000,US,100,US,L
234,2021,MI,FT,Data Architect,180000,USD,180000,US,100,US,L
235,2021,MI,FT,Head of Data Science,110000,USD,110000,US,0,US,S
236,2021,MI,FT,Research Scientist,80000,CAD,63810,CA,100,CA,M
237,2021,MI,FT,Data Scientist,39600,EUR,46809,ES,100,ES,M
238,2021,EN,FT,Data Scientist,4000,USD,4000,VN,0,VN,M
239,2021,EN,FT,Data Engineer,1600000,INR,21637,IN,50,IN,M
240,2021,SE,FT,Data Scientist,130000,CAD,103691,CA,100,CA,L
241,2021,MI,FT,Data Analyst,80000,USD,80000,US,100,US,L
242,2021,MI,FT,Data Engineer,110000,USD,110000,US,100,US,L
243,2021,SE,FT,Data Scientist,165000,USD,165000,US,100,US,L
244,2021,EN,FT,AI Scientist,1335000,INR,18053,IN,100,AS,S
245,2021,MI,FT,Data Engineer,52500,GBP,72212,GB,50,GB,L
246,2021,EN,FT,Data Scientist,31000,EUR,36643,FR,50,FR,L
247,2021,MI,FT,Data Engineer,108000,TRY,12103,TR,0,TR,M
248,2021,SE,FT,Data Engineer,70000,GBP,96282,GB,50,GB,L
249,2021,SE,FT,Principal Data Analyst,170000,USD,170000,US,100,US,M
250,2021,MI,FT,Data Scientist,115000,USD,115000,US,50,US,L
251,2021,EN,FT,Data Scientist,90000,USD,90000,US,100,US,S
252,2021,EX,FT,Principal Data Engineer,600000,USD,600000,US,100,US,L
253,2021,EN,FT,Data Scientist,2100000,INR,28399,IN,100,IN,M
254,2021,MI,FT,Data Analyst,93000,USD,93000,US,100,US,L
255,2021,SE,FT,Big Data Architect,125000,CAD,99703,CA,50,CA,M
256,2021,MI,FT,Data Engineer,200000,USD,200000,US,100,US,L
257,2021,SE,FT,Principal Data Scientist,147000,EUR,173762,DE,100,DE,M
258,2021,SE,FT,Machine Learning Engineer,185000,USD,185000,US,50,US,L
259,2021,EX,FT,Director of Data Science,120000,EUR,141846,DE,0,DE,L
260,2021,MI,FT,Data Scientist,130000,USD,130000,US,50,US,L
261,2021,SE,FT,Data Analyst,54000,EUR,63831,DE,50,DE,L
262,2021,MI,FT,Data Scientist,1250000,INR,16904,IN,100,IN,S
263,2021,SE,FT,Machine Learning Engineer,4900000,INR,66265,IN,0,IN,L
264,2021,MI,FT,Data Scientist,21600,EUR,25532,RS,100,DE,S
265,2021,SE,FT,Lead Data Engineer,160000,USD,160000,PR,50,US,S
266,2021,MI,FT,Data Engineer,93150,USD,93150,US,0,US,M
267,2021,MI,FT,Data Engineer,111775,USD,111775,US,0,US,M
268,2021,MI,FT,Data Engineer,250000,TRY,28016,TR,100,TR,M
269,2021,EN,FT,Data Engineer,55000,EUR,65013,DE,50,DE,M
270,2021,EN,FT,Data Engineer,72500,USD,72500,US,100,US,L
271,2021,SE,FT,Computer Vision Engineer,102000,BRL,18907,BR,0,BR,M
272,2021,EN,FT,Data Science Consultant,65000,EUR,76833,DE,0,DE,L
273,2021,EN,FT,Machine Learning Engineer,85000,USD,85000,NL,100,DE,S
274,2021,SE,FT,Data Scientist,65720,EUR,77684,FR,50,FR,M
275,2021,EN,FT,Data Scientist,100000,USD,100000,US,100,US,M
276,2021,EN,FT,Data Scientist,58000,USD,58000,US,50,US,L
277,2021,SE,FT,AI Scientist,55000,USD,55000,ES,100,ES,L
278,2021,SE,FT,Data Scientist,180000,TRY,20171,TR,50,TR,L
279,2021,EN,FT,Business Data Analyst,50000,EUR,59102,LU,100,LU,L
280,2021,MI,FT,Data Engineer,112000,USD,112000,US,100,US,L
281,2021,EN,FT,Research Scientist,100000,USD,100000,JE,0,CN,L
282,2021,MI,PT,Data Engineer,59000,EUR,69741,NL,100,NL,L
283,2021,SE,CT,Staff Data Scientist,105000,USD,105000,US,100,US,M
284,2021,MI,FT,Research Scientist,69999,USD,69999,CZ,50,CZ,L
285,2021,SE,FT,Data Science Manager,7000000,INR,94665,IN,50,IN,L
286,2021,SE,FT,Head of Data,87000,EUR,102839,SI,100,SI,L
287,2021,MI,FT,Data Scientist,109000,USD,109000,US,50,US,L
288,2021,MI,FT,Machine Learning Engineer,43200,EUR,51064,IT,50,IT,L
289,2022,SE,FT,Data Engineer,135000,USD,135000,US,100,US,M
290,2022,SE,FT,Data Analyst,155000,USD,155000,US,100,US,M
291,2022,SE,FT,Data Analyst,120600,USD,120600,US,100,US,M
292,2022,MI,FT,Data Scientist,130000,USD,130000,US,0,US,M
293,2022,MI,FT,Data Scientist,90000,USD,90000,US,0,US,M
294,2022,MI,FT,Data Engineer,170000,USD,170000,US,100,US,M
295,2022,MI,FT,Data Engineer,150000,USD,150000,US,100,US,M
296,2022,SE,FT,Data Analyst,102100,USD,102100,US,100,US,M
297,2022,SE,FT,Data Analyst,84900,USD,84900,US,100,US,M
298,2022,SE,FT,Data Scientist,136620,USD,136620,US,100,US,M
299,2022,SE,FT,Data Scientist,99360,USD,99360,US,100,US,M
300,2022,SE,FT,Data Scientist,90000,GBP,117789,GB,0,GB,M
301,2022,SE,FT,Data Scientist,80000,GBP,104702,GB,0,GB,M
302,2022,SE,FT,Data Scientist,146000,USD,146000,US,100,US,M
303,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
304,2022,EN,FT,Data Engineer,40000,GBP,52351,GB,100,GB,M
305,2022,SE,FT,Data Analyst,99000,USD,99000,US,0,US,M
306,2022,SE,FT,Data Analyst,116000,USD,116000,US,0,US,M
307,2022,MI,FT,Data Analyst,106260,USD,106260,US,0,US,M
308,2022,MI,FT,Data Analyst,126500,USD,126500,US,0,US,M
309,2022,EX,FT,Data Engineer,242000,USD,242000,US,100,US,M
310,2022,EX,FT,Data Engineer,200000,USD,200000,US,100,US,M
311,2022,MI,FT,Data Scientist,50000,GBP,65438,GB,0,GB,M
312,2022,MI,FT,Data Scientist,30000,GBP,39263,GB,0,GB,M
313,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,0,GB,M
314,2022,MI,FT,Data Engineer,40000,GBP,52351,GB,0,GB,M
315,2022,SE,FT,Data Scientist,165220,USD,165220,US,100,US,M
316,2022,EN,FT,Data Engineer,35000,GBP,45807,GB,100,GB,M
317,2022,SE,FT,Data Scientist,120160,USD,120160,US,100,US,M
318,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
319,2022,SE,FT,Data Engineer,181940,USD,181940,US,0,US,M
320,2022,SE,FT,Data Engineer,132320,USD,132320,US,0,US,M
321,2022,SE,FT,Data Engineer,220110,USD,220110,US,0,US,M
322,2022,SE,FT,Data Engineer,160080,USD,160080,US,0,US,M
323,2022,SE,FT,Data Scientist,180000,USD,180000,US,0,US,L
324,2022,SE,FT,Data Scientist,120000,USD,120000,US,0,US,L
325,2022,SE,FT,Data Analyst,124190,USD,124190,US,100,US,M
326,2022,EX,FT,Data Analyst,130000,USD,130000,US,100,US,M
327,2022,EX,FT,Data Analyst,110000,USD,110000,US,100,US,M
328,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
329,2022,MI,FT,Data Analyst,115500,USD,115500,US,100,US,M
330,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
331,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
332,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
333,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
334,2022,SE,FT,Data Engineer,165400,USD,165400,US,100,US,M
335,2022,SE,FT,Data Engineer,132320,USD,132320,US,100,US,M
336,2022,MI,FT,Data Analyst,167000,USD,167000,US,100,US,M
337,2022,SE,FT,Data Engineer,243900,USD,243900,US,100,US,M
338,2022,SE,FT,Data Analyst,136600,USD,136600,US,100,US,M
339,2022,SE,FT,Data Analyst,109280,USD,109280,US,100,US,M
340,2022,SE,FT,Data Engineer,128875,USD,128875,US,100,US,M
341,2022,SE,FT,Data Engineer,93700,USD,93700,US,100,US,M
342,2022,EX,FT,Head of Data Science,224000,USD,224000,US,100,US,M
343,2022,EX,FT,Head of Data Science,167875,USD,167875,US,100,US,M
344,2022,EX,FT,Analytics Engineer,175000,USD,175000,US,100,US,M
345,2022,SE,FT,Data Engineer,156600,USD,156600,US,100,US,M
346,2022,SE,FT,Data Engineer,108800,USD,108800,US,0,US,M
347,2022,SE,FT,Data Scientist,95550,USD,95550,US,0,US,M
348,2022,SE,FT,Data Engineer,113000,USD,113000,US,0,US,L
349,2022,SE,FT,Data Analyst,135000,USD,135000,US,100,US,M
350,2022,SE,FT,Data Science Manager,161342,USD,161342,US,100,US,M
351,2022,SE,FT,Data Science Manager,137141,USD,137141,US,100,US,M
352,2022,SE,FT,Data Scientist,167000,USD,167000,US,100,US,M
353,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
354,2022,SE,FT,Data Engineer,60000,GBP,78526,GB,0,GB,M
355,2022,SE,FT,Data Engineer,50000,GBP,65438,GB,0,GB,M
356,2022,SE,FT,Data Scientist,150000,USD,150000,US,0,US,M
357,2022,SE,FT,Data Scientist,211500,USD,211500,US,100,US,M
358,2022,SE,FT,Data Architect,192400,USD,192400,CA,100,CA,M
359,2022,SE,FT,Data Architect,90700,USD,90700,CA,100,CA,M
360,2022,SE,FT,Data Analyst,130000,USD,130000,CA,100,CA,M
361,2022,SE,FT,Data Analyst,61300,USD,61300,CA,100,CA,M
362,2022,SE,FT,Data Analyst,130000,USD,130000,CA,100,CA,M
363,2022,SE,FT,Data Analyst,61300,USD,61300,CA,100,CA,M
364,2022,SE,FT,Data Engineer,160000,USD,160000,US,0,US,L
365,2022,SE,FT,Data Scientist,138600,USD,138600,US,100,US,M
366,2022,SE,FT,Data Engineer,136000,USD,136000,US,0,US,M
367,2022,MI,FT,Data Analyst,58000,USD,58000,US,0,US,S
368,2022,EX,FT,Analytics Engineer,135000,USD,135000,US,100,US,M
369,2022,SE,FT,Data Scientist,170000,USD,170000,US,100,US,M
370,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
371,2022,SE,FT,Machine Learning Engineer,189650,USD,189650,US,0,US,M
372,2022,SE,FT,Machine Learning Engineer,164996,USD,164996,US,0,US,M
373,2022,MI,FT,ETL Developer,50000,EUR,54957,GR,0,GR,M
374,2022,MI,FT,ETL Developer,50000,EUR,54957,GR,0,GR,M
375,2022,EX,FT,Lead Data Engineer,150000,CAD,118187,CA,100,CA,S
376,2022,SE,FT,Data Analyst,132000,USD,132000,US,0,US,M
377,2022,SE,FT,Data Engineer,165400,USD,165400,US,100,US,M
378,2022,SE,FT,Data Architect,208775,USD,208775,US,100,US,M
379,2022,SE,FT,Data Architect,147800,USD,147800,US,100,US,M
380,2022,SE,FT,Data Engineer,136994,USD,136994,US,100,US,M
381,2022,SE,FT,Data Engineer,101570,USD,101570,US,100,US,M
382,2022,SE,FT,Data Analyst,128875,USD,128875,US,100,US,M
383,2022,SE,FT,Data Analyst,93700,USD,93700,US,100,US,M
384,2022,EX,FT,Head of Machine Learning,6000000,INR,79039,IN,50,IN,L
385,2022,SE,FT,Data Engineer,132320,USD,132320,US,100,US,M
386,2022,EN,FT,Machine Learning Engineer,28500,GBP,37300,GB,100,GB,L
387,2022,SE,FT,Data Analyst,164000,USD,164000,US,0,US,M
388,2022,SE,FT,Data Engineer,155000,USD,155000,US,100,US,M
389,2022,MI,FT,Machine Learning Engineer,95000,GBP,124333,GB,0,GB,M
390,2022,MI,FT,Machine Learning Engineer,75000,GBP,98158,GB,0,GB,M
391,2022,MI,FT,AI Scientist,120000,USD,120000,US,0,US,M
392,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
393,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
394,2022,SE,FT,Data Analytics Manager,145000,USD,145000,US,100,US,M
395,2022,SE,FT,Data Analytics Manager,105400,USD,105400,US,100,US,M
396,2022,MI,FT,Machine Learning Engineer,80000,EUR,87932,FR,100,DE,M
397,2022,MI,FT,Data Engineer,90000,GBP,117789,GB,0,GB,M
398,2022,SE,FT,Data Scientist,215300,USD,215300,US,100,US,L
399,2022,SE,FT,Data Scientist,158200,USD,158200,US,100,US,L
400,2022,SE,FT,Data Engineer,209100,USD,209100,US,100,US,L
401,2022,SE,FT,Data Engineer,154600,USD,154600,US,100,US,L
402,2022,SE,FT,Data Analyst,115934,USD,115934,US,0,US,M
403,2022,SE,FT,Data Analyst,81666,USD,81666,US,0,US,M
404,2022,SE,FT,Data Engineer,175000,USD,175000,US,100,US,M
405,2022,MI,FT,Data Engineer,75000,GBP,98158,GB,0,GB,M
406,2022,MI,FT,Data Analyst,58000,USD,58000,US,0,US,S
407,2022,SE,FT,Data Engineer,183600,USD,183600,US,100,US,L
408,2022,MI,FT,Data Analyst,40000,GBP,52351,GB,100,GB,M
409,2022,SE,FT,Data Scientist,180000,USD,180000,US,100,US,M
410,2022,MI,FT,Data Scientist,55000,GBP,71982,GB,0,GB,M
411,2022,MI,FT,Data Scientist,35000,GBP,45807,GB,0,GB,M
412,2022,MI,FT,Data Engineer,60000,EUR,65949,GR,100,GR,M
413,2022,MI,FT,Data Engineer,45000,EUR,49461,GR,100,GR,M
414,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,100,GB,M
415,2022,MI,FT,Data Engineer,45000,GBP,58894,GB,100,GB,M
416,2022,SE,FT,Data Scientist,260000,USD,260000,US,100,US,M
417,2022,SE,FT,Data Science Engineer,60000,USD,60000,AR,100,MX,L
418,2022,MI,FT,Data Engineer,63900,USD,63900,US,0,US,M
419,2022,MI,FT,Machine Learning Scientist,160000,USD,160000,US,100,US,L
420,2022,MI,FT,Machine Learning Scientist,112300,USD,112300,US,100,US,L
421,2022,MI,FT,Data Science Manager,241000,USD,241000,US,100,US,M
422,2022,MI,FT,Data Science Manager,159000,USD,159000,US,100,US,M
423,2022,SE,FT,Data Scientist,180000,USD,180000,US,0,US,M
424,2022,SE,FT,Data Scientist,80000,USD,80000,US,0,US,M
425,2022,MI,FT,Data Engineer,82900,USD,82900,US,0,US,M
426,2022,SE,FT,Data Engineer,100800,USD,100800,US,100,US,L
427,2022,MI,FT,Data Engineer,45000,EUR,49461,ES,100,ES,M
428,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,L
429,2022,MI,FT,Data Analyst,30000,GBP,39263,GB,100,GB,M
430,2022,MI,FT,Data Analyst,40000,EUR,43966,ES,100,ES,M
431,2022,MI,FT,Data Analyst,30000,EUR,32974,ES,100,ES,M
432,2022,MI,FT,Data Engineer,80000,EUR,87932,ES,100,ES,M
433,2022,MI,FT,Data Engineer,70000,EUR,76940,ES,100,ES,M
434,2022,MI,FT,Data Engineer,80000,GBP,104702,GB,100,GB,M
435,2022,MI,FT,Data Engineer,70000,GBP,91614,GB,100,GB,M
436,2022,MI,FT,Data Engineer,60000,EUR,65949,ES,100,ES,M
437,2022,MI,FT,Data Engineer,80000,EUR,87932,GR,100,GR,M
438,2022,SE,FT,Machine Learning Engineer,189650,USD,189650,US,0,US,M
439,2022,SE,FT,Machine Learning Engineer,164996,USD,164996,US,0,US,M
440,2022,MI,FT,Data Analyst,40000,EUR,43966,GR,100,GR,M
441,2022,MI,FT,Data Analyst,30000,EUR,32974,GR,100,GR,M
442,2022,MI,FT,Data Engineer,75000,GBP,98158,GB,100,GB,M
443,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,100,GB,M
444,2022,SE,FT,Data Scientist,215300,USD,215300,US,0,US,L
445,2022,MI,FT,Data Engineer,70000,EUR,76940,GR,100,GR,M
446,2022,SE,FT,Data Engineer,209100,USD,209100,US,100,US,L
447,2022,SE,FT,Data Engineer,154600,USD,154600,US,100,US,L
448,2022,SE,FT,Data Engineer,180000,USD,180000,US,100,US,M
449,2022,EN,FT,ML Engineer,20000,EUR,21983,PT,100,PT,L
450,2022,SE,FT,Data Engineer,80000,USD,80000,US,100,US,M
451,2022,MI,FT,Machine Learning Developer,100000,CAD,78791,CA,100,CA,M
452,2022,EX,FT,Director of Data Science,250000,CAD,196979,CA,50,CA,L
453,2022,MI,FT,Machine Learning Engineer,120000,USD,120000,US,100,US,S
454,2022,EN,FT,Computer Vision Engineer,125000,USD,125000,US,0,US,M
455,2022,MI,FT,NLP Engineer,240000,CNY,37236,US,50,US,L
456,2022,SE,FT,Data Engineer,105000,USD,105000,US,100,US,M
457,2022,SE,FT,Lead Machine Learning Engineer,80000,EUR,87932,DE,0,DE,M
458,2022,MI,FT,Business Data Analyst,1400000,INR,18442,IN,100,IN,M
459,2022,MI,FT,Data Scientist,2400000,INR,31615,IN,100,IN,L
460,2022,MI,FT,Machine Learning Infrastructure Engineer,53000,EUR,58255,PT,50,PT,L
461,2022,EN,FT,Financial Data Analyst,100000,USD,100000,US,50,US,L
462,2022,MI,PT,Data Engineer,50000,EUR,54957,DE,50,DE,L
463,2022,EN,FT,Data Scientist,1400000,INR,18442,IN,100,IN,M
464,2022,SE,FT,Principal Data Scientist,148000,EUR,162674,DE,100,DE,M
465,2022,EN,FT,Data Engineer,120000,USD,120000,US,100,US,M
466,2022,SE,FT,Research Scientist,144000,USD,144000,US,50,US,L
467,2022,SE,FT,Data Scientist,104890,USD,104890,US,100,US,M
468,2022,SE,FT,Data Engineer,100000,USD,100000,US,100,US,M
469,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
470,2022,MI,FT,Data Analyst,135000,USD,135000,US,100,US,M
471,2022,MI,FT,Data Analyst,50000,USD,50000,US,100,US,M
472,2022,SE,FT,Data Scientist,220000,USD,220000,US,100,US,M
473,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
474,2022,MI,FT,Data Scientist,140000,GBP,183228,GB,0,GB,M
475,2022,MI,FT,Data Scientist,70000,GBP,91614,GB,0,GB,M
476,2022,SE,FT,Data Scientist,185100,USD,185100,US,100,US,M
477,2022,SE,FT,Machine Learning Engineer,220000,USD,220000,US,100,US,M
478,2022,MI,FT,Data Scientist,200000,USD,200000,US,100,US,M
479,2022,MI,FT,Data Scientist,120000,USD,120000,US,100,US,M
480,2022,SE,FT,Machine Learning Engineer,120000,USD,120000,AE,100,AE,S
481,2022,SE,FT,Machine Learning Engineer,65000,USD,65000,AE,100,AE,S
482,2022,EX,FT,Data Engineer,324000,USD,324000,US,100,US,M
483,2022,EX,FT,Data Engineer,216000,USD,216000,US,100,US,M
484,2022,SE,FT,Data Engineer,210000,USD,210000,US,100,US,M
485,2022,SE,FT,Machine Learning Engineer,120000,USD,120000,US,100,US,M
486,2022,SE,FT,Data Scientist,230000,USD,230000,US,100,US,M
487,2022,EN,PT,Data Scientist,100000,USD,100000,DZ,50,DZ,M
488,2022,MI,FL,Data Scientist,100000,USD,100000,CA,100,US,M
489,2022,EN,CT,Applied Machine Learning Scientist,29000,EUR,31875,TN,100,CZ,M
490,2022,SE,FT,Head of Data,200000,USD,200000,MY,100,US,M
491,2022,MI,FT,Principal Data Analyst,75000,USD,75000,CA,100,CA,S
492,2022,MI,FT,Data Scientist,150000,PLN,35590,PL,100,PL,L
493,2022,SE,FT,Machine Learning Developer,100000,CAD,78791,CA,100,CA,M
494,2022,SE,FT,Data Scientist,100000,USD,100000,BR,100,US,M
495,2022,MI,FT,Machine Learning Scientist,153000,USD,153000,US,50,US,M
496,2022,EN,FT,Data Engineer,52800,EUR,58035,PK,100,DE,M
497,2022,SE,FT,Data Scientist,165000,USD,165000,US,100,US,M
498,2022,SE,FT,Research Scientist,85000,EUR,93427,FR,50,FR,L
499,2022,EN,FT,Data Scientist,66500,CAD,52396,CA,100,CA,L
500,2022,SE,FT,Machine Learning Engineer,57000,EUR,62651,NL,100,NL,L
501,2022,MI,FT,Head of Data,30000,EUR,32974,EE,100,EE,S
502,2022,EN,FT,Data Scientist,40000,USD,40000,JP,100,MY,L
503,2022,MI,FT,Machine Learning Engineer,121000,AUD,87425,AU,100,AU,L
504,2022,SE,FT,Data Engineer,115000,USD,115000,US,100,US,M
505,2022,EN,FT,Data Scientist,120000,AUD,86703,AU,50,AU,M
506,2022,MI,FT,Applied Machine Learning Scientist,75000,USD,75000,BO,100,US,L
507,2022,MI,FT,Research Scientist,59000,EUR,64849,AT,0,AT,L
508,2022,EN,FT,Research Scientist,120000,USD,120000,US,100,US,L
509,2022,MI,FT,Applied Data Scientist,157000,USD,157000,US,100,US,L
510,2022,EN,FT,Computer Vision Software Engineer,150000,USD,150000,AU,100,AU,S
511,2022,MI,FT,Business Data Analyst,90000,CAD,70912,CA,50,CA,L
512,2022,EN,FT,Data Engineer,65000,USD,65000,US,100,US,S
513,2022,SE,FT,Machine Learning Engineer,65000,EUR,71444,IE,100,IE,S
514,2022,EN,FT,Data Analytics Engineer,20000,USD,20000,PK,0,PK,M
515,2022,MI,FT,Data Scientist,48000,USD,48000,RU,100,US,S
516,2022,SE,FT,Data Science Manager,152500,USD,152500,US,100,US,M
517,2022,MI,FT,Data Engineer,62000,EUR,68147,FR,100,FR,M
518,2022,MI,FT,Data Scientist,115000,CHF,122346,CH,0,CH,L
519,2022,SE,FT,Applied Data Scientist,380000,USD,380000,US,100,US,L
520,2022,MI,FT,Data Scientist,88000,CAD,69336,CA,100,CA,M
521,2022,EN,FT,Computer Vision Engineer,10000,USD,10000,PT,100,LU,M
522,2022,MI,FT,Data Analyst,20000,USD,20000,GR,100,GR,S
523,2022,SE,FT,Data Analytics Lead,405000,USD,405000,US,100,US,L
524,2022,MI,FT,Data Scientist,135000,USD,135000,US,100,US,L
525,2022,SE,FT,Applied Data Scientist,177000,USD,177000,US,100,US,L
526,2022,MI,FT,Data Scientist,78000,USD,78000,US,100,US,M
527,2022,SE,FT,Data Analyst,135000,USD,135000,US,100,US,M
528,2022,SE,FT,Data Analyst,100000,USD,100000,US,100,US,M
529,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
530,2022,MI,FT,Data Analyst,85000,USD,85000,CA,0,CA,M
531,2022,MI,FT,Data Analyst,75000,USD,75000,CA,0,CA,M
532,2022,SE,FT,Machine Learning Engineer,214000,USD,214000,US,100,US,M
533,2022,SE,FT,Machine Learning Engineer,192600,USD,192600,US,100,US,M
534,2022,SE,FT,Data Architect,266400,USD,266400,US,100,US,M
535,2022,SE,FT,Data Architect,213120,USD,213120,US,100,US,M
536,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
537,2022,SE,FT,Data Engineer,155000,USD,155000,US,100,US,M
538,2022,MI,FT,Data Scientist,141300,USD,141300,US,0,US,M
539,2022,MI,FT,Data Scientist,102100,USD,102100,US,0,US,M
540,2022,SE,FT,Data Analyst,115934,USD,115934,US,100,US,M
541,2022,SE,FT,Data Analyst,81666,USD,81666,US,100,US,M
542,2022,MI,FT,Data Engineer,206699,USD,206699,US,0,US,M
543,2022,MI,FT,Data Engineer,99100,USD,99100,US,0,US,M
544,2022,SE,FT,Data Engineer,130000,USD,130000,US,100,US,M
545,2022,SE,FT,Data Engineer,115000,USD,115000,US,100,US,M
546,2022,SE,FT,Data Engineer,110500,USD,110500,US,100,US,M
547,2022,SE,FT,Data Engineer,130000,USD,130000,US,100,US,M
548,2022,SE,FT,Data Analyst,99050,USD,99050,US,100,US,M
549,2022,SE,FT,Data Engineer,160000,USD,160000,US,100,US,M
550,2022,SE,FT,Data Scientist,205300,USD,205300,US,0,US,L
551,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,L
552,2022,SE,FT,Data Scientist,176000,USD,176000,US,100,US,M
553,2022,SE,FT,Data Scientist,144000,USD,144000,US,100,US,M
554,2022,SE,FT,Data Engineer,200100,USD,200100,US,100,US,M
555,2022,SE,FT,Data Engineer,160000,USD,160000,US,100,US,M
556,2022,SE,FT,Data Engineer,145000,USD,145000,US,100,US,M
557,2022,SE,FT,Data Engineer,70500,USD,70500,US,0,US,M
558,2022,SE,FT,Data Scientist,205300,USD,205300,US,0,US,M
559,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,M
560,2022,SE,FT,Analytics Engineer,205300,USD,205300,US,0,US,M
561,2022,SE,FT,Analytics Engineer,184700,USD,184700,US,0,US,M
562,2022,SE,FT,Data Engineer,175100,USD,175100,US,100,US,M
563,2022,SE,FT,Data Engineer,140250,USD,140250,US,100,US,M
564,2022,SE,FT,Data Analyst,116150,USD,116150,US,100,US,M
565,2022,SE,FT,Data Engineer,54000,USD,54000,US,0,US,M
566,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
567,2022,MI,FT,Data Analyst,50000,GBP,65438,GB,0,GB,M
568,2022,SE,FT,Data Analyst,80000,USD,80000,US,100,US,M
569,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
570,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
571,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
572,2022,SE,FT,Data Analyst,100000,USD,100000,US,100,US,M
573,2022,SE,FT,Data Analyst,69000,USD,69000,US,100,US,M
574,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
575,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
576,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
577,2022,SE,FT,Data Analyst,150075,USD,150075,US,100,US,M
578,2022,SE,FT,Data Engineer,100000,USD,100000,US,100,US,M
579,2022,SE,FT,Data Engineer,25000,USD,25000,US,100,US,M
580,2022,SE,FT,Data Analyst,126500,USD,126500,US,100,US,M
581,2022,SE,FT,Data Analyst,106260,USD,106260,US,100,US,M
582,2022,SE,FT,Data Engineer,220110,USD,220110,US,100,US,M
583,2022,SE,FT,Data Engineer,160080,USD,160080,US,100,US,M
584,2022,SE,FT,Data Analyst,105000,USD,105000,US,100,US,M
585,2022,SE,FT,Data Analyst,110925,USD,110925,US,100,US,M
586,2022,MI,FT,Data Analyst,35000,GBP,45807,GB,0,GB,M
587,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
588,2022,SE,FT,Data Analyst,99000,USD,99000,US,0,US,M
589,2022,SE,FT,Data Analyst,60000,USD,60000,US,100,US,M
590,2022,SE,FT,Data Architect,192564,USD,192564,US,100,US,M
591,2022,SE,FT,Data Architect,144854,USD,144854,US,100,US,M
592,2022,SE,FT,Data Scientist,230000,USD,230000,US,100,US,M
593,2022,SE,FT,Data Scientist,150000,USD,150000,US,100,US,M
594,2022,SE,FT,Data Analytics Manager,150260,USD,150260,US,100,US,M
595,2022,SE,FT,Data Analytics Manager,109280,USD,109280,US,100,US,M
596,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
597,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
598,2022,MI,FT,Data Scientist,160000,USD,160000,US,100,US,M
599,2022,MI,FT,Data Scientist,130000,USD,130000,US,100,US,M
600,2022,EN,FT,Data Analyst,67000,USD,67000,CA,0,CA,M
601,2022,EN,FT,Data Analyst,52000,USD,52000,CA,0,CA,M
602,2022,SE,FT,Data Engineer,154000,USD,154000,US,100,US,M
603,2022,SE,FT,Data Engineer,126000,USD,126000,US,100,US,M
604,2022,SE,FT,Data Analyst,129000,USD,129000,US,0,US,M
605,2022,SE,FT,Data Analyst,150000,USD,150000,US,100,US,M
606,2022,MI,FT,AI Scientist,200000,USD,200000,IN,100,US,L
1 work_year experience_level employment_type job_title salary salary_currency salary_in_usd employee_residence remote_ratio company_location company_size
2 0 2020 MI FT Data Scientist 70000 EUR 79833 DE 0 DE L
3 1 2020 SE FT Machine Learning Scientist 260000 USD 260000 JP 0 JP S
4 2 2020 SE FT Big Data Engineer 85000 GBP 109024 GB 50 GB M
5 3 2020 MI FT Product Data Analyst 20000 USD 20000 HN 0 HN S
6 4 2020 SE FT Machine Learning Engineer 150000 USD 150000 US 50 US L
7 5 2020 EN FT Data Analyst 72000 USD 72000 US 100 US L
8 6 2020 SE FT Lead Data Scientist 190000 USD 190000 US 100 US S
9 7 2020 MI FT Data Scientist 11000000 HUF 35735 HU 50 HU L
10 8 2020 MI FT Business Data Analyst 135000 USD 135000 US 100 US L
11 9 2020 SE FT Lead Data Engineer 125000 USD 125000 NZ 50 NZ S
12 10 2020 EN FT Data Scientist 45000 EUR 51321 FR 0 FR S
13 11 2020 MI FT Data Scientist 3000000 INR 40481 IN 0 IN L
14 12 2020 EN FT Data Scientist 35000 EUR 39916 FR 0 FR M
15 13 2020 MI FT Lead Data Analyst 87000 USD 87000 US 100 US L
16 14 2020 MI FT Data Analyst 85000 USD 85000 US 100 US L
17 15 2020 MI FT Data Analyst 8000 USD 8000 PK 50 PK L
18 16 2020 EN FT Data Engineer 4450000 JPY 41689 JP 100 JP S
19 17 2020 SE FT Big Data Engineer 100000 EUR 114047 PL 100 GB S
20 18 2020 EN FT Data Science Consultant 423000 INR 5707 IN 50 IN M
21 19 2020 MI FT Lead Data Engineer 56000 USD 56000 PT 100 US M
22 20 2020 MI FT Machine Learning Engineer 299000 CNY 43331 CN 0 CN M
23 21 2020 MI FT Product Data Analyst 450000 INR 6072 IN 100 IN L
24 22 2020 SE FT Data Engineer 42000 EUR 47899 GR 50 GR L
25 23 2020 MI FT BI Data Analyst 98000 USD 98000 US 0 US M
26 24 2020 MI FT Lead Data Scientist 115000 USD 115000 AE 0 AE L
27 25 2020 EX FT Director of Data Science 325000 USD 325000 US 100 US L
28 26 2020 EN FT Research Scientist 42000 USD 42000 NL 50 NL L
29 27 2020 SE FT Data Engineer 720000 MXN 33511 MX 0 MX S
30 28 2020 EN CT Business Data Analyst 100000 USD 100000 US 100 US L
31 29 2020 SE FT Machine Learning Manager 157000 CAD 117104 CA 50 CA L
32 30 2020 MI FT Data Engineering Manager 51999 EUR 59303 DE 100 DE S
33 31 2020 EN FT Big Data Engineer 70000 USD 70000 US 100 US L
34 32 2020 SE FT Data Scientist 60000 EUR 68428 GR 100 US L
35 33 2020 MI FT Research Scientist 450000 USD 450000 US 0 US M
36 34 2020 MI FT Data Analyst 41000 EUR 46759 FR 50 FR L
37 35 2020 MI FT Data Engineer 65000 EUR 74130 AT 50 AT L
38 36 2020 MI FT Data Science Consultant 103000 USD 103000 US 100 US L
39 37 2020 EN FT Machine Learning Engineer 250000 USD 250000 US 50 US L
40 38 2020 EN FT Data Analyst 10000 USD 10000 NG 100 NG S
41 39 2020 EN FT Machine Learning Engineer 138000 USD 138000 US 100 US S
42 40 2020 MI FT Data Scientist 45760 USD 45760 PH 100 US S
43 41 2020 EX FT Data Engineering Manager 70000 EUR 79833 ES 50 ES L
44 42 2020 MI FT Machine Learning Infrastructure Engineer 44000 EUR 50180 PT 0 PT M
45 43 2020 MI FT Data Engineer 106000 USD 106000 US 100 US L
46 44 2020 MI FT Data Engineer 88000 GBP 112872 GB 50 GB L
47 45 2020 EN PT ML Engineer 14000 EUR 15966 DE 100 DE S
48 46 2020 MI FT Data Scientist 60000 GBP 76958 GB 100 GB S
49 47 2020 SE FT Data Engineer 188000 USD 188000 US 100 US L
50 48 2020 MI FT Data Scientist 105000 USD 105000 US 100 US L
51 49 2020 MI FT Data Engineer 61500 EUR 70139 FR 50 FR L
52 50 2020 EN FT Data Analyst 450000 INR 6072 IN 0 IN S
53 51 2020 EN FT Data Analyst 91000 USD 91000 US 100 US L
54 52 2020 EN FT AI Scientist 300000 DKK 45896 DK 50 DK S
55 53 2020 EN FT Data Engineer 48000 EUR 54742 PK 100 DE L
56 54 2020 SE FL Computer Vision Engineer 60000 USD 60000 RU 100 US S
57 55 2020 SE FT Principal Data Scientist 130000 EUR 148261 DE 100 DE M
58 56 2020 MI FT Data Scientist 34000 EUR 38776 ES 100 ES M
59 57 2020 MI FT Data Scientist 118000 USD 118000 US 100 US M
60 58 2020 SE FT Data Scientist 120000 USD 120000 US 50 US L
61 59 2020 MI FT Data Scientist 138350 USD 138350 US 100 US M
62 60 2020 MI FT Data Engineer 110000 USD 110000 US 100 US L
63 61 2020 MI FT Data Engineer 130800 USD 130800 ES 100 US M
64 62 2020 EN PT Data Scientist 19000 EUR 21669 IT 50 IT S
65 63 2020 SE FT Data Scientist 412000 USD 412000 US 100 US L
66 64 2020 SE FT Machine Learning Engineer 40000 EUR 45618 HR 100 HR S
67 65 2020 EN FT Data Scientist 55000 EUR 62726 DE 50 DE S
68 66 2020 EN FT Data Scientist 43200 EUR 49268 DE 0 DE S
69 67 2020 SE FT Data Science Manager 190200 USD 190200 US 100 US M
70 68 2020 EN FT Data Scientist 105000 USD 105000 US 100 US S
71 69 2020 SE FT Data Scientist 80000 EUR 91237 AT 0 AT S
72 70 2020 MI FT Data Scientist 55000 EUR 62726 FR 50 LU S
73 71 2020 MI FT Data Scientist 37000 EUR 42197 FR 50 FR S
74 72 2021 EN FT Research Scientist 60000 GBP 82528 GB 50 GB L
75 73 2021 EX FT BI Data Analyst 150000 USD 150000 IN 100 US L
76 74 2021 EX FT Head of Data 235000 USD 235000 US 100 US L
77 75 2021 SE FT Data Scientist 45000 EUR 53192 FR 50 FR L
78 76 2021 MI FT BI Data Analyst 100000 USD 100000 US 100 US M
79 77 2021 MI PT 3D Computer Vision Researcher 400000 INR 5409 IN 50 IN M
80 78 2021 MI CT ML Engineer 270000 USD 270000 US 100 US L
81 79 2021 EN FT Data Analyst 80000 USD 80000 US 100 US M
82 80 2021 SE FT Data Analytics Engineer 67000 EUR 79197 DE 100 DE L
83 81 2021 MI FT Data Engineer 140000 USD 140000 US 100 US L
84 82 2021 MI FT Applied Data Scientist 68000 CAD 54238 GB 50 CA L
85 83 2021 MI FT Machine Learning Engineer 40000 EUR 47282 ES 100 ES S
86 84 2021 EX FT Director of Data Science 130000 EUR 153667 IT 100 PL L
87 85 2021 MI FT Data Engineer 110000 PLN 28476 PL 100 PL L
88 86 2021 EN FT Data Analyst 50000 EUR 59102 FR 50 FR M
89 87 2021 MI FT Data Analytics Engineer 110000 USD 110000 US 100 US L
90 88 2021 SE FT Lead Data Analyst 170000 USD 170000 US 100 US L
91 89 2021 SE FT Data Analyst 80000 USD 80000 BG 100 US S
92 90 2021 SE FT Marketing Data Analyst 75000 EUR 88654 GR 100 DK L
93 91 2021 EN FT Data Science Consultant 65000 EUR 76833 DE 100 DE S
94 92 2021 MI FT Lead Data Analyst 1450000 INR 19609 IN 100 IN L
95 93 2021 SE FT Lead Data Engineer 276000 USD 276000 US 0 US L
96 94 2021 EN FT Data Scientist 2200000 INR 29751 IN 50 IN L
97 95 2021 MI FT Cloud Data Engineer 120000 SGD 89294 SG 50 SG L
98 96 2021 EN PT AI Scientist 12000 USD 12000 BR 100 US S
99 97 2021 MI FT Financial Data Analyst 450000 USD 450000 US 100 US L
100 98 2021 EN FT Computer Vision Software Engineer 70000 USD 70000 US 100 US M
101 99 2021 MI FT Computer Vision Software Engineer 81000 EUR 95746 DE 100 US S
102 100 2021 MI FT Data Analyst 75000 USD 75000 US 0 US L
103 101 2021 SE FT Data Engineer 150000 USD 150000 US 100 US L
104 102 2021 MI FT BI Data Analyst 11000000 HUF 36259 HU 50 US L
105 103 2021 MI FT Data Analyst 62000 USD 62000 US 0 US L
106 104 2021 MI FT Data Scientist 73000 USD 73000 US 0 US L
107 105 2021 MI FT Data Analyst 37456 GBP 51519 GB 50 GB L
108 106 2021 MI FT Research Scientist 235000 CAD 187442 CA 100 CA L
109 107 2021 SE FT Data Engineer 115000 USD 115000 US 100 US S
110 108 2021 SE FT Data Engineer 150000 USD 150000 US 100 US M
111 109 2021 EN FT Data Engineer 2250000 INR 30428 IN 100 IN L
112 110 2021 SE FT Machine Learning Engineer 80000 EUR 94564 DE 50 DE L
113 111 2021 SE FT Director of Data Engineering 82500 GBP 113476 GB 100 GB M
114 112 2021 SE FT Lead Data Engineer 75000 GBP 103160 GB 100 GB S
115 113 2021 EN PT AI Scientist 12000 USD 12000 PK 100 US M
116 114 2021 MI FT Data Engineer 38400 EUR 45391 NL 100 NL L
117 115 2021 EN FT Machine Learning Scientist 225000 USD 225000 US 100 US L
118 116 2021 MI FT Data Scientist 50000 USD 50000 NG 100 NG L
119 117 2021 MI FT Data Science Engineer 34000 EUR 40189 GR 100 GR M
120 118 2021 EN FT Data Analyst 90000 USD 90000 US 100 US S
121 119 2021 MI FT Data Engineer 200000 USD 200000 US 100 US L
122 120 2021 MI FT Big Data Engineer 60000 USD 60000 ES 50 RO M
123 121 2021 SE FT Principal Data Engineer 200000 USD 200000 US 100 US M
124 122 2021 EN FT Data Analyst 50000 USD 50000 US 100 US M
125 123 2021 EN FT Applied Data Scientist 80000 GBP 110037 GB 0 GB L
126 124 2021 EN PT Data Analyst 8760 EUR 10354 ES 50 ES M
127 125 2021 MI FT Principal Data Scientist 151000 USD 151000 US 100 US L
128 126 2021 SE FT Machine Learning Scientist 120000 USD 120000 US 50 US S
129 127 2021 MI FT Data Scientist 700000 INR 9466 IN 0 IN S
130 128 2021 EN FT Machine Learning Engineer 20000 USD 20000 IN 100 IN S
131 129 2021 SE FT Lead Data Scientist 3000000 INR 40570 IN 50 IN L
132 130 2021 EN FT Machine Learning Developer 100000 USD 100000 IQ 50 IQ S
133 131 2021 EN FT Data Scientist 42000 EUR 49646 FR 50 FR M
134 132 2021 MI FT Applied Machine Learning Scientist 38400 USD 38400 VN 100 US M
135 133 2021 SE FT Computer Vision Engineer 24000 USD 24000 BR 100 BR M
136 134 2021 EN FT Data Scientist 100000 USD 100000 US 0 US S
137 135 2021 MI FT Data Analyst 90000 USD 90000 US 100 US M
138 136 2021 MI FT ML Engineer 7000000 JPY 63711 JP 50 JP S
139 137 2021 MI FT ML Engineer 8500000 JPY 77364 JP 50 JP S
140 138 2021 SE FT Principal Data Scientist 220000 USD 220000 US 0 US L
141 139 2021 EN FT Data Scientist 80000 USD 80000 US 100 US M
142 140 2021 MI FT Data Analyst 135000 USD 135000 US 100 US L
143 141 2021 SE FT Data Science Manager 240000 USD 240000 US 0 US L
144 142 2021 SE FT Data Engineering Manager 150000 USD 150000 US 0 US L
145 143 2021 MI FT Data Scientist 82500 USD 82500 US 100 US S
146 144 2021 MI FT Data Engineer 100000 USD 100000 US 100 US L
147 145 2021 SE FT Machine Learning Engineer 70000 EUR 82744 BE 50 BE M
148 146 2021 MI FT Research Scientist 53000 EUR 62649 FR 50 FR M
149 147 2021 MI FT Data Engineer 90000 USD 90000 US 100 US L
150 148 2021 SE FT Data Engineering Manager 153000 USD 153000 US 100 US L
151 149 2021 SE FT Cloud Data Engineer 160000 USD 160000 BR 100 US S
152 150 2021 SE FT Director of Data Science 168000 USD 168000 JP 0 JP S
153 151 2021 MI FT Data Scientist 150000 USD 150000 US 100 US M
154 152 2021 MI FT Data Scientist 95000 CAD 75774 CA 100 CA L
155 153 2021 EN FT Data Scientist 13400 USD 13400 UA 100 UA L
156 154 2021 SE FT Data Science Manager 144000 USD 144000 US 100 US L
157 155 2021 SE FT Data Science Engineer 159500 CAD 127221 CA 50 CA L
158 156 2021 MI FT Data Scientist 160000 SGD 119059 SG 100 IL M
159 157 2021 MI FT Applied Machine Learning Scientist 423000 USD 423000 US 50 US L
160 158 2021 SE FT Data Analytics Manager 120000 USD 120000 US 100 US M
161 159 2021 EN FT Machine Learning Engineer 125000 USD 125000 US 100 US S
162 160 2021 EX FT Head of Data 230000 USD 230000 RU 50 RU L
163 161 2021 EX FT Head of Data Science 85000 USD 85000 RU 0 RU M
164 162 2021 MI FT Data Engineer 24000 EUR 28369 MT 50 MT L
165 163 2021 EN FT Data Science Consultant 54000 EUR 63831 DE 50 DE L
166 164 2021 EX FT Director of Data Science 110000 EUR 130026 DE 50 DE M
167 165 2021 SE FT Data Specialist 165000 USD 165000 US 100 US L
168 166 2021 EN FT Data Engineer 80000 USD 80000 US 100 US L
169 167 2021 EX FT Director of Data Science 250000 USD 250000 US 0 US L
170 168 2021 EN FT BI Data Analyst 55000 USD 55000 US 50 US S
171 169 2021 MI FT Data Architect 150000 USD 150000 US 100 US L
172 170 2021 MI FT Data Architect 170000 USD 170000 US 100 US L
173 171 2021 MI FT Data Engineer 60000 GBP 82528 GB 100 GB L
174 172 2021 EN FT Data Analyst 60000 USD 60000 US 100 US S
175 173 2021 SE FT Principal Data Scientist 235000 USD 235000 US 100 US L
176 174 2021 SE FT Research Scientist 51400 EUR 60757 PT 50 PT L
177 175 2021 SE FT Data Engineering Manager 174000 USD 174000 US 100 US L
178 176 2021 MI FT Data Scientist 58000 MXN 2859 MX 0 MX S
179 177 2021 MI FT Data Scientist 30400000 CLP 40038 CL 100 CL L
180 178 2021 EN FT Machine Learning Engineer 81000 USD 81000 US 50 US S
181 179 2021 MI FT Data Scientist 420000 INR 5679 IN 100 US S
182 180 2021 MI FT Big Data Engineer 1672000 INR 22611 IN 0 IN L
183 181 2021 MI FT Data Scientist 76760 EUR 90734 DE 50 DE L
184 182 2021 MI FT Data Engineer 22000 EUR 26005 RO 0 US L
185 183 2021 SE FT Finance Data Analyst 45000 GBP 61896 GB 50 GB L
186 184 2021 MI FL Machine Learning Scientist 12000 USD 12000 PK 50 PK M
187 185 2021 MI FT Data Engineer 4000 USD 4000 IR 100 IR M
188 186 2021 SE FT Data Analytics Engineer 50000 USD 50000 VN 100 GB M
189 187 2021 EX FT Data Science Consultant 59000 EUR 69741 FR 100 ES S
190 188 2021 SE FT Data Engineer 65000 EUR 76833 RO 50 GB S
191 189 2021 MI FT Machine Learning Engineer 74000 USD 74000 JP 50 JP S
192 190 2021 SE FT Data Science Manager 152000 USD 152000 US 100 FR L
193 191 2021 EN FT Machine Learning Engineer 21844 USD 21844 CO 50 CO M
194 192 2021 MI FT Big Data Engineer 18000 USD 18000 MD 0 MD S
195 193 2021 SE FT Data Science Manager 174000 USD 174000 US 100 US L
196 194 2021 SE FT Research Scientist 120500 CAD 96113 CA 50 CA L
197 195 2021 MI FT Data Scientist 147000 USD 147000 US 50 US L
198 196 2021 EN FT BI Data Analyst 9272 USD 9272 KE 100 KE S
199 197 2021 SE FT Machine Learning Engineer 1799997 INR 24342 IN 100 IN L
200 198 2021 SE FT Data Science Manager 4000000 INR 54094 IN 50 US L
201 199 2021 EN FT Data Science Consultant 90000 USD 90000 US 100 US S
202 200 2021 MI FT Data Scientist 52000 EUR 61467 DE 50 AT M
203 201 2021 SE FT Machine Learning Infrastructure Engineer 195000 USD 195000 US 100 US M
204 202 2021 MI FT Data Scientist 32000 EUR 37825 ES 100 ES L
205 203 2021 SE FT Research Scientist 50000 USD 50000 FR 100 US S
206 204 2021 MI FT Data Scientist 160000 USD 160000 US 100 US L
207 205 2021 MI FT Data Scientist 69600 BRL 12901 BR 0 BR S
208 206 2021 SE FT Machine Learning Engineer 200000 USD 200000 US 100 US L
209 207 2021 SE FT Data Engineer 165000 USD 165000 US 0 US M
210 208 2021 MI FL Data Engineer 20000 USD 20000 IT 0 US L
211 209 2021 SE FT Data Analytics Manager 120000 USD 120000 US 0 US L
212 210 2021 MI FT Machine Learning Engineer 21000 EUR 24823 SI 50 SI L
213 211 2021 MI FT Research Scientist 48000 EUR 56738 FR 50 FR S
214 212 2021 MI FT Data Engineer 48000 GBP 66022 HK 50 GB S
215 213 2021 EN FT Big Data Engineer 435000 INR 5882 IN 0 CH L
216 214 2021 EN FT Machine Learning Engineer 21000 EUR 24823 DE 50 DE M
217 215 2021 SE FT Principal Data Engineer 185000 USD 185000 US 100 US L
218 216 2021 EN PT Computer Vision Engineer 180000 DKK 28609 DK 50 DK S
219 217 2021 MI FT Data Scientist 76760 EUR 90734 DE 50 DE L
220 218 2021 MI FT Machine Learning Engineer 75000 EUR 88654 BE 100 BE M
221 219 2021 SE FT Data Analytics Manager 140000 USD 140000 US 100 US L
222 220 2021 MI FT Machine Learning Engineer 180000 PLN 46597 PL 100 PL L
223 221 2021 MI FT Data Scientist 85000 GBP 116914 GB 50 GB L
224 222 2021 MI FT Data Scientist 2500000 INR 33808 IN 0 IN M
225 223 2021 MI FT Data Scientist 40900 GBP 56256 GB 50 GB L
226 224 2021 SE FT Machine Learning Scientist 225000 USD 225000 US 100 CA L
227 225 2021 EX CT Principal Data Scientist 416000 USD 416000 US 100 US S
228 226 2021 SE FT Data Scientist 110000 CAD 87738 CA 100 CA S
229 227 2021 MI FT Data Scientist 75000 EUR 88654 DE 50 DE L
230 228 2021 SE FT Data Scientist 135000 USD 135000 US 0 US L
231 229 2021 SE FT Data Analyst 90000 CAD 71786 CA 100 CA M
232 230 2021 EN FT Big Data Engineer 1200000 INR 16228 IN 100 IN L
233 231 2021 SE FT ML Engineer 256000 USD 256000 US 100 US S
234 232 2021 SE FT Director of Data Engineering 200000 USD 200000 US 100 US L
235 233 2021 SE FT Data Analyst 200000 USD 200000 US 100 US L
236 234 2021 MI FT Data Architect 180000 USD 180000 US 100 US L
237 235 2021 MI FT Head of Data Science 110000 USD 110000 US 0 US S
238 236 2021 MI FT Research Scientist 80000 CAD 63810 CA 100 CA M
239 237 2021 MI FT Data Scientist 39600 EUR 46809 ES 100 ES M
240 238 2021 EN FT Data Scientist 4000 USD 4000 VN 0 VN M
241 239 2021 EN FT Data Engineer 1600000 INR 21637 IN 50 IN M
242 240 2021 SE FT Data Scientist 130000 CAD 103691 CA 100 CA L
243 241 2021 MI FT Data Analyst 80000 USD 80000 US 100 US L
244 242 2021 MI FT Data Engineer 110000 USD 110000 US 100 US L
245 243 2021 SE FT Data Scientist 165000 USD 165000 US 100 US L
246 244 2021 EN FT AI Scientist 1335000 INR 18053 IN 100 AS S
247 245 2021 MI FT Data Engineer 52500 GBP 72212 GB 50 GB L
248 246 2021 EN FT Data Scientist 31000 EUR 36643 FR 50 FR L
249 247 2021 MI FT Data Engineer 108000 TRY 12103 TR 0 TR M
250 248 2021 SE FT Data Engineer 70000 GBP 96282 GB 50 GB L
251 249 2021 SE FT Principal Data Analyst 170000 USD 170000 US 100 US M
252 250 2021 MI FT Data Scientist 115000 USD 115000 US 50 US L
253 251 2021 EN FT Data Scientist 90000 USD 90000 US 100 US S
254 252 2021 EX FT Principal Data Engineer 600000 USD 600000 US 100 US L
255 253 2021 EN FT Data Scientist 2100000 INR 28399 IN 100 IN M
256 254 2021 MI FT Data Analyst 93000 USD 93000 US 100 US L
257 255 2021 SE FT Big Data Architect 125000 CAD 99703 CA 50 CA M
258 256 2021 MI FT Data Engineer 200000 USD 200000 US 100 US L
259 257 2021 SE FT Principal Data Scientist 147000 EUR 173762 DE 100 DE M
260 258 2021 SE FT Machine Learning Engineer 185000 USD 185000 US 50 US L
261 259 2021 EX FT Director of Data Science 120000 EUR 141846 DE 0 DE L
262 260 2021 MI FT Data Scientist 130000 USD 130000 US 50 US L
263 261 2021 SE FT Data Analyst 54000 EUR 63831 DE 50 DE L
264 262 2021 MI FT Data Scientist 1250000 INR 16904 IN 100 IN S
265 263 2021 SE FT Machine Learning Engineer 4900000 INR 66265 IN 0 IN L
266 264 2021 MI FT Data Scientist 21600 EUR 25532 RS 100 DE S
267 265 2021 SE FT Lead Data Engineer 160000 USD 160000 PR 50 US S
268 266 2021 MI FT Data Engineer 93150 USD 93150 US 0 US M
269 267 2021 MI FT Data Engineer 111775 USD 111775 US 0 US M
270 268 2021 MI FT Data Engineer 250000 TRY 28016 TR 100 TR M
271 269 2021 EN FT Data Engineer 55000 EUR 65013 DE 50 DE M
272 270 2021 EN FT Data Engineer 72500 USD 72500 US 100 US L
273 271 2021 SE FT Computer Vision Engineer 102000 BRL 18907 BR 0 BR M
274 272 2021 EN FT Data Science Consultant 65000 EUR 76833 DE 0 DE L
275 273 2021 EN FT Machine Learning Engineer 85000 USD 85000 NL 100 DE S
276 274 2021 SE FT Data Scientist 65720 EUR 77684 FR 50 FR M
277 275 2021 EN FT Data Scientist 100000 USD 100000 US 100 US M
278 276 2021 EN FT Data Scientist 58000 USD 58000 US 50 US L
279 277 2021 SE FT AI Scientist 55000 USD 55000 ES 100 ES L
280 278 2021 SE FT Data Scientist 180000 TRY 20171 TR 50 TR L
281 279 2021 EN FT Business Data Analyst 50000 EUR 59102 LU 100 LU L
282 280 2021 MI FT Data Engineer 112000 USD 112000 US 100 US L
283 281 2021 EN FT Research Scientist 100000 USD 100000 JE 0 CN L
284 282 2021 MI PT Data Engineer 59000 EUR 69741 NL 100 NL L
285 283 2021 SE CT Staff Data Scientist 105000 USD 105000 US 100 US M
286 284 2021 MI FT Research Scientist 69999 USD 69999 CZ 50 CZ L
287 285 2021 SE FT Data Science Manager 7000000 INR 94665 IN 50 IN L
288 286 2021 SE FT Head of Data 87000 EUR 102839 SI 100 SI L
289 287 2021 MI FT Data Scientist 109000 USD 109000 US 50 US L
290 288 2021 MI FT Machine Learning Engineer 43200 EUR 51064 IT 50 IT L
291 289 2022 SE FT Data Engineer 135000 USD 135000 US 100 US M
292 290 2022 SE FT Data Analyst 155000 USD 155000 US 100 US M
293 291 2022 SE FT Data Analyst 120600 USD 120600 US 100 US M
294 292 2022 MI FT Data Scientist 130000 USD 130000 US 0 US M
295 293 2022 MI FT Data Scientist 90000 USD 90000 US 0 US M
296 294 2022 MI FT Data Engineer 170000 USD 170000 US 100 US M
297 295 2022 MI FT Data Engineer 150000 USD 150000 US 100 US M
298 296 2022 SE FT Data Analyst 102100 USD 102100 US 100 US M
299 297 2022 SE FT Data Analyst 84900 USD 84900 US 100 US M
300 298 2022 SE FT Data Scientist 136620 USD 136620 US 100 US M
301 299 2022 SE FT Data Scientist 99360 USD 99360 US 100 US M
302 300 2022 SE FT Data Scientist 90000 GBP 117789 GB 0 GB M
303 301 2022 SE FT Data Scientist 80000 GBP 104702 GB 0 GB M
304 302 2022 SE FT Data Scientist 146000 USD 146000 US 100 US M
305 303 2022 SE FT Data Scientist 123000 USD 123000 US 100 US M
306 304 2022 EN FT Data Engineer 40000 GBP 52351 GB 100 GB M
307 305 2022 SE FT Data Analyst 99000 USD 99000 US 0 US M
308 306 2022 SE FT Data Analyst 116000 USD 116000 US 0 US M
309 307 2022 MI FT Data Analyst 106260 USD 106260 US 0 US M
310 308 2022 MI FT Data Analyst 126500 USD 126500 US 0 US M
311 309 2022 EX FT Data Engineer 242000 USD 242000 US 100 US M
312 310 2022 EX FT Data Engineer 200000 USD 200000 US 100 US M
313 311 2022 MI FT Data Scientist 50000 GBP 65438 GB 0 GB M
314 312 2022 MI FT Data Scientist 30000 GBP 39263 GB 0 GB M
315 313 2022 MI FT Data Engineer 60000 GBP 78526 GB 0 GB M
316 314 2022 MI FT Data Engineer 40000 GBP 52351 GB 0 GB M
317 315 2022 SE FT Data Scientist 165220 USD 165220 US 100 US M
318 316 2022 EN FT Data Engineer 35000 GBP 45807 GB 100 GB M
319 317 2022 SE FT Data Scientist 120160 USD 120160 US 100 US M
320 318 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
321 319 2022 SE FT Data Engineer 181940 USD 181940 US 0 US M
322 320 2022 SE FT Data Engineer 132320 USD 132320 US 0 US M
323 321 2022 SE FT Data Engineer 220110 USD 220110 US 0 US M
324 322 2022 SE FT Data Engineer 160080 USD 160080 US 0 US M
325 323 2022 SE FT Data Scientist 180000 USD 180000 US 0 US L
326 324 2022 SE FT Data Scientist 120000 USD 120000 US 0 US L
327 325 2022 SE FT Data Analyst 124190 USD 124190 US 100 US M
328 326 2022 EX FT Data Analyst 130000 USD 130000 US 100 US M
329 327 2022 EX FT Data Analyst 110000 USD 110000 US 100 US M
330 328 2022 SE FT Data Analyst 170000 USD 170000 US 100 US M
331 329 2022 MI FT Data Analyst 115500 USD 115500 US 100 US M
332 330 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
333 331 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
334 332 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
335 333 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
336 334 2022 SE FT Data Engineer 165400 USD 165400 US 100 US M
337 335 2022 SE FT Data Engineer 132320 USD 132320 US 100 US M
338 336 2022 MI FT Data Analyst 167000 USD 167000 US 100 US M
339 337 2022 SE FT Data Engineer 243900 USD 243900 US 100 US M
340 338 2022 SE FT Data Analyst 136600 USD 136600 US 100 US M
341 339 2022 SE FT Data Analyst 109280 USD 109280 US 100 US M
342 340 2022 SE FT Data Engineer 128875 USD 128875 US 100 US M
343 341 2022 SE FT Data Engineer 93700 USD 93700 US 100 US M
344 342 2022 EX FT Head of Data Science 224000 USD 224000 US 100 US M
345 343 2022 EX FT Head of Data Science 167875 USD 167875 US 100 US M
346 344 2022 EX FT Analytics Engineer 175000 USD 175000 US 100 US M
347 345 2022 SE FT Data Engineer 156600 USD 156600 US 100 US M
348 346 2022 SE FT Data Engineer 108800 USD 108800 US 0 US M
349 347 2022 SE FT Data Scientist 95550 USD 95550 US 0 US M
350 348 2022 SE FT Data Engineer 113000 USD 113000 US 0 US L
351 349 2022 SE FT Data Analyst 135000 USD 135000 US 100 US M
352 350 2022 SE FT Data Science Manager 161342 USD 161342 US 100 US M
353 351 2022 SE FT Data Science Manager 137141 USD 137141 US 100 US M
354 352 2022 SE FT Data Scientist 167000 USD 167000 US 100 US M
355 353 2022 SE FT Data Scientist 123000 USD 123000 US 100 US M
356 354 2022 SE FT Data Engineer 60000 GBP 78526 GB 0 GB M
357 355 2022 SE FT Data Engineer 50000 GBP 65438 GB 0 GB M
358 356 2022 SE FT Data Scientist 150000 USD 150000 US 0 US M
359 357 2022 SE FT Data Scientist 211500 USD 211500 US 100 US M
360 358 2022 SE FT Data Architect 192400 USD 192400 CA 100 CA M
361 359 2022 SE FT Data Architect 90700 USD 90700 CA 100 CA M
362 360 2022 SE FT Data Analyst 130000 USD 130000 CA 100 CA M
363 361 2022 SE FT Data Analyst 61300 USD 61300 CA 100 CA M
364 362 2022 SE FT Data Analyst 130000 USD 130000 CA 100 CA M
365 363 2022 SE FT Data Analyst 61300 USD 61300 CA 100 CA M
366 364 2022 SE FT Data Engineer 160000 USD 160000 US 0 US L
367 365 2022 SE FT Data Scientist 138600 USD 138600 US 100 US M
368 366 2022 SE FT Data Engineer 136000 USD 136000 US 0 US M
369 367 2022 MI FT Data Analyst 58000 USD 58000 US 0 US S
370 368 2022 EX FT Analytics Engineer 135000 USD 135000 US 100 US M
371 369 2022 SE FT Data Scientist 170000 USD 170000 US 100 US M
372 370 2022 SE FT Data Scientist 123000 USD 123000 US 100 US M
373 371 2022 SE FT Machine Learning Engineer 189650 USD 189650 US 0 US M
374 372 2022 SE FT Machine Learning Engineer 164996 USD 164996 US 0 US M
375 373 2022 MI FT ETL Developer 50000 EUR 54957 GR 0 GR M
376 374 2022 MI FT ETL Developer 50000 EUR 54957 GR 0 GR M
377 375 2022 EX FT Lead Data Engineer 150000 CAD 118187 CA 100 CA S
378 376 2022 SE FT Data Analyst 132000 USD 132000 US 0 US M
379 377 2022 SE FT Data Engineer 165400 USD 165400 US 100 US M
380 378 2022 SE FT Data Architect 208775 USD 208775 US 100 US M
381 379 2022 SE FT Data Architect 147800 USD 147800 US 100 US M
382 380 2022 SE FT Data Engineer 136994 USD 136994 US 100 US M
383 381 2022 SE FT Data Engineer 101570 USD 101570 US 100 US M
384 382 2022 SE FT Data Analyst 128875 USD 128875 US 100 US M
385 383 2022 SE FT Data Analyst 93700 USD 93700 US 100 US M
386 384 2022 EX FT Head of Machine Learning 6000000 INR 79039 IN 50 IN L
387 385 2022 SE FT Data Engineer 132320 USD 132320 US 100 US M
388 386 2022 EN FT Machine Learning Engineer 28500 GBP 37300 GB 100 GB L
389 387 2022 SE FT Data Analyst 164000 USD 164000 US 0 US M
390 388 2022 SE FT Data Engineer 155000 USD 155000 US 100 US M
391 389 2022 MI FT Machine Learning Engineer 95000 GBP 124333 GB 0 GB M
392 390 2022 MI FT Machine Learning Engineer 75000 GBP 98158 GB 0 GB M
393 391 2022 MI FT AI Scientist 120000 USD 120000 US 0 US M
394 392 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
395 393 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
396 394 2022 SE FT Data Analytics Manager 145000 USD 145000 US 100 US M
397 395 2022 SE FT Data Analytics Manager 105400 USD 105400 US 100 US M
398 396 2022 MI FT Machine Learning Engineer 80000 EUR 87932 FR 100 DE M
399 397 2022 MI FT Data Engineer 90000 GBP 117789 GB 0 GB M
400 398 2022 SE FT Data Scientist 215300 USD 215300 US 100 US L
401 399 2022 SE FT Data Scientist 158200 USD 158200 US 100 US L
402 400 2022 SE FT Data Engineer 209100 USD 209100 US 100 US L
403 401 2022 SE FT Data Engineer 154600 USD 154600 US 100 US L
404 402 2022 SE FT Data Analyst 115934 USD 115934 US 0 US M
405 403 2022 SE FT Data Analyst 81666 USD 81666 US 0 US M
406 404 2022 SE FT Data Engineer 175000 USD 175000 US 100 US M
407 405 2022 MI FT Data Engineer 75000 GBP 98158 GB 0 GB M
408 406 2022 MI FT Data Analyst 58000 USD 58000 US 0 US S
409 407 2022 SE FT Data Engineer 183600 USD 183600 US 100 US L
410 408 2022 MI FT Data Analyst 40000 GBP 52351 GB 100 GB M
411 409 2022 SE FT Data Scientist 180000 USD 180000 US 100 US M
412 410 2022 MI FT Data Scientist 55000 GBP 71982 GB 0 GB M
413 411 2022 MI FT Data Scientist 35000 GBP 45807 GB 0 GB M
414 412 2022 MI FT Data Engineer 60000 EUR 65949 GR 100 GR M
415 413 2022 MI FT Data Engineer 45000 EUR 49461 GR 100 GR M
416 414 2022 MI FT Data Engineer 60000 GBP 78526 GB 100 GB M
417 415 2022 MI FT Data Engineer 45000 GBP 58894 GB 100 GB M
418 416 2022 SE FT Data Scientist 260000 USD 260000 US 100 US M
419 417 2022 SE FT Data Science Engineer 60000 USD 60000 AR 100 MX L
420 418 2022 MI FT Data Engineer 63900 USD 63900 US 0 US M
421 419 2022 MI FT Machine Learning Scientist 160000 USD 160000 US 100 US L
422 420 2022 MI FT Machine Learning Scientist 112300 USD 112300 US 100 US L
423 421 2022 MI FT Data Science Manager 241000 USD 241000 US 100 US M
424 422 2022 MI FT Data Science Manager 159000 USD 159000 US 100 US M
425 423 2022 SE FT Data Scientist 180000 USD 180000 US 0 US M
426 424 2022 SE FT Data Scientist 80000 USD 80000 US 0 US M
427 425 2022 MI FT Data Engineer 82900 USD 82900 US 0 US M
428 426 2022 SE FT Data Engineer 100800 USD 100800 US 100 US L
429 427 2022 MI FT Data Engineer 45000 EUR 49461 ES 100 ES M
430 428 2022 SE FT Data Scientist 140400 USD 140400 US 0 US L
431 429 2022 MI FT Data Analyst 30000 GBP 39263 GB 100 GB M
432 430 2022 MI FT Data Analyst 40000 EUR 43966 ES 100 ES M
433 431 2022 MI FT Data Analyst 30000 EUR 32974 ES 100 ES M
434 432 2022 MI FT Data Engineer 80000 EUR 87932 ES 100 ES M
435 433 2022 MI FT Data Engineer 70000 EUR 76940 ES 100 ES M
436 434 2022 MI FT Data Engineer 80000 GBP 104702 GB 100 GB M
437 435 2022 MI FT Data Engineer 70000 GBP 91614 GB 100 GB M
438 436 2022 MI FT Data Engineer 60000 EUR 65949 ES 100 ES M
439 437 2022 MI FT Data Engineer 80000 EUR 87932 GR 100 GR M
440 438 2022 SE FT Machine Learning Engineer 189650 USD 189650 US 0 US M
441 439 2022 SE FT Machine Learning Engineer 164996 USD 164996 US 0 US M
442 440 2022 MI FT Data Analyst 40000 EUR 43966 GR 100 GR M
443 441 2022 MI FT Data Analyst 30000 EUR 32974 GR 100 GR M
444 442 2022 MI FT Data Engineer 75000 GBP 98158 GB 100 GB M
445 443 2022 MI FT Data Engineer 60000 GBP 78526 GB 100 GB M
446 444 2022 SE FT Data Scientist 215300 USD 215300 US 0 US L
447 445 2022 MI FT Data Engineer 70000 EUR 76940 GR 100 GR M
448 446 2022 SE FT Data Engineer 209100 USD 209100 US 100 US L
449 447 2022 SE FT Data Engineer 154600 USD 154600 US 100 US L
450 448 2022 SE FT Data Engineer 180000 USD 180000 US 100 US M
451 449 2022 EN FT ML Engineer 20000 EUR 21983 PT 100 PT L
452 450 2022 SE FT Data Engineer 80000 USD 80000 US 100 US M
453 451 2022 MI FT Machine Learning Developer 100000 CAD 78791 CA 100 CA M
454 452 2022 EX FT Director of Data Science 250000 CAD 196979 CA 50 CA L
455 453 2022 MI FT Machine Learning Engineer 120000 USD 120000 US 100 US S
456 454 2022 EN FT Computer Vision Engineer 125000 USD 125000 US 0 US M
457 455 2022 MI FT NLP Engineer 240000 CNY 37236 US 50 US L
458 456 2022 SE FT Data Engineer 105000 USD 105000 US 100 US M
459 457 2022 SE FT Lead Machine Learning Engineer 80000 EUR 87932 DE 0 DE M
460 458 2022 MI FT Business Data Analyst 1400000 INR 18442 IN 100 IN M
461 459 2022 MI FT Data Scientist 2400000 INR 31615 IN 100 IN L
462 460 2022 MI FT Machine Learning Infrastructure Engineer 53000 EUR 58255 PT 50 PT L
463 461 2022 EN FT Financial Data Analyst 100000 USD 100000 US 50 US L
464 462 2022 MI PT Data Engineer 50000 EUR 54957 DE 50 DE L
465 463 2022 EN FT Data Scientist 1400000 INR 18442 IN 100 IN M
466 464 2022 SE FT Principal Data Scientist 148000 EUR 162674 DE 100 DE M
467 465 2022 EN FT Data Engineer 120000 USD 120000 US 100 US M
468 466 2022 SE FT Research Scientist 144000 USD 144000 US 50 US L
469 467 2022 SE FT Data Scientist 104890 USD 104890 US 100 US M
470 468 2022 SE FT Data Engineer 100000 USD 100000 US 100 US M
471 469 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
472 470 2022 MI FT Data Analyst 135000 USD 135000 US 100 US M
473 471 2022 MI FT Data Analyst 50000 USD 50000 US 100 US M
474 472 2022 SE FT Data Scientist 220000 USD 220000 US 100 US M
475 473 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
476 474 2022 MI FT Data Scientist 140000 GBP 183228 GB 0 GB M
477 475 2022 MI FT Data Scientist 70000 GBP 91614 GB 0 GB M
478 476 2022 SE FT Data Scientist 185100 USD 185100 US 100 US M
479 477 2022 SE FT Machine Learning Engineer 220000 USD 220000 US 100 US M
480 478 2022 MI FT Data Scientist 200000 USD 200000 US 100 US M
481 479 2022 MI FT Data Scientist 120000 USD 120000 US 100 US M
482 480 2022 SE FT Machine Learning Engineer 120000 USD 120000 AE 100 AE S
483 481 2022 SE FT Machine Learning Engineer 65000 USD 65000 AE 100 AE S
484 482 2022 EX FT Data Engineer 324000 USD 324000 US 100 US M
485 483 2022 EX FT Data Engineer 216000 USD 216000 US 100 US M
486 484 2022 SE FT Data Engineer 210000 USD 210000 US 100 US M
487 485 2022 SE FT Machine Learning Engineer 120000 USD 120000 US 100 US M
488 486 2022 SE FT Data Scientist 230000 USD 230000 US 100 US M
489 487 2022 EN PT Data Scientist 100000 USD 100000 DZ 50 DZ M
490 488 2022 MI FL Data Scientist 100000 USD 100000 CA 100 US M
491 489 2022 EN CT Applied Machine Learning Scientist 29000 EUR 31875 TN 100 CZ M
492 490 2022 SE FT Head of Data 200000 USD 200000 MY 100 US M
493 491 2022 MI FT Principal Data Analyst 75000 USD 75000 CA 100 CA S
494 492 2022 MI FT Data Scientist 150000 PLN 35590 PL 100 PL L
495 493 2022 SE FT Machine Learning Developer 100000 CAD 78791 CA 100 CA M
496 494 2022 SE FT Data Scientist 100000 USD 100000 BR 100 US M
497 495 2022 MI FT Machine Learning Scientist 153000 USD 153000 US 50 US M
498 496 2022 EN FT Data Engineer 52800 EUR 58035 PK 100 DE M
499 497 2022 SE FT Data Scientist 165000 USD 165000 US 100 US M
500 498 2022 SE FT Research Scientist 85000 EUR 93427 FR 50 FR L
501 499 2022 EN FT Data Scientist 66500 CAD 52396 CA 100 CA L
502 500 2022 SE FT Machine Learning Engineer 57000 EUR 62651 NL 100 NL L
503 501 2022 MI FT Head of Data 30000 EUR 32974 EE 100 EE S
504 502 2022 EN FT Data Scientist 40000 USD 40000 JP 100 MY L
505 503 2022 MI FT Machine Learning Engineer 121000 AUD 87425 AU 100 AU L
506 504 2022 SE FT Data Engineer 115000 USD 115000 US 100 US M
507 505 2022 EN FT Data Scientist 120000 AUD 86703 AU 50 AU M
508 506 2022 MI FT Applied Machine Learning Scientist 75000 USD 75000 BO 100 US L
509 507 2022 MI FT Research Scientist 59000 EUR 64849 AT 0 AT L
510 508 2022 EN FT Research Scientist 120000 USD 120000 US 100 US L
511 509 2022 MI FT Applied Data Scientist 157000 USD 157000 US 100 US L
512 510 2022 EN FT Computer Vision Software Engineer 150000 USD 150000 AU 100 AU S
513 511 2022 MI FT Business Data Analyst 90000 CAD 70912 CA 50 CA L
514 512 2022 EN FT Data Engineer 65000 USD 65000 US 100 US S
515 513 2022 SE FT Machine Learning Engineer 65000 EUR 71444 IE 100 IE S
516 514 2022 EN FT Data Analytics Engineer 20000 USD 20000 PK 0 PK M
517 515 2022 MI FT Data Scientist 48000 USD 48000 RU 100 US S
518 516 2022 SE FT Data Science Manager 152500 USD 152500 US 100 US M
519 517 2022 MI FT Data Engineer 62000 EUR 68147 FR 100 FR M
520 518 2022 MI FT Data Scientist 115000 CHF 122346 CH 0 CH L
521 519 2022 SE FT Applied Data Scientist 380000 USD 380000 US 100 US L
522 520 2022 MI FT Data Scientist 88000 CAD 69336 CA 100 CA M
523 521 2022 EN FT Computer Vision Engineer 10000 USD 10000 PT 100 LU M
524 522 2022 MI FT Data Analyst 20000 USD 20000 GR 100 GR S
525 523 2022 SE FT Data Analytics Lead 405000 USD 405000 US 100 US L
526 524 2022 MI FT Data Scientist 135000 USD 135000 US 100 US L
527 525 2022 SE FT Applied Data Scientist 177000 USD 177000 US 100 US L
528 526 2022 MI FT Data Scientist 78000 USD 78000 US 100 US M
529 527 2022 SE FT Data Analyst 135000 USD 135000 US 100 US M
530 528 2022 SE FT Data Analyst 100000 USD 100000 US 100 US M
531 529 2022 SE FT Data Analyst 90320 USD 90320 US 100 US M
532 530 2022 MI FT Data Analyst 85000 USD 85000 CA 0 CA M
533 531 2022 MI FT Data Analyst 75000 USD 75000 CA 0 CA M
534 532 2022 SE FT Machine Learning Engineer 214000 USD 214000 US 100 US M
535 533 2022 SE FT Machine Learning Engineer 192600 USD 192600 US 100 US M
536 534 2022 SE FT Data Architect 266400 USD 266400 US 100 US M
537 535 2022 SE FT Data Architect 213120 USD 213120 US 100 US M
538 536 2022 SE FT Data Analyst 112900 USD 112900 US 100 US M
539 537 2022 SE FT Data Engineer 155000 USD 155000 US 100 US M
540 538 2022 MI FT Data Scientist 141300 USD 141300 US 0 US M
541 539 2022 MI FT Data Scientist 102100 USD 102100 US 0 US M
542 540 2022 SE FT Data Analyst 115934 USD 115934 US 100 US M
543 541 2022 SE FT Data Analyst 81666 USD 81666 US 100 US M
544 542 2022 MI FT Data Engineer 206699 USD 206699 US 0 US M
545 543 2022 MI FT Data Engineer 99100 USD 99100 US 0 US M
546 544 2022 SE FT Data Engineer 130000 USD 130000 US 100 US M
547 545 2022 SE FT Data Engineer 115000 USD 115000 US 100 US M
548 546 2022 SE FT Data Engineer 110500 USD 110500 US 100 US M
549 547 2022 SE FT Data Engineer 130000 USD 130000 US 100 US M
550 548 2022 SE FT Data Analyst 99050 USD 99050 US 100 US M
551 549 2022 SE FT Data Engineer 160000 USD 160000 US 100 US M
552 550 2022 SE FT Data Scientist 205300 USD 205300 US 0 US L
553 551 2022 SE FT Data Scientist 140400 USD 140400 US 0 US L
554 552 2022 SE FT Data Scientist 176000 USD 176000 US 100 US M
555 553 2022 SE FT Data Scientist 144000 USD 144000 US 100 US M
556 554 2022 SE FT Data Engineer 200100 USD 200100 US 100 US M
557 555 2022 SE FT Data Engineer 160000 USD 160000 US 100 US M
558 556 2022 SE FT Data Engineer 145000 USD 145000 US 100 US M
559 557 2022 SE FT Data Engineer 70500 USD 70500 US 0 US M
560 558 2022 SE FT Data Scientist 205300 USD 205300 US 0 US M
561 559 2022 SE FT Data Scientist 140400 USD 140400 US 0 US M
562 560 2022 SE FT Analytics Engineer 205300 USD 205300 US 0 US M
563 561 2022 SE FT Analytics Engineer 184700 USD 184700 US 0 US M
564 562 2022 SE FT Data Engineer 175100 USD 175100 US 100 US M
565 563 2022 SE FT Data Engineer 140250 USD 140250 US 100 US M
566 564 2022 SE FT Data Analyst 116150 USD 116150 US 100 US M
567 565 2022 SE FT Data Engineer 54000 USD 54000 US 0 US M
568 566 2022 SE FT Data Analyst 170000 USD 170000 US 100 US M
569 567 2022 MI FT Data Analyst 50000 GBP 65438 GB 0 GB M
570 568 2022 SE FT Data Analyst 80000 USD 80000 US 100 US M
571 569 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
572 570 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
573 571 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
574 572 2022 SE FT Data Analyst 100000 USD 100000 US 100 US M
575 573 2022 SE FT Data Analyst 69000 USD 69000 US 100 US M
576 574 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
577 575 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
578 576 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
579 577 2022 SE FT Data Analyst 150075 USD 150075 US 100 US M
580 578 2022 SE FT Data Engineer 100000 USD 100000 US 100 US M
581 579 2022 SE FT Data Engineer 25000 USD 25000 US 100 US M
582 580 2022 SE FT Data Analyst 126500 USD 126500 US 100 US M
583 581 2022 SE FT Data Analyst 106260 USD 106260 US 100 US M
584 582 2022 SE FT Data Engineer 220110 USD 220110 US 100 US M
585 583 2022 SE FT Data Engineer 160080 USD 160080 US 100 US M
586 584 2022 SE FT Data Analyst 105000 USD 105000 US 100 US M
587 585 2022 SE FT Data Analyst 110925 USD 110925 US 100 US M
588 586 2022 MI FT Data Analyst 35000 GBP 45807 GB 0 GB M
589 587 2022 SE FT Data Scientist 140000 USD 140000 US 100 US M
590 588 2022 SE FT Data Analyst 99000 USD 99000 US 0 US M
591 589 2022 SE FT Data Analyst 60000 USD 60000 US 100 US M
592 590 2022 SE FT Data Architect 192564 USD 192564 US 100 US M
593 591 2022 SE FT Data Architect 144854 USD 144854 US 100 US M
594 592 2022 SE FT Data Scientist 230000 USD 230000 US 100 US M
595 593 2022 SE FT Data Scientist 150000 USD 150000 US 100 US M
596 594 2022 SE FT Data Analytics Manager 150260 USD 150260 US 100 US M
597 595 2022 SE FT Data Analytics Manager 109280 USD 109280 US 100 US M
598 596 2022 SE FT Data Scientist 210000 USD 210000 US 100 US M
599 597 2022 SE FT Data Analyst 170000 USD 170000 US 100 US M
600 598 2022 MI FT Data Scientist 160000 USD 160000 US 100 US M
601 599 2022 MI FT Data Scientist 130000 USD 130000 US 100 US M
602 600 2022 EN FT Data Analyst 67000 USD 67000 CA 0 CA M
603 601 2022 EN FT Data Analyst 52000 USD 52000 CA 0 CA M
604 602 2022 SE FT Data Engineer 154000 USD 154000 US 100 US M
605 603 2022 SE FT Data Engineer 126000 USD 126000 US 100 US M
606 604 2022 SE FT Data Analyst 129000 USD 129000 US 0 US M
607 605 2022 SE FT Data Analyst 150000 USD 150000 US 100 US M
608 606 2022 MI FT AI Scientist 200000 USD 200000 IN 100 US L
@@ -0,0 +1,4 @@
duration,floatDuration
12 min,1.0
15,12.98 sec
1 Season,0.9 parsec
1 duration floatDuration
2 12 min 1.0
3 15 12.98 sec
4 1 Season 0.9 parsec
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,151 @@
"sepal.length","sepal.width","petal.length","petal.width","variety"
5.1,3.5,1.4,.2,"Setosa"
4.9,3,1.4,.2,"Setosa"
4.7,3.2,1.3,.2,"Setosa"
4.6,3.1,1.5,.2,"Setosa"
5,3.6,1.4,.2,"Setosa"
5.4,3.9,1.7,.4,"Setosa"
4.6,3.4,1.4,.3,"Setosa"
5,3.4,1.5,.2,"Setosa"
4.4,2.9,1.4,.2,"Setosa"
4.9,3.1,1.5,.1,"Setosa"
5.4,3.7,1.5,.2,"Setosa"
4.8,3.4,1.6,.2,"Setosa"
4.8,3,1.4,.1,"Setosa"
4.3,3,1.1,.1,"Setosa"
5.8,4,1.2,.2,"Setosa"
5.7,4.4,1.5,.4,"Setosa"
5.4,3.9,1.3,.4,"Setosa"
5.1,3.5,1.4,.3,"Setosa"
5.7,3.8,1.7,.3,"Setosa"
5.1,3.8,1.5,.3,"Setosa"
5.4,3.4,1.7,.2,"Setosa"
5.1,3.7,1.5,.4,"Setosa"
4.6,3.6,1,.2,"Setosa"
5.1,3.3,1.7,.5,"Setosa"
4.8,3.4,1.9,.2,"Setosa"
5,3,1.6,.2,"Setosa"
5,3.4,1.6,.4,"Setosa"
5.2,3.5,1.5,.2,"Setosa"
5.2,3.4,1.4,.2,"Setosa"
4.7,3.2,1.6,.2,"Setosa"
4.8,3.1,1.6,.2,"Setosa"
5.4,3.4,1.5,.4,"Setosa"
5.2,4.1,1.5,.1,"Setosa"
5.5,4.2,1.4,.2,"Setosa"
4.9,3.1,1.5,.2,"Setosa"
5,3.2,1.2,.2,"Setosa"
5.5,3.5,1.3,.2,"Setosa"
4.9,3.6,1.4,.1,"Setosa"
4.4,3,1.3,.2,"Setosa"
5.1,3.4,1.5,.2,"Setosa"
5,3.5,1.3,.3,"Setosa"
4.5,2.3,1.3,.3,"Setosa"
4.4,3.2,1.3,.2,"Setosa"
5,3.5,1.6,.6,"Setosa"
5.1,3.8,1.9,.4,"Setosa"
4.8,3,1.4,.3,"Setosa"
5.1,3.8,1.6,.2,"Setosa"
4.6,3.2,1.4,.2,"Setosa"
5.3,3.7,1.5,.2,"Setosa"
5,3.3,1.4,.2,"Setosa"
7,3.2,4.7,1.4,"Versicolor"
6.4,3.2,4.5,1.5,"Versicolor"
6.9,3.1,4.9,1.5,"Versicolor"
5.5,2.3,4,1.3,"Versicolor"
6.5,2.8,4.6,1.5,"Versicolor"
5.7,2.8,4.5,1.3,"Versicolor"
6.3,3.3,4.7,1.6,"Versicolor"
4.9,2.4,3.3,1,"Versicolor"
6.6,2.9,4.6,1.3,"Versicolor"
5.2,2.7,3.9,1.4,"Versicolor"
5,2,3.5,1,"Versicolor"
5.9,3,4.2,1.5,"Versicolor"
6,2.2,4,1,"Versicolor"
6.1,2.9,4.7,1.4,"Versicolor"
5.6,2.9,3.6,1.3,"Versicolor"
6.7,3.1,4.4,1.4,"Versicolor"
5.6,3,4.5,1.5,"Versicolor"
5.8,2.7,4.1,1,"Versicolor"
6.2,2.2,4.5,1.5,"Versicolor"
5.6,2.5,3.9,1.1,"Versicolor"
5.9,3.2,4.8,1.8,"Versicolor"
6.1,2.8,4,1.3,"Versicolor"
6.3,2.5,4.9,1.5,"Versicolor"
6.1,2.8,4.7,1.2,"Versicolor"
6.4,2.9,4.3,1.3,"Versicolor"
6.6,3,4.4,1.4,"Versicolor"
6.8,2.8,4.8,1.4,"Versicolor"
6.7,3,5,1.7,"Versicolor"
6,2.9,4.5,1.5,"Versicolor"
5.7,2.6,3.5,1,"Versicolor"
5.5,2.4,3.8,1.1,"Versicolor"
5.5,2.4,3.7,1,"Versicolor"
5.8,2.7,3.9,1.2,"Versicolor"
6,2.7,5.1,1.6,"Versicolor"
5.4,3,4.5,1.5,"Versicolor"
6,3.4,4.5,1.6,"Versicolor"
6.7,3.1,4.7,1.5,"Versicolor"
6.3,2.3,4.4,1.3,"Versicolor"
5.6,3,4.1,1.3,"Versicolor"
5.5,2.5,4,1.3,"Versicolor"
5.5,2.6,4.4,1.2,"Versicolor"
6.1,3,4.6,1.4,"Versicolor"
5.8,2.6,4,1.2,"Versicolor"
5,2.3,3.3,1,"Versicolor"
5.6,2.7,4.2,1.3,"Versicolor"
5.7,3,4.2,1.2,"Versicolor"
5.7,2.9,4.2,1.3,"Versicolor"
6.2,2.9,4.3,1.3,"Versicolor"
5.1,2.5,3,1.1,"Versicolor"
5.7,2.8,4.1,1.3,"Versicolor"
6.3,3.3,6,2.5,"Virginica"
5.8,2.7,5.1,1.9,"Virginica"
7.1,3,5.9,2.1,"Virginica"
6.3,2.9,5.6,1.8,"Virginica"
6.5,3,5.8,2.2,"Virginica"
7.6,3,6.6,2.1,"Virginica"
4.9,2.5,4.5,1.7,"Virginica"
7.3,2.9,6.3,1.8,"Virginica"
6.7,2.5,5.8,1.8,"Virginica"
7.2,3.6,6.1,2.5,"Virginica"
6.5,3.2,5.1,2,"Virginica"
6.4,2.7,5.3,1.9,"Virginica"
6.8,3,5.5,2.1,"Virginica"
5.7,2.5,5,2,"Virginica"
5.8,2.8,5.1,2.4,"Virginica"
6.4,3.2,5.3,2.3,"Virginica"
6.5,3,5.5,1.8,"Virginica"
7.7,3.8,6.7,2.2,"Virginica"
7.7,2.6,6.9,2.3,"Virginica"
6,2.2,5,1.5,"Virginica"
6.9,3.2,5.7,2.3,"Virginica"
5.6,2.8,4.9,2,"Virginica"
7.7,2.8,6.7,2,"Virginica"
6.3,2.7,4.9,1.8,"Virginica"
6.7,3.3,5.7,2.1,"Virginica"
7.2,3.2,6,1.8,"Virginica"
6.2,2.8,4.8,1.8,"Virginica"
6.1,3,4.9,1.8,"Virginica"
6.4,2.8,5.6,2.1,"Virginica"
7.2,3,5.8,1.6,"Virginica"
7.4,2.8,6.1,1.9,"Virginica"
7.9,3.8,6.4,2,"Virginica"
6.4,2.8,5.6,2.2,"Virginica"
6.3,2.8,5.1,1.5,"Virginica"
6.1,2.6,5.6,1.4,"Virginica"
7.7,3,6.1,2.3,"Virginica"
6.3,3.4,5.6,2.4,"Virginica"
6.4,3.1,5.5,1.8,"Virginica"
6,3,4.8,1.8,"Virginica"
6.9,3.1,5.4,2.1,"Virginica"
6.7,3.1,5.6,2.4,"Virginica"
6.9,3.1,5.1,2.3,"Virginica"
5.8,2.7,5.1,1.9,"Virginica"
6.8,3.2,5.9,2.3,"Virginica"
6.7,3.3,5.7,2.5,"Virginica"
6.7,3,5.2,2.3,"Virginica"
6.3,2.5,5,1.9,"Virginica"
6.5,3,5.2,2,"Virginica"
6.2,3.4,5.4,2.3,"Virginica"
5.9,3,5.1,1.8,"Virginica"
1 sepal.length sepal.width petal.length petal.width variety
2 5.1 3.5 1.4 .2 Setosa
3 4.9 3 1.4 .2 Setosa
4 4.7 3.2 1.3 .2 Setosa
5 4.6 3.1 1.5 .2 Setosa
6 5 3.6 1.4 .2 Setosa
7 5.4 3.9 1.7 .4 Setosa
8 4.6 3.4 1.4 .3 Setosa
9 5 3.4 1.5 .2 Setosa
10 4.4 2.9 1.4 .2 Setosa
11 4.9 3.1 1.5 .1 Setosa
12 5.4 3.7 1.5 .2 Setosa
13 4.8 3.4 1.6 .2 Setosa
14 4.8 3 1.4 .1 Setosa
15 4.3 3 1.1 .1 Setosa
16 5.8 4 1.2 .2 Setosa
17 5.7 4.4 1.5 .4 Setosa
18 5.4 3.9 1.3 .4 Setosa
19 5.1 3.5 1.4 .3 Setosa
20 5.7 3.8 1.7 .3 Setosa
21 5.1 3.8 1.5 .3 Setosa
22 5.4 3.4 1.7 .2 Setosa
23 5.1 3.7 1.5 .4 Setosa
24 4.6 3.6 1 .2 Setosa
25 5.1 3.3 1.7 .5 Setosa
26 4.8 3.4 1.9 .2 Setosa
27 5 3 1.6 .2 Setosa
28 5 3.4 1.6 .4 Setosa
29 5.2 3.5 1.5 .2 Setosa
30 5.2 3.4 1.4 .2 Setosa
31 4.7 3.2 1.6 .2 Setosa
32 4.8 3.1 1.6 .2 Setosa
33 5.4 3.4 1.5 .4 Setosa
34 5.2 4.1 1.5 .1 Setosa
35 5.5 4.2 1.4 .2 Setosa
36 4.9 3.1 1.5 .2 Setosa
37 5 3.2 1.2 .2 Setosa
38 5.5 3.5 1.3 .2 Setosa
39 4.9 3.6 1.4 .1 Setosa
40 4.4 3 1.3 .2 Setosa
41 5.1 3.4 1.5 .2 Setosa
42 5 3.5 1.3 .3 Setosa
43 4.5 2.3 1.3 .3 Setosa
44 4.4 3.2 1.3 .2 Setosa
45 5 3.5 1.6 .6 Setosa
46 5.1 3.8 1.9 .4 Setosa
47 4.8 3 1.4 .3 Setosa
48 5.1 3.8 1.6 .2 Setosa
49 4.6 3.2 1.4 .2 Setosa
50 5.3 3.7 1.5 .2 Setosa
51 5 3.3 1.4 .2 Setosa
52 7 3.2 4.7 1.4 Versicolor
53 6.4 3.2 4.5 1.5 Versicolor
54 6.9 3.1 4.9 1.5 Versicolor
55 5.5 2.3 4 1.3 Versicolor
56 6.5 2.8 4.6 1.5 Versicolor
57 5.7 2.8 4.5 1.3 Versicolor
58 6.3 3.3 4.7 1.6 Versicolor
59 4.9 2.4 3.3 1 Versicolor
60 6.6 2.9 4.6 1.3 Versicolor
61 5.2 2.7 3.9 1.4 Versicolor
62 5 2 3.5 1 Versicolor
63 5.9 3 4.2 1.5 Versicolor
64 6 2.2 4 1 Versicolor
65 6.1 2.9 4.7 1.4 Versicolor
66 5.6 2.9 3.6 1.3 Versicolor
67 6.7 3.1 4.4 1.4 Versicolor
68 5.6 3 4.5 1.5 Versicolor
69 5.8 2.7 4.1 1 Versicolor
70 6.2 2.2 4.5 1.5 Versicolor
71 5.6 2.5 3.9 1.1 Versicolor
72 5.9 3.2 4.8 1.8 Versicolor
73 6.1 2.8 4 1.3 Versicolor
74 6.3 2.5 4.9 1.5 Versicolor
75 6.1 2.8 4.7 1.2 Versicolor
76 6.4 2.9 4.3 1.3 Versicolor
77 6.6 3 4.4 1.4 Versicolor
78 6.8 2.8 4.8 1.4 Versicolor
79 6.7 3 5 1.7 Versicolor
80 6 2.9 4.5 1.5 Versicolor
81 5.7 2.6 3.5 1 Versicolor
82 5.5 2.4 3.8 1.1 Versicolor
83 5.5 2.4 3.7 1 Versicolor
84 5.8 2.7 3.9 1.2 Versicolor
85 6 2.7 5.1 1.6 Versicolor
86 5.4 3 4.5 1.5 Versicolor
87 6 3.4 4.5 1.6 Versicolor
88 6.7 3.1 4.7 1.5 Versicolor
89 6.3 2.3 4.4 1.3 Versicolor
90 5.6 3 4.1 1.3 Versicolor
91 5.5 2.5 4 1.3 Versicolor
92 5.5 2.6 4.4 1.2 Versicolor
93 6.1 3 4.6 1.4 Versicolor
94 5.8 2.6 4 1.2 Versicolor
95 5 2.3 3.3 1 Versicolor
96 5.6 2.7 4.2 1.3 Versicolor
97 5.7 3 4.2 1.2 Versicolor
98 5.7 2.9 4.2 1.3 Versicolor
99 6.2 2.9 4.3 1.3 Versicolor
100 5.1 2.5 3 1.1 Versicolor
101 5.7 2.8 4.1 1.3 Versicolor
102 6.3 3.3 6 2.5 Virginica
103 5.8 2.7 5.1 1.9 Virginica
104 7.1 3 5.9 2.1 Virginica
105 6.3 2.9 5.6 1.8 Virginica
106 6.5 3 5.8 2.2 Virginica
107 7.6 3 6.6 2.1 Virginica
108 4.9 2.5 4.5 1.7 Virginica
109 7.3 2.9 6.3 1.8 Virginica
110 6.7 2.5 5.8 1.8 Virginica
111 7.2 3.6 6.1 2.5 Virginica
112 6.5 3.2 5.1 2 Virginica
113 6.4 2.7 5.3 1.9 Virginica
114 6.8 3 5.5 2.1 Virginica
115 5.7 2.5 5 2 Virginica
116 5.8 2.8 5.1 2.4 Virginica
117 6.4 3.2 5.3 2.3 Virginica
118 6.5 3 5.5 1.8 Virginica
119 7.7 3.8 6.7 2.2 Virginica
120 7.7 2.6 6.9 2.3 Virginica
121 6 2.2 5 1.5 Virginica
122 6.9 3.2 5.7 2.3 Virginica
123 5.6 2.8 4.9 2 Virginica
124 7.7 2.8 6.7 2 Virginica
125 6.3 2.7 4.9 1.8 Virginica
126 6.7 3.3 5.7 2.1 Virginica
127 7.2 3.2 6 1.8 Virginica
128 6.2 2.8 4.8 1.8 Virginica
129 6.1 3 4.9 1.8 Virginica
130 6.4 2.8 5.6 2.1 Virginica
131 7.2 3 5.8 1.6 Virginica
132 7.4 2.8 6.1 1.9 Virginica
133 7.9 3.8 6.4 2 Virginica
134 6.4 2.8 5.6 2.2 Virginica
135 6.3 2.8 5.1 1.5 Virginica
136 6.1 2.6 5.6 1.4 Virginica
137 7.7 3 6.1 2.3 Virginica
138 6.3 3.4 5.6 2.4 Virginica
139 6.4 3.1 5.5 1.8 Virginica
140 6 3 4.8 1.8 Virginica
141 6.9 3.1 5.4 2.1 Virginica
142 6.7 3.1 5.6 2.4 Virginica
143 6.9 3.1 5.1 2.3 Virginica
144 5.8 2.7 5.1 1.9 Virginica
145 6.8 3.2 5.9 2.3 Virginica
146 6.7 3.3 5.7 2.5 Virginica
147 6.7 3 5.2 2.3 Virginica
148 6.3 2.5 5 1.9 Virginica
149 6.5 3 5.2 2 Virginica
150 6.2 3.4 5.4 2.3 Virginica
151 5.9 3 5.1 1.8 Virginica
Binary file not shown.
+84
View File
@@ -0,0 +1,84 @@
name,genus,vore,order,conservation,sleep_total,sleep_rem,sleep_cycle,awake,brainwt,bodywt
Cheetah,Acinonyx,carni,Carnivora,lc,12.1,nothing,NA,11.9,NA,50
Owl monkey,Aotus,omni,Primates,NA,17,1.8,NA,7,0.0155,0.48
Mountain beaver,Aplodontia,herbi,Rodentia,nt,14.4,2.4,NA,9.6,NA,1.35
Greater short-tailed shrew,Blarina,omni,Soricomorpha,lc,14.9,2.3,0.133333333,9.1,0.00029,0.019
Cow,Bos,herbi,Artiodactyla,domesticated,4,0.7,0.666666667,20,0.423,600
Three-toed sloth,Bradypus,herbi,Pilosa,NA,14.4,2.2,0.766666667,9.6,NA,3.85
Northern fur seal,Callorhinus,carni,Carnivora,vu,8.7,1.4,0.383333333,15.3,NA,20.49
Vesper mouse,Calomys,NA,Rodentia,NA,7,NA,NA,17,NA,0.045
Dog,Canis,carni,Carnivora,domesticated,10.1,2.9,0.333333333,13.9,0.07,14
Roe deer,Capreolus,herbi,Artiodactyla,lc,3,NA,NA,21,0.0982,14.8
Goat,Capri,herbi,Artiodactyla,lc,5.3,0.6,NA,18.7,0.115,33.5
Guinea pig,Cavis,herbi,Rodentia,domesticated,9.4,0.8,0.216666667,14.6,0.0055,0.728
Grivet,Cercopithecus,omni,Primates,lc,10,0.7,NA,14,NA,4.75
Chinchilla,Chinchilla,herbi,Rodentia,domesticated,12.5,1.5,0.116666667,11.5,0.0064,0.42
Star-nosed mole,Condylura,omni,Soricomorpha,lc,10.3,2.2,NA,13.7,0.001,0.06
African giant pouched rat,Cricetomys,omni,Rodentia,NA,8.3,2,NA,15.7,0.0066,1
Lesser short-tailed shrew,Cryptotis,omni,Soricomorpha,lc,9.1,1.4,0.15,14.9,0.00014,0.005
Long-nosed armadillo,Dasypus,carni,Cingulata,lc,17.4,3.1,0.383333333,6.6,0.0108,3.5
Tree hyrax,Dendrohyrax,herbi,Hyracoidea,lc,5.3,0.5,NA,18.7,0.0123,2.95
North American Opossum,Didelphis,omni,Didelphimorphia,lc,18,4.9,0.333333333,6,0.0063,1.7
Asian elephant,Elephas,herbi,Proboscidea,en,3.9,NA,NA,20.1,4.603,2547
Big brown bat,Eptesicus,insecti,Chiroptera,lc,19.7,3.9,0.116666667,4.3,3e-04,0.023
Horse,Equus,herbi,Perissodactyla,domesticated,2.9,0.6,1,21.1,0.655,521
Donkey,Equus,herbi,Perissodactyla,domesticated,3.1,0.4,NA,20.9,0.419,187
European hedgehog,Erinaceus,omni,Erinaceomorpha,lc,10.1,3.5,0.283333333,13.9,0.0035,0.77
Patas monkey,Erythrocebus,omni,Primates,lc,10.9,1.1,NA,13.1,0.115,10
Western american chipmunk,Eutamias,herbi,Rodentia,NA,14.9,NA,NA,9.1,NA,0.071
Domestic cat,Felis,carni,Carnivora,domesticated,12.5,3.2,0.416666667,11.5,0.0256,3.3
Galago,Galago,omni,Primates,NA,9.8,1.1,0.55,14.2,0.005,0.2
Giraffe,Giraffa,herbi,Artiodactyla,cd,1.9,0.4,NA,22.1,NA,899.995
Pilot whale,Globicephalus,carni,Cetacea,cd,2.7,0.1,NA,21.35,NA,800
Gray seal,Haliochoerus,carni,Carnivora,lc,6.2,1.5,NA,17.8,0.325,85
Gray hyrax,Heterohyrax,herbi,Hyracoidea,lc,6.3,0.6,NA,17.7,0.01227,2.625
Human,Homo,omni,Primates,NA,8,1.9,1.5,16,1.32,62
Mongoose lemur,Lemur,herbi,Primates,vu,9.5,0.9,NA,14.5,NA,1.67
African elephant,Loxodonta,herbi,Proboscidea,vu,3.3,NA,NA,20.7,5.712,6654
Thick-tailed opposum,Lutreolina,carni,Didelphimorphia,lc,19.4,6.6,NA,4.6,NA,0.37
Macaque,Macaca,omni,Primates,NA,10.1,1.2,0.75,13.9,0.179,6.8
Mongolian gerbil,Meriones,herbi,Rodentia,lc,14.2,1.9,NA,9.8,NA,0.053
Golden hamster,Mesocricetus,herbi,Rodentia,en,14.3,3.1,0.2,9.7,0.001,0.12
Vole ,Microtus,herbi,Rodentia,NA,12.8,NA,NA,11.2,NA,0.035
House mouse,Mus,herbi,Rodentia,nt,12.5,1.4,0.183333333,11.5,4e-04,0.022
Little brown bat,Myotis,insecti,Chiroptera,NA,19.9,2,0.2,4.1,0.00025,0.01
Round-tailed muskrat,Neofiber,herbi,Rodentia,nt,14.6,NA,NA,9.4,NA,0.266
Slow loris,Nyctibeus,carni,Primates,NA,11,NA,NA,13,0.0125,1.4
Degu,Octodon,herbi,Rodentia,lc,7.7,0.9,NA,16.3,NA,0.21
Northern grasshopper mouse,Onychomys,carni,Rodentia,lc,14.5,NA,NA,9.5,NA,0.028
Rabbit,Oryctolagus,herbi,Lagomorpha,domesticated,8.4,0.9,0.416666667,15.6,0.0121,2.5
Sheep,Ovis,herbi,Artiodactyla,domesticated,3.8,0.6,NA,20.2,0.175,55.5
Chimpanzee,Pan,omni,Primates,NA,9.7,1.4,1.416666667,14.3,0.44,52.2
Tiger,Panthera,carni,Carnivora,en,15.8,NA,NA,8.2,NA,162.564
Jaguar,Panthera,carni,Carnivora,nt,10.4,NA,NA,13.6,0.157,100
Lion,Panthera,carni,Carnivora,vu,13.5,NA,NA,10.5,NA,161.499
Baboon,Papio,omni,Primates,NA,9.4,1,0.666666667,14.6,0.18,25.235
Desert hedgehog,Paraechinus,NA,Erinaceomorpha,lc,10.3,2.7,NA,13.7,0.0024,0.55
Potto,Perodicticus,omni,Primates,lc,11,NA,NA,13,NA,1.1
Deer mouse,Peromyscus,NA,Rodentia,NA,11.5,NA,NA,12.5,NA,0.021
Phalanger,Phalanger,NA,Diprotodontia,NA,13.7,1.8,NA,10.3,0.0114,1.62
Caspian seal,Phoca,carni,Carnivora,vu,3.5,0.4,NA,20.5,NA,86
Common porpoise,Phocoena,carni,Cetacea,vu,5.6,NA,NA,18.45,NA,53.18
Potoroo,Potorous,herbi,Diprotodontia,NA,11.1,1.5,NA,12.9,NA,1.1
Giant armadillo,Priodontes,insecti,Cingulata,en,18.1,6.1,NA,5.9,0.081,60
Rock hyrax,Procavia,NA,Hyracoidea,lc,5.4,0.5,NA,18.6,0.021,3.6
Laboratory rat,Rattus,herbi,Rodentia,lc,13,2.4,0.183333333,11,0.0019,0.32
African striped mouse,Rhabdomys,omni,Rodentia,NA,8.7,NA,NA,15.3,NA,0.044
Squirrel monkey,Saimiri,omni,Primates,NA,9.6,1.4,NA,14.4,0.02,0.743
Eastern american mole,Scalopus,insecti,Soricomorpha,lc,8.4,2.1,0.166666667,15.6,0.0012,0.075
Cotton rat,Sigmodon,herbi,Rodentia,NA,11.3,1.1,0.15,12.7,0.00118,0.148
Mole rat,Spalax,NA,Rodentia,NA,10.6,2.4,NA,13.4,0.003,0.122
Arctic ground squirrel,Spermophilus,herbi,Rodentia,lc,16.6,NA,NA,7.4,0.0057,0.92
Thirteen-lined ground squirrel,Spermophilus,herbi,Rodentia,lc,13.8,3.4,0.216666667,10.2,0.004,0.101
Golden-mantled ground squirrel,Spermophilus,herbi,Rodentia,lc,15.9,3,NA,8.1,NA,0.205
Musk shrew,Suncus,NA,Soricomorpha,NA,12.8,2,0.183333333,11.2,0.00033,0.048
Pig,Sus,omni,Artiodactyla,domesticated,9.1,2.4,0.5,14.9,0.18,86.25
Short-nosed echidna,Tachyglossus,insecti,Monotremata,NA,8.6,NA,NA,15.4,0.025,4.5
Eastern american chipmunk,Tamias,herbi,Rodentia,NA,15.8,NA,NA,8.2,NA,0.112
Brazilian tapir,Tapirus,herbi,Perissodactyla,vu,4.4,1,0.9,19.6,0.169,207.501
Tenrec,Tenrec,omni,Afrosoricida,NA,15.6,2.3,NA,8.4,0.0026,0.9
Tree shrew,Tupaia,omni,Scandentia,NA,8.9,2.6,0.233333333,15.1,0.0025,0.104
Bottle-nosed dolphin,Tursiops,carni,Cetacea,NA,5.2,NA,NA,18.8,NA,173.33
Genet,Genetta,carni,Carnivora,NA,6.3,1.3,NA,17.7,0.0175,2
Arctic fox,Vulpes,carni,Carnivora,NA,12.5,NA,NA,11.5,0.0445,3.38
Red fox,Vulpes,carni,Carnivora,NA,9.8,2.4,0.35,14.2,0.0504,4.23
1 name genus vore order conservation sleep_total sleep_rem sleep_cycle awake brainwt bodywt
2 Cheetah Acinonyx carni Carnivora lc 12.1 nothing NA 11.9 NA 50
3 Owl monkey Aotus omni Primates NA 17 1.8 NA 7 0.0155 0.48
4 Mountain beaver Aplodontia herbi Rodentia nt 14.4 2.4 NA 9.6 NA 1.35
5 Greater short-tailed shrew Blarina omni Soricomorpha lc 14.9 2.3 0.133333333 9.1 0.00029 0.019
6 Cow Bos herbi Artiodactyla domesticated 4 0.7 0.666666667 20 0.423 600
7 Three-toed sloth Bradypus herbi Pilosa NA 14.4 2.2 0.766666667 9.6 NA 3.85
8 Northern fur seal Callorhinus carni Carnivora vu 8.7 1.4 0.383333333 15.3 NA 20.49
9 Vesper mouse Calomys NA Rodentia NA 7 NA NA 17 NA 0.045
10 Dog Canis carni Carnivora domesticated 10.1 2.9 0.333333333 13.9 0.07 14
11 Roe deer Capreolus herbi Artiodactyla lc 3 NA NA 21 0.0982 14.8
12 Goat Capri herbi Artiodactyla lc 5.3 0.6 NA 18.7 0.115 33.5
13 Guinea pig Cavis herbi Rodentia domesticated 9.4 0.8 0.216666667 14.6 0.0055 0.728
14 Grivet Cercopithecus omni Primates lc 10 0.7 NA 14 NA 4.75
15 Chinchilla Chinchilla herbi Rodentia domesticated 12.5 1.5 0.116666667 11.5 0.0064 0.42
16 Star-nosed mole Condylura omni Soricomorpha lc 10.3 2.2 NA 13.7 0.001 0.06
17 African giant pouched rat Cricetomys omni Rodentia NA 8.3 2 NA 15.7 0.0066 1
18 Lesser short-tailed shrew Cryptotis omni Soricomorpha lc 9.1 1.4 0.15 14.9 0.00014 0.005
19 Long-nosed armadillo Dasypus carni Cingulata lc 17.4 3.1 0.383333333 6.6 0.0108 3.5
20 Tree hyrax Dendrohyrax herbi Hyracoidea lc 5.3 0.5 NA 18.7 0.0123 2.95
21 North American Opossum Didelphis omni Didelphimorphia lc 18 4.9 0.333333333 6 0.0063 1.7
22 Asian elephant Elephas herbi Proboscidea en 3.9 NA NA 20.1 4.603 2547
23 Big brown bat Eptesicus insecti Chiroptera lc 19.7 3.9 0.116666667 4.3 3e-04 0.023
24 Horse Equus herbi Perissodactyla domesticated 2.9 0.6 1 21.1 0.655 521
25 Donkey Equus herbi Perissodactyla domesticated 3.1 0.4 NA 20.9 0.419 187
26 European hedgehog Erinaceus omni Erinaceomorpha lc 10.1 3.5 0.283333333 13.9 0.0035 0.77
27 Patas monkey Erythrocebus omni Primates lc 10.9 1.1 NA 13.1 0.115 10
28 Western american chipmunk Eutamias herbi Rodentia NA 14.9 NA NA 9.1 NA 0.071
29 Domestic cat Felis carni Carnivora domesticated 12.5 3.2 0.416666667 11.5 0.0256 3.3
30 Galago Galago omni Primates NA 9.8 1.1 0.55 14.2 0.005 0.2
31 Giraffe Giraffa herbi Artiodactyla cd 1.9 0.4 NA 22.1 NA 899.995
32 Pilot whale Globicephalus carni Cetacea cd 2.7 0.1 NA 21.35 NA 800
33 Gray seal Haliochoerus carni Carnivora lc 6.2 1.5 NA 17.8 0.325 85
34 Gray hyrax Heterohyrax herbi Hyracoidea lc 6.3 0.6 NA 17.7 0.01227 2.625
35 Human Homo omni Primates NA 8 1.9 1.5 16 1.32 62
36 Mongoose lemur Lemur herbi Primates vu 9.5 0.9 NA 14.5 NA 1.67
37 African elephant Loxodonta herbi Proboscidea vu 3.3 NA NA 20.7 5.712 6654
38 Thick-tailed opposum Lutreolina carni Didelphimorphia lc 19.4 6.6 NA 4.6 NA 0.37
39 Macaque Macaca omni Primates NA 10.1 1.2 0.75 13.9 0.179 6.8
40 Mongolian gerbil Meriones herbi Rodentia lc 14.2 1.9 NA 9.8 NA 0.053
41 Golden hamster Mesocricetus herbi Rodentia en 14.3 3.1 0.2 9.7 0.001 0.12
42 Vole Microtus herbi Rodentia NA 12.8 NA NA 11.2 NA 0.035
43 House mouse Mus herbi Rodentia nt 12.5 1.4 0.183333333 11.5 4e-04 0.022
44 Little brown bat Myotis insecti Chiroptera NA 19.9 2 0.2 4.1 0.00025 0.01
45 Round-tailed muskrat Neofiber herbi Rodentia nt 14.6 NA NA 9.4 NA 0.266
46 Slow loris Nyctibeus carni Primates NA 11 NA NA 13 0.0125 1.4
47 Degu Octodon herbi Rodentia lc 7.7 0.9 NA 16.3 NA 0.21
48 Northern grasshopper mouse Onychomys carni Rodentia lc 14.5 NA NA 9.5 NA 0.028
49 Rabbit Oryctolagus herbi Lagomorpha domesticated 8.4 0.9 0.416666667 15.6 0.0121 2.5
50 Sheep Ovis herbi Artiodactyla domesticated 3.8 0.6 NA 20.2 0.175 55.5
51 Chimpanzee Pan omni Primates NA 9.7 1.4 1.416666667 14.3 0.44 52.2
52 Tiger Panthera carni Carnivora en 15.8 NA NA 8.2 NA 162.564
53 Jaguar Panthera carni Carnivora nt 10.4 NA NA 13.6 0.157 100
54 Lion Panthera carni Carnivora vu 13.5 NA NA 10.5 NA 161.499
55 Baboon Papio omni Primates NA 9.4 1 0.666666667 14.6 0.18 25.235
56 Desert hedgehog Paraechinus NA Erinaceomorpha lc 10.3 2.7 NA 13.7 0.0024 0.55
57 Potto Perodicticus omni Primates lc 11 NA NA 13 NA 1.1
58 Deer mouse Peromyscus NA Rodentia NA 11.5 NA NA 12.5 NA 0.021
59 Phalanger Phalanger NA Diprotodontia NA 13.7 1.8 NA 10.3 0.0114 1.62
60 Caspian seal Phoca carni Carnivora vu 3.5 0.4 NA 20.5 NA 86
61 Common porpoise Phocoena carni Cetacea vu 5.6 NA NA 18.45 NA 53.18
62 Potoroo Potorous herbi Diprotodontia NA 11.1 1.5 NA 12.9 NA 1.1
63 Giant armadillo Priodontes insecti Cingulata en 18.1 6.1 NA 5.9 0.081 60
64 Rock hyrax Procavia NA Hyracoidea lc 5.4 0.5 NA 18.6 0.021 3.6
65 Laboratory rat Rattus herbi Rodentia lc 13 2.4 0.183333333 11 0.0019 0.32
66 African striped mouse Rhabdomys omni Rodentia NA 8.7 NA NA 15.3 NA 0.044
67 Squirrel monkey Saimiri omni Primates NA 9.6 1.4 NA 14.4 0.02 0.743
68 Eastern american mole Scalopus insecti Soricomorpha lc 8.4 2.1 0.166666667 15.6 0.0012 0.075
69 Cotton rat Sigmodon herbi Rodentia NA 11.3 1.1 0.15 12.7 0.00118 0.148
70 Mole rat Spalax NA Rodentia NA 10.6 2.4 NA 13.4 0.003 0.122
71 Arctic ground squirrel Spermophilus herbi Rodentia lc 16.6 NA NA 7.4 0.0057 0.92
72 Thirteen-lined ground squirrel Spermophilus herbi Rodentia lc 13.8 3.4 0.216666667 10.2 0.004 0.101
73 Golden-mantled ground squirrel Spermophilus herbi Rodentia lc 15.9 3 NA 8.1 NA 0.205
74 Musk shrew Suncus NA Soricomorpha NA 12.8 2 0.183333333 11.2 0.00033 0.048
75 Pig Sus omni Artiodactyla domesticated 9.1 2.4 0.5 14.9 0.18 86.25
76 Short-nosed echidna Tachyglossus insecti Monotremata NA 8.6 NA NA 15.4 0.025 4.5
77 Eastern american chipmunk Tamias herbi Rodentia NA 15.8 NA NA 8.2 NA 0.112
78 Brazilian tapir Tapirus herbi Perissodactyla vu 4.4 1 0.9 19.6 0.169 207.501
79 Tenrec Tenrec omni Afrosoricida NA 15.6 2.3 NA 8.4 0.0026 0.9
80 Tree shrew Tupaia omni Scandentia NA 8.9 2.6 0.233333333 15.1 0.0025 0.104
81 Bottle-nosed dolphin Tursiops carni Cetacea NA 5.2 NA NA 18.8 NA 173.33
82 Genet Genetta carni Carnivora NA 6.3 1.3 NA 17.7 0.0175 2
83 Arctic fox Vulpes carni Carnivora NA 12.5 NA NA 11.5 0.0445 3.38
84 Red fox Vulpes carni Carnivora NA 9.8 2.4 0.35 14.2 0.0504 4.23
Binary file not shown.
@@ -0,0 +1,34 @@
# SLF4J's SimpleLogger configuration file
# Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err.
# Default logging detail level for all instances of SimpleLogger.
# Must be one of ("trace", "debug", "info", "warn", or "error").
# If not specified, defaults to "info".
org.slf4j.simpleLogger.defaultLogLevel=debug
# Logging detail level for a SimpleLogger instance named "xxxxx".
# Must be one of ("trace", "debug", "info", "warn", or "error").
# If not specified, the default logging detail level is used.
#org.slf4j.simpleLogger.log.xxxxx=
# Set to true if you want the current date and time to be included in output messages.
# Defaults to false, in which case no timestamp is included in output messages.
org.slf4j.simpleLogger.showDateTime=true
# The date and time format to be used in the output messages.
# The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat.
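# If the format is not specified or is invalid, the number of milliseconds elapsed since startup is output instead.
# There is no built-in default pattern; the value set below (yyyy-MM-dd HH:mm:ss:SSS Z) is an explicit choice.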
org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z
# Set to true if you want to output the current thread name.
# Defaults to true.
org.slf4j.simpleLogger.showThreadName=true
# Set to true if you want the Logger instance name to be included in output messages.
# Defaults to true.
org.slf4j.simpleLogger.showLogName=true
# Set to true if you want the last component of the name to be included in output messages.
# Defaults to false.
#org.slf4j.simpleLogger.showShortLogName=false
@@ -0,0 +1,3 @@
12|tuv|0.12|true
41|xyz|3.6|not assigned
89|abc|7.1|false
1 12 tuv 0.12 true
2 41 xyz 3.6 not assigned
3 89 abc 7.1 false
Binary file not shown.
1  user_id name duplicate username duplicate duplicate1 double number time empty
2 0 4 George abc a null 1203 599.213 2021-01-07 15:12:32 null
3 1 5 Paul paul null null N/A 214.211 2021-01-14 14:36:19 null
4 2 8 Johnny qwerty b null 20 412.214 2021-02-23 19:47:00 null
5 3 10 Jack buk N/A null 2414 01.01 2021-03-08 23:38:52 null
6 4 12 Samuel qwerty NA null inf 00 2021-04-01 02:30:22 null
@@ -0,0 +1,6 @@
,user_id,name,duplicate,username,duplicate,duplicate1,double,number,time,empty
0,4,George,,abc,a,null,1203,599.213,2021-01-07 15:12:32,null
1,5,Paul,,paul,null,null,N/A,214.211,2021-01-14 14:36:19,null
2,8,Johnny,,qwerty,b,null,20,412.214,2021-02-23 19:47:00,null
3,10,Jack,,buk,N/A,null,2414,01.01,2021-03-08 23:38:52,null
4,12,Samuel,,qwerty,NA,null,inf,00,2021-04-01 02:30:22,null
1 user_id name duplicate username duplicate duplicate1 double number time empty
2 0 4 George abc a null 1203 599.213 2021-01-07 15:12:32 null
3 1 5 Paul paul null null N/A 214.211 2021-01-14 14:36:19 null
4 2 8 Johnny qwerty b null 20 412.214 2021-02-23 19:47:00 null
5 3 10 Jack buk N/A null 2414 01.01 2021-03-08 23:38:52 null
6 4 12 Samuel qwerty NA null inf 00 2021-04-01 02:30:22 null
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,6 @@
;user_id;name;duplicate;username;duplicate;duplicate1;double;number;time;empty
0;4;George;"";abc;a;null;1203;599,213;2021-01-07 15:12:32;null
1;5;Paul;"";paul;null;null;N/A;214,211;2021-01-14 14:36:19;null
2;8;Johnny;;qwerty;b;null;20;412,214;2021-02-23 19:47:00;null
3;10;Jack;"";buk;N/A;null;2414;01,01;2021-03-08 23:38:52;null
4;12;Samuel;"";qwerty;NA;null;inf;00;2021-04-01 02:30:22;null
1 user_id name duplicate username duplicate duplicate1 double number time empty
2 0 4 George abc a null 1203 599,213 2021-01-07 15:12:32 null
3 1 5 Paul paul null null N/A 214,211 2021-01-14 14:36:19 null
4 2 8 Johnny qwerty b null 20 412,214 2021-02-23 19:47:00 null
5 3 10 Jack buk N/A null 2414 01,01 2021-03-08 23:38:52 null
6 4 12 Samuel qwerty NA null inf 00 2021-04-01 02:30:22 null
Binary file not shown.
+10
View File
@@ -0,0 +1,10 @@
"fixed acidity";"volatile acidity";"citric acid";"residual sugar";"chlorides";"free sulfur dioxide";"total sulfur dioxide";"density";"pH";"sulphates";"alcohol";"quality"
7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5
7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;9.8;5
11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58;9.8;6
7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
7.4;0.66;0;1.8;0.075;13;40;0.9978;3.51;0.56;9.4;5
7.9;0.6;0.06;1.6;0.069;15;59;0.9964;3.3;0.46;9.4;5
7.3;0.65;0;1.2;0.065;15;21;0.9946;3.39;0.47;10;7
7.8;0.58;0.02;2;0.073;9;18;0.9968;3.36;0.57;9.5;7
1 fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
2 7.4 0.7 0 1.9 0.076 11 34 0.9978 3.51 0.56 9.4 5
3 7.8 0.88 0 2.6 0.098 25 67 0.9968 3.2 0.68 9.8 5
4 7.8 0.76 0.04 2.3 0.092 15 54 0.997 3.26 0.65 9.8 5
5 11.2 0.28 0.56 1.9 0.075 17 60 0.998 3.16 0.58 9.8 6
6 7.4 0.7 0 1.9 0.076 11 34 0.9978 3.51 0.56 9.4 5
7 7.4 0.66 0 1.8 0.075 13 40 0.9978 3.51 0.56 9.4 5
8 7.9 0.6 0.06 1.6 0.069 15 59 0.9964 3.3 0.46 9.4 5
9 7.3 0.65 0 1.2 0.065 15 21 0.9946 3.39 0.47 10 7
10 7.8 0.58 0.02 2 0.073 9 18 0.9968 3.36 0.57 9.5 7
@@ -0,0 +1,3 @@
Column1;Column2
0,25;18
1,24;19
1 Column1 Column2
2 0,25 18
3 1,24 19