init research
This commit is contained in:
Vendored
+7
@@ -0,0 +1,7 @@
|
||||
## :dataframe-csv
|
||||
|
||||
This module, published as `dataframe-csv`, contains all the logic and tests that enable DataFrame to work with `csv`
|
||||
files.
|
||||
|
||||
This module is currently experimental, so it is not included when
|
||||
you add the `dataframe` dependency to your project.
|
||||
+173
@@ -0,0 +1,173 @@
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
|
||||
public fun <init> ()V
|
||||
public fun <init> (C)V
|
||||
public synthetic fun <init> (CILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public fun acceptsExtension (Ljava/lang/String;)Z
|
||||
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
|
||||
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
|
||||
public fun getTestOrder ()I
|
||||
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/QuoteMode : java/lang/Enum {
|
||||
public static final field ALL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
public static final field ALL_NON_NULL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
public static final field MINIMAL Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
public static final field NONE Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
public static final field NON_NUMERIC Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
public static fun getEntries ()Lkotlin/enums/EnumEntries;
|
||||
public static fun valueOf (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
public static fun values ()[Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadCsvDeephavenKt {
|
||||
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadCsvStrKt {
|
||||
public static final fun readCsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readCsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadDelimDeephavenKt {
|
||||
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadDelimStrKt {
|
||||
public static final fun readDelimStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readDelimStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadTsvDeephavenKt {
|
||||
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static final synthetic fun readTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/File;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/io/InputStream;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/net/URL;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;Ljava/nio/charset/Charset;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/nio/file/Path;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZLorg/jetbrains/kotlinx/dataframe/io/Compression;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ReadTsvStrKt {
|
||||
public static final fun readTsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public static synthetic fun readTsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;ZLjava/util/List;Ljava/util/Map;JLjava/lang/Long;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ZZZCZZZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ToCsvStrKt {
|
||||
public static final fun toCsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;
|
||||
public static synthetic fun toCsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ToDelimStrKt {
|
||||
public static final fun toDelimStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;
|
||||
public static synthetic fun toDelimStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/ToTsvStrKt {
|
||||
public static final fun toTsvStr (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZCLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;
|
||||
public static synthetic fun toTsvStr$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;ZCLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)Ljava/lang/String;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/TsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
|
||||
public fun <init> ()V
|
||||
public fun <init> (C)V
|
||||
public synthetic fun <init> (CILkotlin/jvm/internal/DefaultConstructorMarker;)V
|
||||
public fun acceptsExtension (Ljava/lang/String;)Z
|
||||
public fun acceptsSample (Lorg/jetbrains/kotlinx/dataframe/io/SupportedFormatSample;)Z
|
||||
public fun createDefaultReadMethod (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethod;
|
||||
public fun getTestOrder ()I
|
||||
public fun readDataFrame (Ljava/io/File;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/io/InputStream;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
public fun readDataFrame (Ljava/nio/file/Path;Ljava/util/List;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/UtilKt {
|
||||
public static final fun getDEFAULT_DELIM_NULL_STRINGS ()Ljava/util/Set;
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/WriteCsvDeephavenKt {
|
||||
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)V
|
||||
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static final fun writeCsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeCsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/WriteDelimDeephavenKt {
|
||||
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)V
|
||||
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static final fun writeDelim (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeDelim$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
}
|
||||
|
||||
public final class org/jetbrains/kotlinx/dataframe/io/WriteTsvDeephavenKt {
|
||||
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;)V
|
||||
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static final fun writeTsv (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;)V
|
||||
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/io/File;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/Appendable;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;Lkotlin/jvm/functions/Function2;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/lang/String;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
public static synthetic fun writeTsv$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;Ljava/nio/file/Path;CZLjava/lang/Character;Lorg/jetbrains/kotlinx/dataframe/io/QuoteMode;Ljava/lang/Character;Ljava/lang/Character;Ljava/util/List;Ljava/lang/String;ILjava/lang/Object;)V
|
||||
}
|
||||
|
||||
+153
@@ -0,0 +1,153 @@
|
||||
import nl.jolanrensen.kodex.gradle.creatingRunKodexTask
|
||||
import org.gradle.jvm.tasks.Jar
|
||||
|
||||
plugins {
    with(convention.plugins) {
        alias(kotlinJvm8)
    }
    with(libs.plugins) {
        alias(publisher)
        alias(serialization)
        alias(kodex)
        alias(binary.compatibility.validator)
        alias(kotlinx.benchmark)
    }
    idea
}

group = "org.jetbrains.kotlinx"

dependencies {
    api(projects.core)

    // for reading/writing JSON <-> DataFrame/DataRow in CSV/TSV/Delim
    // can safely be excluded when working without JSON and only writing flat dataframes
    api(projects.dataframeJson)

    // for csv reading
    api(libs.deephavenCsv)
    // for csv writing
    api(libs.commonsCsv)
    implementation(libs.commonsIo)
    implementation(libs.sl4j)
    implementation(libs.kotlinLogging)
    implementation(libs.kotlin.reflect)

    testImplementation(libs.kotlinx.benchmark.runtime)
    testImplementation(libs.junit)
    testImplementation(libs.sl4jsimple)
    testImplementation(libs.kotestAssertions) {
        exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
    }
}

benchmark {
    targets {
        register("test")
    }
}

val generatedSourcesFolderName = "generated-sources"

// Backup the kotlin source files location
val kotlinMainSources = kotlin.sourceSets.main
    .get()
    .kotlin.sourceDirectories
    .toList()
val kotlinTestSources = kotlin.sourceSets.test
    .get()
    .kotlin.sourceDirectories
    .toList()

// Joins path segments with the platform-specific separator.
fun pathOf(vararg parts: String) = parts.joinToString(File.separator)

// Include both test and main sources for cross-referencing, exclude generated sources
val processKDocsMainSources = (kotlinMainSources + kotlinTestSources)
    .filterNot { pathOf("build", "generated") in it.path }

// sourceset of the generated sources as a result of `processKDocsMain`, this will create linter tasks
val generatedSources by kotlin.sourceSets.creating {
    kotlin {
        setSrcDirs(
            listOf(
                "$generatedSourcesFolderName/src/main/kotlin",
                "$generatedSourcesFolderName/src/main/java",
            ),
        )
    }
}

// Task to generate the processed documentation
val processKDocsMain by creatingRunKodexTask(processKDocsMainSources) {
    group = "KDocs"
    target = file(generatedSourcesFolderName)

    // false, so `runKtlintFormatOverGeneratedSourcesSourceSet` can format the output
    outputReadOnly = false

    exportAsHtml {
        dir = file("../docs/StardustDocs/resources/snippets/kdocs")
    }
    finalizedBy("runKtlintFormatOverGeneratedSourcesSourceSet")
}

// The generated sources are only formatted (see `finalizedBy` above), never lint-checked.
tasks.named("ktlintGeneratedSourcesSourceSetCheck") {
    onlyIf { false }
}
tasks.named("runKtlintCheckOverGeneratedSourcesSourceSet") {
    onlyIf { false }
}

// If `changeJarTask` is run, modify all Jar tasks such that before running the Kotlin sources are set to
// the target of `processKDocsMain`, and they are returned to normal afterward.
// This is usually only done when publishing
val changeJarTask by tasks.registering {
    outputs.upToDateWhen { project.hasProperty("skipKodex") }
    doFirst {
        tasks.withType<Jar> {
            doFirst {
                require(generatedSources.kotlin.srcDirs.toList().isNotEmpty()) {
                    // The lazy-message lambda must *return* the message: `require` calls `toString()` on the
                    // lambda's result, so returning the `Unit` of `logger.error(...)` would produce an
                    // exception whose message is just "kotlin.Unit". The message is still logged as before.
                    "`processKDocsMain`'s outputs are empty, did `processKDocsMain` run before this task?"
                        .also { logger.error(it) }
                }
                kotlin.sourceSets.main {
                    kotlin.setSrcDirs(generatedSources.kotlin.srcDirs)
                }
                logger.lifecycle("$this is run with modified sources: \"$generatedSourcesFolderName\"")
            }

            doLast {
                // restore the original sources once the Jar has been built
                kotlin.sourceSets.main {
                    kotlin.setSrcDirs(kotlinMainSources)
                }
            }
        }
    }
}

// if `processKDocsMain` runs, the Jar tasks must run after it so the generated-sources are there
tasks.withType<Jar> {
    mustRunAfter(changeJarTask, processKDocsMain)
}

// modify all publishing tasks to depend on `changeJarTask` so the sources are swapped out with generated sources
tasks.configureEach {
    if (!project.hasProperty("skipKodex") && name.startsWith("publish")) {
        dependsOn(processKDocsMain, changeJarTask)
    }
}

// Exclude the generated/processed sources from the IDE
idea {
    module {
        excludeDirs.add(file(generatedSourcesFolderName))
    }
}

kotlinPublications {
    publication {
        publicationName = "dataframeCsv"
        artifactId = project.name
        description = "CSV support for Kotlin DataFrame"
        packageName = artifactId
    }
}
|
||||
+274
@@ -0,0 +1,274 @@
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
import io.deephaven.csv.CsvSpecs
|
||||
import org.apache.commons.csv.CSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
|
||||
import org.jetbrains.kotlinx.dataframe.io.ColType
|
||||
import org.jetbrains.kotlinx.dataframe.io.Compression
|
||||
import org.jetbrains.kotlinx.dataframe.io.DefaultNullStringsContentLink
|
||||
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
|
||||
import java.nio.charset.Charset
|
||||
|
||||
/**
|
||||
* Contains both the default values of csv/tsv parameters and the parameter KDocs.
|
||||
*/
|
||||
@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference")
|
||||
internal object DelimParams {
|
||||
|
||||
/**
|
||||
* @param path The file path to read.
|
||||
* Use [charset] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias PATH_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param file The file to read.
|
||||
* Use [charset] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias FILE_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param url The URL from which to fetch the data.
|
||||
* Use [charset] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias URL_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param fileOrUrl The file path or URL to read the data from.
|
||||
* Use [charset] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias FILE_OR_URL_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param inputStream Represents the file to read.
|
||||
* Use [charset] to specify the encoding.
|
||||
*/
|
||||
typealias INPUT_STREAM_READ = Nothing
|
||||
|
||||
/** @param text The raw data to read in the form of a [String]. */
|
||||
typealias TEXT_READ = Nothing
|
||||
|
||||
/** @param file The file to write to. */
|
||||
typealias FILE_WRITE = Nothing
|
||||
|
||||
/** @param path The path pointing to a file to write to. */
|
||||
typealias PATH_WRITE = Nothing
|
||||
|
||||
/** @param writer The [Appendable] to write to. */
|
||||
typealias WRITER_WRITE = Nothing
|
||||
|
||||
/**
|
||||
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
|
||||
* Default: `null`
|
||||
*
|
||||
* If `null`, the Charset will be read from the BOM of the provided input,
|
||||
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
|
||||
*/
|
||||
val CHARSET: Charset? = null
|
||||
|
||||
/**
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
*/
|
||||
const val CSV_DELIMITER: Char = ','
|
||||
|
||||
/**
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
*/
|
||||
const val TSV_DELIMITER: Char = '\t'
|
||||
|
||||
/**
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
*/
|
||||
const val DELIM_DELIMITER: Char = ','
|
||||
|
||||
/**
|
||||
* @param header Optional column titles. Default: empty list.
|
||||
*
|
||||
* If non-empty, the data will be read with [header] as the column titles
|
||||
* (use [skipLines] if there's a header in the data).
|
||||
* If empty (default), the header will be read from the data.
|
||||
*/
|
||||
val HEADER: List<String> = emptyList()
|
||||
|
||||
/**
|
||||
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
|
||||
* Default: `false`.
|
||||
*
|
||||
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
|
||||
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
|
||||
* Column widths are determined by the header in the data (if present), or manually by setting
|
||||
* [fixedColumnWidths].
|
||||
*/
|
||||
const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false
|
||||
|
||||
/**
|
||||
* @param fixedColumnWidths The fixed column widths. Default: empty list.
|
||||
*
|
||||
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
|
||||
* (if present), else, this manually sets the column widths.
|
||||
* The number of widths should match the number of columns.
|
||||
*/
|
||||
val FIXED_COLUMN_WIDTHS: List<Int> = emptyList()
|
||||
|
||||
/**
|
||||
* @param compression The compression of the data.
|
||||
* Default: [Compression.None], unless detected otherwise from the input file or url.
|
||||
*/
|
||||
val COMPRESSION: Compression<*> = Compression.None
|
||||
|
||||
/**
|
||||
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
|
||||
*
|
||||
* If supplied for a certain column name (inferred from data or given by [header]),
|
||||
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
|
||||
*
|
||||
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
|
||||
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
|
||||
* to set a _default_ column type, like [ColType.String].
|
||||
*/
|
||||
val COL_TYPES: Map<String, ColType> = emptyMap()
|
||||
|
||||
/**
|
||||
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
|
||||
*
|
||||
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
|
||||
*/
|
||||
const val SKIP_LINES: Long = 0L
|
||||
|
||||
/**
|
||||
* @param readLines The maximum number of lines to read from the data. Default: `null`.
|
||||
*
|
||||
* If `null`, all lines will be read.
|
||||
*/
|
||||
val READ_LINES: Long? = null
|
||||
|
||||
/**
|
||||
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
|
||||
* Default: `null`.
|
||||
*
|
||||
* Can configure locale, date format, double parsing, skipping types, etc.
|
||||
*
|
||||
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
|
||||
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
|
||||
*
|
||||
* The only exceptions are:
|
||||
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
|
||||
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
|
||||
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
|
||||
* the given types or the global setting.
|
||||
*/
|
||||
val PARSER_OPTIONS: ParserOptions? = null
|
||||
|
||||
/**
|
||||
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
|
||||
*
|
||||
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
|
||||
*/
|
||||
const val IGNORE_EMPTY_LINES: Boolean = false
|
||||
|
||||
/**
|
||||
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too short will be interpreted as _empty_ values.
|
||||
*/
|
||||
const val ALLOW_MISSING_COLUMNS: Boolean = true
|
||||
|
||||
/**
|
||||
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too long will have those columns dropped.
|
||||
*/
|
||||
const val IGNORE_EXCESS_COLUMNS: Boolean = true
|
||||
|
||||
/**
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
*/
|
||||
const val QUOTE: Char = '"'
|
||||
|
||||
/**
|
||||
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
|
||||
* Default: `true`.
|
||||
*/
|
||||
const val IGNORE_SURROUNDING_SPACES: Boolean = true
|
||||
|
||||
/**
|
||||
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
|
||||
* Default: `false`.
|
||||
*/
|
||||
const val TRIM_INSIDE_QUOTED: Boolean = false
|
||||
|
||||
/**
|
||||
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
|
||||
*
|
||||
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
|
||||
* This is usually faster but can be turned off for debugging.
|
||||
*/
|
||||
const val PARSE_PARALLEL: Boolean = true
|
||||
|
||||
/**
|
||||
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV parsing options.
|
||||
*/
|
||||
val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it }
|
||||
|
||||
/** @param includeHeader Whether to include the header in the output. Default: `true`. */
|
||||
const val INCLUDE_HEADER: Boolean = true
|
||||
|
||||
/**
|
||||
* @param quoteMode The [QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL].
|
||||
*/
|
||||
val QUOTE_MODE: QuoteMode = QuoteMode.MINIMAL
|
||||
|
||||
/**
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
*/
|
||||
val ESCAPE_CHAR: Char? = null
|
||||
|
||||
/**
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
*/
|
||||
const val COMMENT_CHAR: Char = '#'
|
||||
|
||||
/**
|
||||
* @param recordSeparator The string that separates records in a CSV / TSV file.
|
||||
* Default: `"\n"`, a Unix-newline.
|
||||
*/
|
||||
const val RECORD_SEPARATOR: String = "\n"
|
||||
|
||||
/**
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
*/
|
||||
val HEADER_COMMENTS: List<String> = emptyList()
|
||||
|
||||
/**
|
||||
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV writing options.
|
||||
*/
|
||||
val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it }
|
||||
}
|
||||
+69
@@ -0,0 +1,69 @@
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
import kotlin.annotation.AnnotationTarget.ANNOTATION_CLASS
|
||||
import kotlin.annotation.AnnotationTarget.CLASS
|
||||
import kotlin.annotation.AnnotationTarget.CONSTRUCTOR
|
||||
import kotlin.annotation.AnnotationTarget.FIELD
|
||||
import kotlin.annotation.AnnotationTarget.FILE
|
||||
import kotlin.annotation.AnnotationTarget.FUNCTION
|
||||
import kotlin.annotation.AnnotationTarget.LOCAL_VARIABLE
|
||||
import kotlin.annotation.AnnotationTarget.PROPERTY
|
||||
import kotlin.annotation.AnnotationTarget.PROPERTY_GETTER
|
||||
import kotlin.annotation.AnnotationTarget.PROPERTY_SETTER
|
||||
import kotlin.annotation.AnnotationTarget.TYPE
|
||||
import kotlin.annotation.AnnotationTarget.TYPEALIAS
|
||||
import kotlin.annotation.AnnotationTarget.VALUE_PARAMETER
|
||||
|
||||
/**
 * Marker for the documentation processor: every `Documentable` carrying this annotation
 * is left out of the generated sources.
 *
 * **NOTE: DO NOT RENAME!**
 */
@Target(
    CLASS, ANNOTATION_CLASS, TYPEALIAS, FILE,
    CONSTRUCTOR, FUNCTION,
    PROPERTY, PROPERTY_GETTER, PROPERTY_SETTER, FIELD,
    LOCAL_VARIABLE, VALUE_PARAMETER, TYPE,
)
internal annotation class ExcludeFromSources
|
||||
|
||||
/**
 * Marker for the documentation processor: every `Documentable` carrying this annotation
 * is exported to HTML.
 *
 * You can use @exportAsHtmlStart and @exportAsHtmlEnd to specify a range of the doc to
 * export to HTML.
 *
 * **NOTE: DO NOT RENAME!**
 *
 * @param theme Whether to include a simple theme in the HTML file. Default is `true`.
 * @param stripReferences Whether to strip `[references]` from the HTML file. Default is `true`.
 *   This is useful when you want to include the HTML file in a website, where the references are not
 *   needed or would break.
 */
@Target(
    CLASS, ANNOTATION_CLASS, TYPEALIAS, FILE,
    CONSTRUCTOR, FUNCTION,
    PROPERTY, PROPERTY_GETTER, PROPERTY_SETTER, FIELD,
    LOCAL_VARIABLE, VALUE_PARAMETER, TYPE,
)
internal annotation class ExportAsHtml(val theme: Boolean = true, val stripReferences: Boolean = true)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import io.deephaven.csv.containers.ByteSlice
|
||||
import io.deephaven.csv.tokenization.Tokenizer.CustomDoubleParser
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
|
||||
/**
 * Wrapper around [FastDoubleParser] so we can use it from Deephaven.
 *
 * Both [parse] overloads report an unparsable input by throwing [NumberFormatException]
 * (presumably what Deephaven's [CustomDoubleParser] expects — confirm against its docs).
 *
 * @param parserOptions forwarded to [FastDoubleParser]; `null` by default.
 */
internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions? = null) : CustomDoubleParser {

    private val fastDoubleParser = FastDoubleParser(parserOptions)

    /** Parses the bytes of [bs] as a [Double], throwing [NumberFormatException] on failure. */
    override fun parse(bs: ByteSlice): Double =
        parseOrThrow { fastDoubleParser.parseOrNull(bs.data(), bs.begin(), bs.size()) }

    /** Parses [cs] as a [Double], throwing [NumberFormatException] on failure. */
    override fun parse(cs: CharSequence): Double =
        parseOrThrow { fastDoubleParser.parseOrNull(cs.toString()) }

    /**
     * Runs [attempt] and turns both a `null` result and any exception thrown by the underlying parser
     * into a [NumberFormatException], so the two [parse] overloads fail in the same way.
     */
    private inline fun parseOrThrow(attempt: () -> Double?): Double =
        try {
            attempt()
        } catch (_: Exception) {
            null
        } ?: throw NumberFormatException()
}
|
||||
+204
@@ -0,0 +1,204 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import io.deephaven.csv.parsers.DataType
|
||||
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
|
||||
import io.deephaven.csv.parsers.DataType.BYTE
|
||||
import io.deephaven.csv.parsers.DataType.CHAR
|
||||
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
|
||||
import io.deephaven.csv.parsers.DataType.DOUBLE
|
||||
import io.deephaven.csv.parsers.DataType.FLOAT
|
||||
import io.deephaven.csv.parsers.DataType.INT
|
||||
import io.deephaven.csv.parsers.DataType.LONG
|
||||
import io.deephaven.csv.parsers.DataType.SHORT
|
||||
import io.deephaven.csv.parsers.DataType.STRING
|
||||
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
|
||||
import io.deephaven.csv.sinks.Sink
|
||||
import io.deephaven.csv.sinks.SinkFactory
|
||||
import io.deephaven.csv.sinks.Source
|
||||
import kotlinx.datetime.toKotlinLocalDateTime
|
||||
import java.time.LocalDateTime
|
||||
import java.time.ZoneOffset
|
||||
import kotlin.time.Duration.Companion.nanoseconds
|
||||
|
||||
/** A column store that Deephaven can both write to (as a [Sink]) and read back from (as a [Source]). */
internal interface SinkSource<T : Any> : Sink<T>, Source<T>
|
||||
|
||||
/**
 * Implementation of Deephaven's [Sink] and [Source] that stores data in an [ArrayList].
 *
 * The implementation is based on [Writing Your Own Data Sinks](https://github.com/deephaven/deephaven-csv/blob/main/ADVANCED.md).
 *
 * If we ever store column data unboxed / primitively, this needs to be modified.
 *
 * @param columnIndex the index handed to the [SinkFactory] supplier that created this sink.
 * @param dataType the Deephaven [DataType] of the chunks written to this sink; it decides how
 *   incoming primitive arrays are unpacked into boxed values.
 */
@Suppress("UNCHECKED_CAST")
internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSource<Any> {

    @Suppress("ktlint:standard:comment-wrapping", "ktlint:standard:no-consecutive-comments")
    companion object {
        val SINK_FACTORY: SinkFactory = SinkFactory.of(
            // unused in Parsers.DEFAULT:
            /* byteSinkSupplier = */ { ListSink(it, BYTE) as SinkSource<ByteArray> },
            /* shortSinkSupplier = */ { ListSink(it, SHORT) as SinkSource<ShortArray> },
            /* intSinkSupplier = */ { ListSink(it, INT) as SinkSource<IntArray> },
            /* longSinkSupplier = */ { ListSink(it, LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* floatSinkSupplier = */ { ListSink(it, FLOAT) as SinkSource<FloatArray> },
            /* doubleSinkSupplier = */ { ListSink(it, DOUBLE) as SinkSource<DoubleArray> },
            /* booleanAsByteSinkSupplier = */ { ListSink(it, BOOLEAN_AS_BYTE) as SinkSource<ByteArray> },
            /* charSinkSupplier = */ { ListSink(it, CHAR) as SinkSource<CharArray> },
            /* stringSinkSupplier = */ { ListSink(it, STRING) as SinkSource<Array<String>> },
            /* dateTimeAsLongSinkSupplier = */ { ListSink(it, DATETIME_AS_LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* timestampAsLongSinkSupplier = */ { ListSink(it, TIMESTAMP_AS_LONG) as SinkSource<LongArray> },
        )
    }

    private val _data: MutableList<Any?> = ArrayList(1000)

    /** Read-only view of the (boxed) values collected so far. */
    val data: List<Any?>
        get() = _data

    /** `true` as soon as at least one `null` has been unpacked from a chunk or padded in by an append. */
    var hasNulls: Boolean = false
        private set

    /**
     * Converts nanoseconds since the epoch (UTC) to a kotlinx-datetime `LocalDateTime`.
     *
     * For a negative duration `toComponents` yields a negative nanosecond component, which
     * [LocalDateTime.ofEpochSecond] rejects (it requires 0..999_999_999). The components are
     * therefore normalized first: borrow one second and add a full second worth of nanoseconds.
     */
    private fun epochNanosToLocalDateTime(epochNanos: Long) =
        epochNanos.nanoseconds
            .toComponents { seconds, nanoseconds ->
                if (nanoseconds < 0) {
                    LocalDateTime.ofEpochSecond(seconds - 1, nanoseconds + 1_000_000_000, ZoneOffset.UTC)
                } else {
                    LocalDateTime.ofEpochSecond(seconds, nanoseconds, ZoneOffset.UTC)
                }
            }.toKotlinLocalDateTime()

    /**
     * Unpacks element [srcIndex] of the primitive/object array [src] into a boxed value according to
     * [dataType], or returns `null` (and records [hasNulls]) when `isNull[srcIndex]` is set.
     */
    private fun getValue(src: Any, srcIndex: Int, isNull: BooleanArray): Any? =
        if (isNull[srcIndex]) {
            hasNulls = true
            null
        } else {
            when (dataType) {
                BOOLEAN_AS_BYTE -> (src as ByteArray)[srcIndex] == 1.toByte()

                // unused in Parsers.DEFAULT
                BYTE -> (src as ByteArray)[srcIndex]

                // unused in Parsers.DEFAULT
                SHORT -> (src as ShortArray)[srcIndex]

                INT -> (src as IntArray)[srcIndex]

                LONG -> (src as LongArray)[srcIndex]

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                FLOAT -> (src as FloatArray)[srcIndex]

                DOUBLE -> (src as DoubleArray)[srcIndex]

                CHAR -> (src as CharArray)[srcIndex]

                STRING -> (src as Array<String>)[srcIndex]

                // TIMESTAMP_AS_LONG is unused in Parsers.COMPLETE and Parsers.DEFAULT
                DATETIME_AS_LONG, TIMESTAMP_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                else -> error("unsupported parser")
            }
        }

    /**
     * Appends the first `destEnd - destBegin` elements of [src] to the end of the list,
     * padding with `null`s first if the list is still shorter than [destBegin].
     */
    private fun writeAppending(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        while (data.size < destBegin) {
            _data += null
            hasNulls = true
        }
        // the chunk `src` is always 0-based, only the destination is offset
        for (srcIndex in 0..<(destEnd - destBegin)) {
            _data += getValue(src, srcIndex, isNull)
        }
    }

    /** Overwrites positions `destBegin..<destEnd` of the list with the first elements of [src]. */
    private fun writeReplacing(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        for ((srcIndex, destIndex) in (destBegin..<destEnd).withIndex()) {
            _data[destIndex] = getValue(src, srcIndex, isNull)
        }
    }

    /**
     * Stores the 0-based chunk [src] at positions `destBegin..<destEnd`, either by appending
     * ([appending] is `true`) or by overwriting existing positions.
     */
    override fun write(
        src: Any,
        isNull: BooleanArray,
        destBegin: Long,
        destEnd: Long,
        appending: Boolean,
    ) {
        if (destBegin == destEnd) return
        val destBeginAsInt = destBegin.toInt()
        val destEndAsInt = destEnd.toInt()
        if (appending) {
            writeAppending(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        } else {
            writeReplacing(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        }
    }

    /**
     * Copies the stored values at positions `srcBegin..<srcEnd` into the chunk [dest], starting at
     * index 0 of [dest], and sets the matching entries of [isNull].
     *
     * `withIndex()` yields `(index, value)` pairs, so the *first* component is the 0-based position in
     * the destination chunk and the *second* is the position in the stored data.
     */
    override fun read(
        dest: Any,
        isNull: BooleanArray,
        srcBegin: Long,
        srcEnd: Long,
    ) {
        if (srcBegin == srcEnd) return
        val srcBeginAsInt = srcBegin.toInt()
        val srcEndAsInt = srcEnd.toInt()

        when (dataType) {
            BYTE -> {
                val bytes = dest as ByteArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Byte?
                    if (value != null) bytes[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            SHORT -> {
                val shorts = dest as ShortArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Short?
                    if (value != null) shorts[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            INT -> {
                val ints = dest as IntArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Int?
                    if (value != null) ints[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            LONG -> {
                val longs = dest as LongArray
                for ((destIndex, srcIndex) in (srcBeginAsInt..<srcEndAsInt).withIndex()) {
                    val value = data[srcIndex] as Long?
                    if (value != null) longs[destIndex] = value
                    isNull[destIndex] = value == null
                }
            }

            // Deephaven's fast path for numeric type inference supports only byte, short, int, and long
            // so this should never be reached
            else -> error("unsupported sink state")
        }
    }

    override fun getUnderlying(): ListSink = this
}
|
||||
+448
@@ -0,0 +1,448 @@
|
||||
@file:JvmName("ReadDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import io.deephaven.csv.CsvSpecs
|
||||
import io.deephaven.csv.parsers.DataType
|
||||
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
|
||||
import io.deephaven.csv.parsers.DataType.BYTE
|
||||
import io.deephaven.csv.parsers.DataType.CHAR
|
||||
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
|
||||
import io.deephaven.csv.parsers.DataType.DOUBLE
|
||||
import io.deephaven.csv.parsers.DataType.FLOAT
|
||||
import io.deephaven.csv.parsers.DataType.INT
|
||||
import io.deephaven.csv.parsers.DataType.LONG
|
||||
import io.deephaven.csv.parsers.DataType.SHORT
|
||||
import io.deephaven.csv.parsers.DataType.STRING
|
||||
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
|
||||
import io.deephaven.csv.parsers.Parser
|
||||
import io.deephaven.csv.parsers.Parsers
|
||||
import io.deephaven.csv.reading.CsvReader
|
||||
import io.deephaven.csv.util.CsvReaderException
|
||||
import kotlinx.datetime.LocalDate
|
||||
import kotlinx.datetime.LocalDateTime
|
||||
import kotlinx.datetime.LocalTime
|
||||
import org.apache.commons.io.input.BOMInputStream
|
||||
import org.jetbrains.kotlinx.dataframe.DataColumn
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataRow
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.api.convertTo
|
||||
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.parse
|
||||
import org.jetbrains.kotlinx.dataframe.api.parser
|
||||
import org.jetbrains.kotlinx.dataframe.api.tryParse
|
||||
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
|
||||
import org.jetbrains.kotlinx.dataframe.io.ColType
|
||||
import org.jetbrains.kotlinx.dataframe.io.Compression
|
||||
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS
|
||||
import org.jetbrains.kotlinx.dataframe.io.skippingBomCharacters
|
||||
import org.jetbrains.kotlinx.dataframe.io.toKType
|
||||
import org.jetbrains.kotlinx.dataframe.io.useDecompressed
|
||||
import java.io.InputStream
|
||||
import java.math.BigDecimal
|
||||
import java.math.BigInteger
|
||||
import java.net.URL
|
||||
import java.nio.charset.Charset
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.full.withNullability
|
||||
import kotlin.reflect.typeOf
|
||||
import kotlin.time.Duration
|
||||
import kotlin.time.Instant as StdlibInstant
|
||||
import kotlinx.datetime.Instant as DeprecatedInstant
|
||||
|
||||
/**
|
||||
* Implementation to read delimiter-separated data from an [InputStream] based on the Deephaven CSV library.
|
||||
*
|
||||
* @param inputStream Represents the file to read.
|
||||
* Use [charset] to specify the encoding.
|
||||
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
|
||||
* Default: `null`
|
||||
*
|
||||
* If `null`, the Charset will be read from the BOM of the provided input,
|
||||
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
|
||||
* @param delimiter The field delimiter character. The default is ',' for CSV, '\t' for TSV.
|
||||
* @param header Optional column titles. Default: empty list.
|
||||
*
|
||||
* If non-empty, the data will be read with [header] as the column titles
|
||||
* (use [skipLines] if there's a header in the data).
|
||||
* If empty (default), the header will be read from the data.
|
||||
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
|
||||
*
|
||||
* If supplied for a certain column name (inferred from data or given by [header]),
|
||||
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
|
||||
*
|
||||
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
|
||||
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
|
||||
* to set a _default_ column type, like [ColType.String].
|
||||
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
|
||||
*
|
||||
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
|
||||
* @param readLines The maximum number of lines to read from the data. Default: `null`.
|
||||
*
|
||||
* If `null`, all lines will be read.
|
||||
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
|
||||
* Default: `false`.
|
||||
*
|
||||
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
|
||||
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
|
||||
* Column widths are determined by the header in the data (if present), or manually by setting
|
||||
* [fixedColumnWidths].
|
||||
* @param fixedColumnWidths The fixed column widths. Default: empty list.
|
||||
*
|
||||
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
|
||||
* (if present), else, this manually sets the column widths.
|
||||
* The number of widths should match the number of columns.
|
||||
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
|
||||
* Default: `null`.
|
||||
*
|
||||
* Can configure locale, date format, double parsing, skipping types, etc.
|
||||
*
|
||||
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
|
||||
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
|
||||
*
|
||||
* The only exceptions are:
|
||||
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
|
||||
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
|
||||
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
|
||||
* the given types or the global setting.
|
||||
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
|
||||
*
|
||||
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
|
||||
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too short will be interpreted as _empty_ values.
|
||||
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too long will have those columns dropped.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
|
||||
* Default: `true`.
|
||||
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
|
||||
* Default: `false`.
|
||||
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
|
||||
*
|
||||
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
|
||||
* This is usually faster but can be turned off for debugging.
|
||||
* @param compression The compression of the data.
|
||||
* Default: [Compression.None], unless detected otherwise from the input file or url.
|
||||
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV parsing options.
|
||||
*/
|
||||
internal fun readDelimImpl(
    inputStream: InputStream,
    delimiter: Char,
    header: List<String>,
    charset: Charset?,
    hasFixedWidthColumns: Boolean,
    fixedColumnWidths: List<Int>,
    colTypes: Map<String, ColType>,
    skipLines: Long,
    readLines: Long?,
    parserOptions: ParserOptions?,
    ignoreEmptyLines: Boolean,
    allowMissingColumns: Boolean,
    ignoreExcessColumns: Boolean,
    quote: Char,
    ignoreSurroundingSpaces: Boolean,
    trimInsideQuoted: Boolean,
    parseParallel: Boolean,
    compression: Compression<*>,
    adjustCsvSpecs: AdjustCsvSpecs,
): DataFrame<*> {
    // Step 1: translate the arguments into Deephaven's CsvSpecs
    val specsBuilder = CsvSpecs.builder()
    specsBuilder.customDoubleParser(DataFrameCustomDoubleParser(parserOptions))

    // explicitly supplied null-strings win; otherwise the global ones are extended with the delim defaults
    val nullLiterals = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS)
    specsBuilder.nullValueLiterals(nullLiterals)
    specsBuilder.headerLegalizer(::legalizeHeader)
    specsBuilder.numRows(readLines ?: Long.MAX_VALUE)
    specsBuilder.ignoreEmptyLines(ignoreEmptyLines)
    specsBuilder.allowMissingColumns(allowMissingColumns)
    specsBuilder.ignoreExcessColumns(ignoreExcessColumns)
    // the delimiter is only passed on when the data is not fixed-width
    if (!hasFixedWidthColumns) specsBuilder.delimiter(delimiter)
    specsBuilder.quote(quote)
    specsBuilder.ignoreSurroundingSpaces(ignoreSurroundingSpaces)
    specsBuilder.trim(trimInsideQuoted)
    specsBuilder.concurrent(parseParallel)
    specsBuilder.header(header)
    specsBuilder.hasFixedWidthColumns(hasFixedWidthColumns)
    if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) {
        specsBuilder.fixedColumnWidths(fixedColumnWidths)
    }
    specsBuilder.skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines)
    specsBuilder.parsers(parserOptions, colTypes)

    // the caller gets the final say over the specs
    val csvSpecs = adjustCsvSpecs(specsBuilder, specsBuilder).build()

    // Step 2: decompress, strip the BOM, and let Deephaven read the data into ListSinks
    val readResult = inputStream.useDecompressed(compression) { rawStream ->
        try {
            val bomFreeStream = rawStream.skippingBomCharacters()

            // charset priority: explicitly provided -> derived from the BOM -> UTF-8
            val effectiveCharset = charset
                ?: (bomFreeStream as? BOMInputStream)?.bom?.let { Charset.forName(it.charsetName) }
                ?: Charsets.UTF_8

            CsvReader.read(csvSpecs, bomFreeStream, effectiveCharset, ListSink.SINK_FACTORY)
        } catch (e: CsvReaderException) {
            // an empty input whose header had to be inferred simply results in an empty DataFrame
            val emptyInputMessage =
                "Can't proceed because hasHeaderRow is set but input file is empty or shorter than skipHeaderRows"
            if (e.message == emptyInputMessage) return@readDelimImpl DataFrame.empty()

            val causeMessage = e.cause?.message.orEmpty()
            throw IllegalStateException(
                "Could not read delimiter-separated data: CsvReaderException: ${e.message}: $causeMessage",
                e,
            )
        }
    }

    // Step 3: turn every Deephaven result column into a DataColumn,
    // using the column-specific type if given, else the ColType.DEFAULT one (if any)
    val fallbackColType = colTypes[ColType.DEFAULT]
    val dataColumns = readResult.map { resultColumn ->
        resultColumn.toDataColumn(
            parserOptions = parserOptions,
            desiredColType = colTypes[resultColumn.name()] ?: fallbackColType,
        )
    }

    return dataFrameOf(dataColumns)
}
|
||||
|
||||
/**
 * Wraps the data collected in this result column's [ListSink] in a [DataColumn].
 *
 * Columns that Deephaven read as anything other than [STRING] are returned as is.
 * String columns are either converted to [desiredColType] (when given) or run through
 * DataFrame's own `tryParse`, skipping [typesDeephavenAlreadyParses].
 *
 * @param parserOptions Optional parsing options forwarded to the conversion / parsing of String columns.
 * @param desiredColType The requested type for this column, or `null` to infer it.
 */
@Suppress("UNCHECKED_CAST")
private fun CsvReader.ResultColumn.toDataColumn(
    parserOptions: ParserOptions?,
    desiredColType: ColType?,
): DataColumn<*> {
    val sink = data()!! as ListSink
    val values = sink.data
    val sinkDataType = sink.dataType
    val containsNulls = sink.hasNulls
    val kType = dataType().toKType().withNullability(containsNulls)

    val rawColumn = DataColumn.createValueColumn(name = name(), values = values, type = kType)

    // everything Deephaven already parsed to a non-String type is final
    if (sinkDataType != STRING) return rawColumn

    // String columns may need additional parsing; they stay String if that fails
    val stringColumn = rawColumn as ValueColumn<String?>

    if (desiredColType != null) {
        return stringColumn.convertTo(
            newType = desiredColType.toKType().withNullability(true),
            parserOptions = parserOptions,
        )
    }

    // there is no point in retrying the types Deephaven already attempted, so skip those as well
    val requestedSkipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    val effectiveParserOptions = (parserOptions ?: ParserOptions())
        .copy(skipTypes = requestedSkipTypes + typesDeephavenAlreadyParses)

    return stringColumn.tryParse(effectiveParserOptions)
}
|
||||
|
||||
/**
 * Maps a Deephaven [DataType] to the (non-nullable) [KType] of the values stored for it.
 *
 * @throws IllegalStateException for `null` and for [DataType.CUSTOM].
 */
private fun DataType?.toKType(): KType {
    return when (this) {
        null -> error("null data type")

        DataType.CUSTOM -> error("custom data type")

        STRING -> typeOf<String>()

        CHAR -> typeOf<Char>()

        BOOLEAN_AS_BYTE -> typeOf<Boolean>()

        // BYTE and SHORT are unused in Parsers.DEFAULT
        BYTE -> typeOf<Byte>()

        SHORT -> typeOf<Short>()

        INT -> typeOf<Int>()

        LONG -> typeOf<Long>()

        // FLOAT is unused in Parsers.COMPLETE and Parsers.DEFAULT
        FLOAT -> typeOf<Float>()

        DOUBLE -> typeOf<Double>()

        // TIMESTAMP_AS_LONG is unused in Parsers.COMPLETE and Parsers.DEFAULT
        DATETIME_AS_LONG, TIMESTAMP_AS_LONG -> typeOf<LocalDateTime>()
    }
}
|
||||
|
||||
/**
 * Passes every name in [header], in order, through [ColumnNameGenerator.addUnique] of a single
 * generator and returns the resulting names in the same order.
 */
private fun legalizeHeader(header: Array<String>): Array<String> {
    val nameGenerator = ColumnNameGenerator()
    return Array(header.size) { position -> nameGenerator.addUnique(header[position]) }
}
|
||||
|
||||
/**
 * Applies [skipLines] to the builder: via `skipHeaderRows` when the header is taken from the data
 * ([takeHeaderFromCsv]), via `skipRows` otherwise.
 */
private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Long): CsvSpecs.Builder =
    when {
        takeHeaderFromCsv -> skipHeaderRows(skipLines)
        else -> skipRows(skipLines)
    }
|
||||
|
||||
/**
 * Sets the correct parsers for the csv, based on [colTypes] and [ParserOptions.skipTypes].
 * If [ColType.DEFAULT] is present, it sets the default parser.
 *
 * Logic overview:
 *
 * - every column named in [colTypes] (except the [ColType.DEFAULT] entry) gets the parser of its type
 * - if [ColType.DEFAULT] is among the keys of [colTypes]
 *     - let Deephaven use _only_ the parser given as [ColType.DEFAULT] type, with [Parsers.STRING] as fallback
 * - if [ColType.DEFAULT] is not among the keys
 *     - let Deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *     - subtract the parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 *
 * [Parsers.STRING] is never subtracted. It is the fallback for columns no other parser can read,
 * and types without a dedicated Deephaven parser are reported as [ColType.String] by [toColType],
 * so subtracting it would drop the fallback whenever such a type is skipped.
 *
 * We will not use [Deephaven's DateTime parser][Parsers.DATETIME].
 * This is done to avoid different behavior compared to [DataFrame.parse];
 * Deephaven parses `Instant` values as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047)
 *
 * Note that `skipTypes` will never skip a type explicitly set by `colTypes`.
 * This is intended.
 *
 * @param parserOptions Optional parsing options; only its `skipTypes` are read here,
 *   falling back to the global [DataFrame.parser] settings when `null`.
 * @param colTypes The requested [ColType] per column name, optionally including [ColType.DEFAULT].
 * @return this builder.
 */
private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map<String, ColType>): CsvSpecs.Builder {
    for ((colName, colType) in colTypes) {
        if (colName == ColType.DEFAULT) continue
        putParserForName(colName, colType.toCsvParser())
    }
    // BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING
    val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME
    val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    val defaultColType = colTypes[ColType.DEFAULT]
    val parsersToUse = when {
        defaultColType != null ->
            listOf(defaultColType.toCsvParser(), Parsers.STRING)

        skipTypes.isNotEmpty() -> {
            val parsersToSkip = skipTypes
                .mapNotNull { it.toColType().toCsvParserOrNull() }
                // keep the String fallback, see KDoc
                .filterNot { it == Parsers.STRING }
            defaultParsers.toSet() - parsersToSkip.toSet()
        }

        else -> defaultParsers
    }
    parsers(parsersToUse)
    return this
}
|
||||
|
||||
/**
 * Configures where the column titles come from: an empty [header] makes the reader take them
 * from the data itself, a non-empty one disables the header row and supplies [header] instead.
 */
private fun CsvSpecs.Builder.header(header: List<String>): CsvSpecs.Builder {
    val takeHeaderFromCsv = header.isEmpty()
    val withHeaderRowFlag = hasHeaderRow(takeHeaderFromCsv)
    return if (takeHeaderFromCsv) withHeaderRowFlag else withHeaderRowFlag.headers(header)
}
|
||||
|
||||
/**
 * Looks up the Deephaven CSV [Parser] that directly corresponds to this [ColType].
 *
 * @return the matching parser, or `null` when this [ColType] has no direct Deephaven counterpart.
 */
internal fun ColType.toCsvParserOrNull(): Parser<*>? {
    return when (this) {
        ColType.String -> Parsers.STRING
        ColType.Char -> Parsers.CHAR
        ColType.Boolean -> Parsers.BOOLEAN
        ColType.Int -> Parsers.INT
        ColType.Long -> Parsers.LONG
        ColType.Double -> Parsers.DOUBLE
        else -> null
    }
}
|
||||
|
||||
/**
 * Looks up the Deephaven CSV [Parser] for this [ColType].
 * Types without a direct [Parser] are read with [Parsers.STRING], leaving the actual parsing
 * to [DataFrame.parse].
 */
internal fun ColType.toCsvParser(): Parser<*> {
    val directParser = toCsvParserOrNull()
    return directParser ?: Parsers.STRING
}
|
||||
|
||||
/**
 * Maps this [KType] to the corresponding [ColType], ignoring nullability.
 * Types without a corresponding [ColType] are mapped to [ColType.String].
 */
internal fun KType.toColType(): ColType {
    // nullability plays no role in the mapping, so compare against the non-nullable variant
    val nonNullType = withNullability(false)
    return when (nonNullType) {
        // text
        typeOf<String>() -> ColType.String
        typeOf<Char>() -> ColType.Char

        // booleans and numbers
        typeOf<Boolean>() -> ColType.Boolean
        typeOf<Int>() -> ColType.Int
        typeOf<Long>() -> ColType.Long
        typeOf<Double>() -> ColType.Double
        typeOf<BigDecimal>() -> ColType.BigDecimal
        typeOf<BigInteger>() -> ColType.BigInteger

        // date and time
        typeOf<LocalDate>() -> ColType.LocalDate
        typeOf<LocalTime>() -> ColType.LocalTime
        typeOf<LocalDateTime>() -> ColType.LocalDateTime
        typeOf<DeprecatedInstant>() -> ColType.DeprecatedInstant
        typeOf<StdlibInstant>() -> ColType.StdlibInstant
        typeOf<Duration>() -> ColType.Duration

        // other
        typeOf<URL>() -> ColType.Url
        typeOf<DataFrame<*>>() -> ColType.JsonArray
        typeOf<DataRow<*>>() -> ColType.JsonObject

        else -> ColType.String
    }
}
|
||||
|
||||
/**
 * The types Deephaven already attempts to parse itself; DataFrame's String parsers
 * do not need to try these again when they take over.
 *
 * [LocalDateTime] and [java.time.LocalDateTime] are left out on purpose,
 * because Deephaven cannot recognize all formats.
 */
internal val typesDeephavenAlreadyParses: Set<KType> =
    buildSet {
        add(typeOf<Int>())
        add(typeOf<Long>())
        add(typeOf<Double>())
        add(typeOf<Char>())
        add(typeOf<Boolean>())
    }
|
||||
+112
@@ -0,0 +1,112 @@
|
||||
@file:JvmName("WriteDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import org.apache.commons.csv.CSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.AnyRow
|
||||
import org.jetbrains.kotlinx.dataframe.api.forEach
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
|
||||
import org.jetbrains.kotlinx.dataframe.io.toJson
|
||||
import org.apache.commons.csv.QuoteMode as ApacheQuoteMode
|
||||
|
||||
/**
|
||||
* Writes [df] to [writer] in a delimiter-separated format.
|
||||
*
|
||||
* @param df The data to write.
|
||||
* @param writer The [Appendable] to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The string that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV writing options.
|
||||
*/
|
||||
internal fun writeDelimImpl(
    df: AnyFrame,
    writer: Appendable,
    delimiter: Char,
    includeHeader: Boolean,
    quote: Char?,
    quoteMode: QuoteMode,
    escapeChar: Char?,
    commentChar: Char?,
    headerComments: List<String>,
    recordSeparator: String,
    adjustCsvFormat: AdjustCSVFormat,
) {
    // configure Apache Commons CSV from the arguments; the caller gets the final say via adjustCsvFormat
    val formatBuilder = CSVFormat.Builder.create(CSVFormat.DEFAULT)
        .setDelimiter(delimiter)
        .setQuote(quote)
        .setSkipHeaderRecord(!includeHeader)
        .setQuoteMode(quoteMode.toApache())
        .setRecordSeparator(recordSeparator)
        .setEscape(escapeChar)
        .setCommentMarker(commentChar)
        .setHeaderComments(*headerComments.toTypedArray())
    val csvFormat = adjustCsvFormat(formatBuilder, formatBuilder).get()

    // the printer does the actual writing; only nested rows and frames need converting (to JSON) first
    csvFormat.print(writer).use { csvPrinter ->
        if (includeHeader) csvPrinter.printRecord(df.columnNames())

        df.forEach { row ->
            val cells = row.values().map { cell ->
                when (cell) {
                    is AnyRow ->
                        try {
                            cell.toJson()
                        } catch (_: NoClassDefFoundError) {
                            error(
                                "Encountered a DataRow value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                            )
                        }

                    is AnyFrame ->
                        try {
                            cell.toJson()
                        } catch (_: NoClassDefFoundError) {
                            error(
                                "Encountered a DataFrame value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                            )
                        }

                    else -> cell
                }
            }
            csvPrinter.printRecord(cells)
        }
    }
}
|
||||
|
||||
/** Translates DataFrame's own [QuoteMode] into the equally named Apache Commons CSV quote mode. */
internal fun QuoteMode.toApache(): ApacheQuoteMode {
    return when (this) {
        QuoteMode.NONE -> ApacheQuoteMode.NONE
        QuoteMode.MINIMAL -> ApacheQuoteMode.MINIMAL
        QuoteMode.NON_NUMERIC -> ApacheQuoteMode.NON_NUMERIC
        QuoteMode.ALL_NON_NULL -> ApacheQuoteMode.ALL_NON_NULL
        QuoteMode.ALL -> ApacheQuoteMode.ALL
    }
}
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
/** The available strategies for quoting fields. */
public enum class QuoteMode {

    /** Every field is quoted. */
    ALL,

    /** Every field that is not `null` is quoted. */
    ALL_NON_NULL,

    /**
     * Only fields containing special characters are quoted: a field delimiter, a quote character,
     * or any character of the line separator string.
     */
    MINIMAL,

    /** Every non-numeric field is quoted. */
    NON_NUMERIC,

    /**
     * No field is ever quoted. A delimiter occurring in the data is prefixed with the escape character
     * by the printer. Format validation throws an exception if no escape character is set.
     */
    NONE,
}
|
||||
Vendored
+39
@@ -0,0 +1,39 @@
|
||||
@file:JvmName("CsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * [SupportedDataFrameFormat] for `csv` files, reading them through [DataFrame.readCsv].
 *
 * @param delimiter The field delimiter passed to every read call and to the generated default read method.
 */
public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat {

    override val testOrder: Int = 20_000

    /** Only the exact extension `csv` is accepted. */
    override fun acceptsExtension(ext: String): Boolean {
        return ext == "csv"
    }

    /** Always `true`: the extension is enough to decide. */
    override fun acceptsSample(sample: SupportedFormatSample): Boolean {
        return true
    }

    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> {
        return DataFrame.readCsv(inputStream = stream, delimiter = delimiter, header = header)
    }

    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> {
        return DataFrame.readCsv(file = file, delimiter = delimiter, header = header)
    }

    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> {
        return DataFrame.readCsv(path = path, delimiter = delimiter, header = header)
    }

    /** Creates the default read method for [pathRepresentation], carrying this format's delimiter as argument. */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod {
        val delimiterArgument = MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter)
        return DefaultReadCsvMethod(pathRepresentation, delimiterArgument)
    }
}
|
||||
|
||||
/** Name of the read function handed to [AbstractDefaultReadMethod] by [DefaultReadCsvMethod]. */
private const val READ_CSV = "readCsv"

/** The default read method for csv data: an [AbstractDefaultReadMethod] bound to [READ_CSV]. */
internal class DefaultReadCsvMethod(
    path: String?,
    arguments: MethodArguments,
) : AbstractDefaultReadMethod(path, arguments, READ_CSV)
|
||||
+1053
File diff suppressed because it is too large
Load Diff
+175
@@ -0,0 +1,175 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
|
||||
/**
|
||||
* ### Read CSV String to [DataFrame]
|
||||
*
|
||||
* Reads any CSV [String] to a [DataFrame][DataFrame].
|
||||
*
|
||||
* Parameters you can use to customize the reading process include, for instance, [delimiter],
|
||||
* [header], [colTypes], [readLines], and [parserOptions].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Deephaven CSV](https://github.com/deephaven/deephaven-csv).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With the overloads of [DataFrame.readCsv][readCsv]`()`, you can read any CSV by [File][File],
|
||||
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
|
||||
* Reading by file path or URL can also be done by passing a [String].
|
||||
*
|
||||
* For example, [DataFrame.readCsv][readCsv]`("input.csv")` or with some options:
|
||||
*
|
||||
* [DataFrame.readCsv][readCsv]`(`
|
||||
*
|
||||
* `file = `[File][File]`("input.csv"),`
|
||||
*
|
||||
* `parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
|
||||
*
|
||||
* `colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
|
||||
*
|
||||
* `readLines = 1000L,`
|
||||
*
|
||||
* `)`
|
||||
*
|
||||
* ZIP (.zip) or GZIP (.gz) files are supported by default. The compression is automatically detected.
|
||||
*
|
||||
* You can also read "raw" CSV data from a [String] like this:
|
||||
*
|
||||
* [DataFrame.readCsvStr][readCsvStr]`("a,b,c", delimiter = ',')`
|
||||
*
|
||||
* @param text The raw data to read in the form of a [String].
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
|
||||
* Default: `null`
|
||||
*
|
||||
* If `null`, the Charset will be read from the BOM of the provided input,
|
||||
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
|
||||
* @param header Optional column titles. Default: empty list.
|
||||
*
|
||||
* If non-empty, the data will be read with [header] as the column titles
|
||||
* (use [skipLines] if there's a header in the data).
|
||||
* If empty (default), the header will be read from the data.
|
||||
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
|
||||
* Default: `false`.
|
||||
*
|
||||
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
|
||||
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
|
||||
* Column widths are determined by the header in the data (if present), or manually by setting
|
||||
* [fixedColumnWidths].
|
||||
* @param fixedColumnWidths The fixed column widths. Default: empty list.
|
||||
*
|
||||
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
|
||||
* (if present), else, this manually sets the column widths.
|
||||
* The number of widths should match the number of columns.
|
||||
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
|
||||
*
|
||||
* If supplied for a certain column name (inferred from data or given by [header]),
|
||||
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
|
||||
*
|
||||
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
|
||||
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
|
||||
* to set a _default_ column type, like [ColType.String].
|
||||
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
|
||||
*
|
||||
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
|
||||
* @param readLines The maximum number of lines to read from the data. Default: `null`.
|
||||
*
|
||||
* If `null`, all lines will be read.
|
||||
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
|
||||
* Default: `null`.
|
||||
*
|
||||
* Can configure locale, date format, double parsing, skipping types, etc.
|
||||
*
|
||||
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
|
||||
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
|
||||
*
|
||||
* The only exceptions are:
|
||||
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
|
||||
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
|
||||
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
|
||||
* the given types or the global setting.
|
||||
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
|
||||
*
|
||||
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
|
||||
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too short will be interpreted as _empty_ values.
|
||||
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too long will have those columns dropped.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
|
||||
* Default: `true`.
|
||||
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
|
||||
* Default: `false`.
|
||||
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
|
||||
*
|
||||
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
|
||||
* This is usually faster but can be turned off for debugging.
|
||||
*/
|
||||
public fun DataFrame.Companion.readCsvStr(
    text: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The shared reader consumes an InputStream, so the text is wrapped in one.
    val textStream = text.byteInputStream()

    return readDelimImpl(
        inputStream = textStream,
        // Not caller-configurable here: the charset is always passed as UTF-8.
        charset = Charsets.UTF_8,
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        // A raw String is handed over as-is, so no decompression is requested.
        compression = Compression.None,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // This overload exposes no hook for spec adjustment; the module default is used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
|
||||
+1059
File diff suppressed because it is too large
Load Diff
+175
@@ -0,0 +1,175 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
|
||||
/**
|
||||
* ### Read Delimiter-Separated Text String to [DataFrame]
|
||||
*
|
||||
* Reads any delimiter-separated text [String] to a [DataFrame][DataFrame].
|
||||
*
|
||||
* Parameters you can use to customize the reading process include, for instance, [delimiter],
|
||||
* [header], [colTypes], [readLines], and [parserOptions].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Deephaven CSV](https://github.com/deephaven/deephaven-csv).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With the overloads of [DataFrame.readDelim][readDelim]`()`, you can read any delimiter-separated text by [File][File],
|
||||
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
|
||||
* Reading by file path or URL can also be done by passing a [String].
|
||||
*
|
||||
* For example, [DataFrame.readDelim][readDelim]`("input.txt")` or with some options:
|
||||
*
|
||||
* [DataFrame.readDelim][readDelim]`(`
|
||||
*
|
||||
* `file = `[File][File]`("input.txt"),`
|
||||
*
|
||||
* `parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
|
||||
*
|
||||
* `colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
|
||||
*
|
||||
* `readLines = 1000L,`
|
||||
*
|
||||
* `)`
|
||||
*
|
||||
* ZIP (.zip) or GZIP (.gz) files are supported by default. The compression is automatically detected.
|
||||
*
|
||||
* You can also read "raw" delimiter-separated text data from a [String] like this:
|
||||
*
|
||||
* [DataFrame.readDelimStr][readDelimStr]`("a,b,c", delimiter = ',')`
|
||||
*
|
||||
* @param text The raw data to read in the form of a [String].
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
|
||||
* Default: `null`
|
||||
*
|
||||
* If `null`, the Charset will be read from the BOM of the provided input,
|
||||
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
|
||||
* @param header Optional column titles. Default: empty list.
|
||||
*
|
||||
* If non-empty, the data will be read with [header] as the column titles
|
||||
* (use [skipLines] if there's a header in the data).
|
||||
* If empty (default), the header will be read from the data.
|
||||
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
|
||||
* Default: `false`.
|
||||
*
|
||||
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
|
||||
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
|
||||
* Column widths are determined by the header in the data (if present), or manually by setting
|
||||
* [fixedColumnWidths].
|
||||
* @param fixedColumnWidths The fixed column widths. Default: empty list.
|
||||
*
|
||||
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
|
||||
* (if present), else, this manually sets the column widths.
|
||||
* The number of widths should match the number of columns.
|
||||
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
|
||||
*
|
||||
* If supplied for a certain column name (inferred from data or given by [header]),
|
||||
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
|
||||
*
|
||||
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
|
||||
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
|
||||
* to set a _default_ column type, like [ColType.String].
|
||||
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
|
||||
*
|
||||
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
|
||||
* @param readLines The maximum number of lines to read from the data. Default: `null`.
|
||||
*
|
||||
* If `null`, all lines will be read.
|
||||
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
|
||||
* Default: `null`.
|
||||
*
|
||||
* Can configure locale, date format, double parsing, skipping types, etc.
|
||||
*
|
||||
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
|
||||
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
|
||||
*
|
||||
* The only exceptions are:
|
||||
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
|
||||
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
|
||||
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
|
||||
* the given types or the global setting.
|
||||
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
|
||||
*
|
||||
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
|
||||
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too short will be interpreted as _empty_ values.
|
||||
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too long will have those columns dropped.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
|
||||
* Default: `true`.
|
||||
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
|
||||
* Default: `false`.
|
||||
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
|
||||
*
|
||||
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
|
||||
* This is usually faster but can be turned off for debugging.
|
||||
*/
|
||||
public fun DataFrame.Companion.readDelimStr(
    text: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The shared reader consumes an InputStream, so the text is wrapped in one.
    val textStream = text.byteInputStream()

    return readDelimImpl(
        inputStream = textStream,
        // Not caller-configurable here: the charset is always passed as UTF-8.
        charset = Charsets.UTF_8,
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        // A raw String is handed over as-is, so no decompression is requested.
        compression = Compression.None,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // This overload exposes no hook for spec adjustment; the module default is used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
|
||||
+1053
File diff suppressed because it is too large
Load Diff
+175
@@ -0,0 +1,175 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
|
||||
/**
|
||||
* ### Read TSV String to [DataFrame]
|
||||
*
|
||||
* Reads any TSV [String] to a [DataFrame][DataFrame].
|
||||
*
|
||||
* Parameters you can use to customize the reading process include, for instance, [delimiter],
|
||||
* [header], [colTypes], [readLines], and [parserOptions].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Deephaven CSV](https://github.com/deephaven/deephaven-csv).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With the overloads of [DataFrame.readTsv][readTsv]`()`, you can read any TSV by [File][File],
|
||||
* [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
|
||||
* Reading by file path or URL can also be done by passing a [String].
|
||||
*
|
||||
* For example, [DataFrame.readTsv][readTsv]`("input.tsv")` or with some options:
|
||||
*
|
||||
* [DataFrame.readTsv][readTsv]`(`
|
||||
*
|
||||
* `file = `[File][File]`("input.tsv"),`
|
||||
*
|
||||
* `parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
|
||||
*
|
||||
* `colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
|
||||
*
|
||||
* `readLines = 1000L,`
|
||||
*
|
||||
* `)`
|
||||
*
|
||||
* ZIP (.zip) or GZIP (.gz) files are supported by default. The compression is automatically detected.
|
||||
*
|
||||
* You can also read "raw" TSV data from a [String] like this:
|
||||
*
|
||||
* [DataFrame.readTsvStr][readTsvStr]`("a\tb\tc", delimiter = '\t')`
|
||||
*
|
||||
* @param text The raw data to read in the form of a [String].
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
|
||||
* Default: `null`
|
||||
*
|
||||
* If `null`, the Charset will be read from the BOM of the provided input,
|
||||
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
|
||||
* @param header Optional column titles. Default: empty list.
|
||||
*
|
||||
* If non-empty, the data will be read with [header] as the column titles
|
||||
* (use [skipLines] if there's a header in the data).
|
||||
* If empty (default), the header will be read from the data.
|
||||
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
|
||||
* Default: `false`.
|
||||
*
|
||||
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
|
||||
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
|
||||
* Column widths are determined by the header in the data (if present), or manually by setting
|
||||
* [fixedColumnWidths].
|
||||
* @param fixedColumnWidths The fixed column widths. Default: empty list.
|
||||
*
|
||||
* Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data
|
||||
* (if present), else, this manually sets the column widths.
|
||||
* The number of widths should match the number of columns.
|
||||
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
|
||||
*
|
||||
* If supplied for a certain column name (inferred from data or given by [header]),
|
||||
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
|
||||
*
|
||||
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
|
||||
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
|
||||
* to set a _default_ column type, like [ColType.String].
|
||||
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
|
||||
*
|
||||
* Useful for files with metadata, or comments at the beginning, or to give a custom [header].
|
||||
* @param readLines The maximum number of lines to read from the data. Default: `null`.
|
||||
*
|
||||
* If `null`, all lines will be read.
|
||||
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
|
||||
* Default: `null`.
|
||||
*
|
||||
* Can configure locale, date format, double parsing, skipping types, etc.
|
||||
*
|
||||
* If [parserOptions] or any of the arguments are `null`, the global parser configuration
|
||||
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
|
||||
*
|
||||
* The only exceptions are:
|
||||
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
|
||||
* will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS].
|
||||
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses][org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses] to
|
||||
* the given types or the global setting.
|
||||
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
|
||||
*
|
||||
* If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns].
|
||||
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too short will be interpreted as _empty_ values.
|
||||
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too long will have those columns dropped.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
|
||||
* Default: `true`.
|
||||
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
|
||||
* Default: `false`.
|
||||
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
|
||||
*
|
||||
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
|
||||
* This is usually faster but can be turned off for debugging.
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsvStr(
    text: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The shared reader consumes an InputStream, so the text is wrapped in one.
    val textStream = text.byteInputStream()

    return readDelimImpl(
        inputStream = textStream,
        // Not caller-configurable here: the charset is always passed as UTF-8.
        charset = Charsets.UTF_8,
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        // A raw String is handed over as-is, so no decompression is requested.
        compression = Compression.None,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // This overload exposes no hook for spec adjustment; the module default is used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
|
||||
/**
|
||||
* ### Convert [DataFrame] to CSV String
|
||||
*
|
||||
* Converts [this][this] [DataFrame][DataFrame] to a CSV [String].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
|
||||
*
|
||||
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.toCsvStr(
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Render into an in-memory buffer and hand the accumulated text back to the caller.
    val buffer = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = buffer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload exposes no format hook, so the shared default adjuster is forwarded.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return buffer.toString()
}
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
|
||||
/**
|
||||
* ### Convert [DataFrame] to Delimiter-Separated Text String
|
||||
*
|
||||
* Converts [this][this] [DataFrame][DataFrame] to a delimiter-separated text [String].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
|
||||
*
|
||||
* or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toDelimStr][toDelimStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.toDelimStr(
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String =
    StringBuilder()
        .also { out ->
            // `this` is still the receiver DataFrame here; `out` collects the delimited text.
            writeDelimImpl(
                df = this,
                writer = out,
                delimiter = delimiter,
                includeHeader = includeHeader,
                quote = quote,
                quoteMode = quoteMode,
                escapeChar = escapeChar,
                commentChar = commentChar,
                headerComments = headerComments,
                recordSeparator = recordSeparator,
                // No format hook on this overload: forward the shared default adjuster.
                adjustCsvFormat = ADJUST_CSV_FORMAT,
            )
        }
        .toString()
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
|
||||
/**
|
||||
* ### Convert [DataFrame] to TSV String
|
||||
*
|
||||
* Converts [this][this] [DataFrame][DataFrame] to a TSV [String].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
|
||||
*
|
||||
* or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toTsvStr][toTsvStr]`(delimiter = '\t')`
|
||||
*
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
// NOTE(review): unlike `toCsvStr` / `toDelimStr`, `includeHeader` precedes `delimiter` here.
// The order is part of the public signature, so it is kept for positional callers.
public fun AnyFrame.toTsvStr(
    includeHeader: Boolean = INCLUDE_HEADER,
    delimiter: Char = TSV_DELIMITER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String =
    with(StringBuilder()) {
        // Inside `with`, `this` is the builder; the DataFrame needs the qualified receiver.
        writeDelimImpl(
            df = this@toTsvStr,
            writer = this,
            delimiter = delimiter,
            includeHeader = includeHeader,
            quote = quote,
            quoteMode = quoteMode,
            escapeChar = escapeChar,
            commentChar = commentChar,
            headerComments = headerComments,
            recordSeparator = recordSeparator,
            // No format hook on this overload: forward the shared default adjuster.
            adjustCsvFormat = ADJUST_CSV_FORMAT,
        )
        toString()
    }
|
||||
Vendored
+39
@@ -0,0 +1,39 @@
|
||||
@file:JvmName("TsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * [SupportedDataFrameFormat] implementation for TSV input.
 *
 * Every `readDataFrame` overload delegates to the matching `DataFrame.readTsv` overload,
 * passing along the configured [delimiter].
 *
 * @param delimiter The field delimiter forwarded to `readTsv`. Default: [DelimParams.TSV_DELIMITER].
 */
public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat {

    override val testOrder: Int = 30_000

    // Only the exact, lower-case extension "tsv" is accepted.
    override fun acceptsExtension(ext: String): Boolean = "tsv" == ext

    // Extension is enough, so any sample is accepted.
    override fun acceptsSample(sample: SupportedFormatSample): Boolean = true

    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(inputStream = stream, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(file = file, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(path = path, header = header, delimiter = delimiter)
    }

    // The generated read call carries the configured delimiter as a quoted char literal argument.
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadTsvMethod(
            pathRepresentation,
            MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter),
        )
}
|
||||
|
||||
/** Name of the read function that [DefaultReadTsvMethod] passes to its base class. */
private const val READ_TSV = "readTsv"

/**
 * [AbstractDefaultReadMethod] bound to the [READ_TSV] function name.
 *
 * @param path Path representation handed to the base class; may be `null`.
 * @param arguments Extra arguments handed to the base class.
 */
internal class DefaultReadTsvMethod(
    path: String?,
    arguments: MethodArguments,
) : AbstractDefaultReadMethod(path, arguments, READ_TSV)
|
||||
dataframe/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/util.kt
Vendored
+25
@@ -0,0 +1,25 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.deephaven.csv.CsvSpecs
|
||||
import org.apache.commons.csv.CSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.ExcludeFromSources
|
||||
|
||||
/**
 * The strings treated as `null` by default when reading CSV / TSV / delim files:
 *
 * [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]
 *
 * Matching is exact: only the spellings listed here are part of the set.
 */
public val DEFAULT_DELIM_NULL_STRINGS: Set<String> =
    setOf(
        "",
        "NA",
        "N/A",
        "null",
        "NULL",
        "None",
        "none",
        "NIL",
        "nil",
    )
|
||||
|
||||
/**
 * Shorthand for `CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder`.
 *
 * The lambda gets the [CsvSpecs.Builder] both as receiver and as argument, and returns the
 * builder to continue with. Use it to overwrite or adjust any of the CSV specs.
 */
public typealias AdjustCsvSpecs = CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder
|
||||
|
||||
/**
 * Shorthand for `CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder`.
 *
 * The lambda gets the [CSVFormat.Builder] both as receiver and as argument, and returns the
 * builder to continue with. Use it to overwrite or adjust any of the CSV format options.
 */
public typealias AdjustCSVFormat = CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder
|
||||
+305
@@ -0,0 +1,305 @@
|
||||
@file:JvmName("WriteCsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writer
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to CSV File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a CSV file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
|
||||
*
|
||||
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param path The path pointing to a file to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
) {
    // `Path.writer()` opens the target with its default charset (UTF-8).
    val target = path.writer()
    writeDelimImpl(
        df = this,
        writer = target,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // No format hook on this overload: forward the shared default adjuster.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
}
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to CSV File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a CSV file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
|
||||
*
|
||||
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param file The file to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` always encodes as UTF-8, just like `path.writer()` in the `Path` overload.
        // `FileWriter(file)` falls back to the platform default charset on JDKs before 18, so the
        // same frame could be written in a different encoding depending on overload and machine.
        writer = file.writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // No format hook on this overload: forward the shared default adjuster.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to CSV File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a CSV file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
|
||||
*
|
||||
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param path The path pointing to a file to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    path: String,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File(path).writer()` always encodes as UTF-8, just like `path.writer()` in the `Path`
        // overload. `FileWriter(path)` falls back to the platform default charset on JDKs before 18,
        // so the same frame could be written in a different encoding depending on overload and machine.
        writer = File(path).writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // No format hook on this overload: forward the shared default adjuster.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
*
|
||||
* ### Write [DataFrame] to CSV Appendable
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a CSV [Appendable].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeCsv][writeCsv]`()`, you can write CSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeCsv][writeCsv]`("output.csv")`
|
||||
*
|
||||
* or [DataFrame.writeCsv][writeCsv]`(`[File][File]`("output.csv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toCsvStr][toCsvStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param writer The [Appendable] to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV writing options.
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    writer: Appendable,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Pure pass-through: this is the one writeCsv overload that forwards a caller-supplied
    // `adjustCsvFormat` instead of the default.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
|
||||
+305
@@ -0,0 +1,305 @@
|
||||
@file:JvmName("WriteDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writer
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to Delimiter-Separated Text File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a delimiter-separated text file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
|
||||
*
|
||||
* or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toDelimStr][toDelimStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param path The path pointing to a file to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
) {
    // Open a writer on the target path before delegating; everything else is forwarded unchanged.
    val target = path.writer()

    writeDelimImpl(
        df = this,
        writer = target,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose a format hook, so the shared default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
}
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to Delimiter-Separated Text File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a delimiter-separated text file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
|
||||
*
|
||||
* or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toDelimStr][toDelimStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param file The file to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8 unless told otherwise, the same default as the
        // `path.writer()` call used by the `Path` overload. `FileWriter(file)` would pick up the
        // JVM default charset instead, making the written bytes platform-dependent.
        writer = file.writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose a format hook, so the shared default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to Delimiter-Separated Text File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a delimiter-separated text file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
|
||||
*
|
||||
* or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toDelimStr][toDelimStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param path The path pointing to a file to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    path: String,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8 unless told otherwise, the same default as the
        // `path.writer()` call used by the `Path` overload. `FileWriter(path)` would pick up the
        // JVM default charset instead, making the written bytes platform-dependent.
        writer = File(path).writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose a format hook, so the shared default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
*
|
||||
* ### Write [DataFrame] to Delimiter-Separated Text Appendable
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a delimiter-separated text [Appendable].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeDelim][writeDelim]`()`, you can write delimiter-separated text to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeDelim][writeDelim]`("output.txt")`
|
||||
*
|
||||
* or [DataFrame.writeDelim][writeDelim]`(`[File][File]`("output.txt"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toDelimStr][toDelimStr]`(delimiter = ',')`
|
||||
*
|
||||
* @param writer The [Appendable] to write to.
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV writing options.
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    writer: Appendable,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Pure pass-through: the receiver and every argument, including the caller's format hook,
    // are handed to the shared implementation as given.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
|
||||
+305
@@ -0,0 +1,305 @@
|
||||
@file:JvmName("WriteTsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writer
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to TSV File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a TSV file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
|
||||
*
|
||||
* or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toTsvStr][toTsvStr]`(delimiter = '\t')`
|
||||
*
|
||||
* @param path The path pointing to a file to write to.
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
) {
    // Open a writer on the target path before delegating; everything else is forwarded unchanged.
    val target = path.writer()

    writeDelimImpl(
        df = this,
        writer = target,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose a format hook, so the shared default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
}
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to TSV File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a TSV file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
|
||||
*
|
||||
* or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toTsvStr][toTsvStr]`(delimiter = '\t')`
|
||||
*
|
||||
* @param file The file to write to.
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8 unless told otherwise, the same default as the
        // `path.writer()` call used by the `Path` overload. `FileWriter(file)` would pick up the
        // JVM default charset instead, making the written bytes platform-dependent.
        writer = file.writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose a format hook, so the shared default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* ### Write [DataFrame] to TSV File
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a TSV file.
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
|
||||
*
|
||||
* or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toTsvStr][toTsvStr]`(delimiter = '\t')`
|
||||
*
|
||||
* @param path The path pointing to a file to write to.
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    path: String,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8 unless told otherwise, the same default as the
        // `path.writer()` call used by the `Path` overload. `FileWriter(path)` would pick up the
        // JVM default charset instead, making the written bytes platform-dependent.
        writer = File(path).writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose a format hook, so the shared default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
*
|
||||
* ### Write [DataFrame] to TSV Appendable
|
||||
*
|
||||
* Writes [this][this] [DataFrame][DataFrame] to a TSV [Appendable].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, [delimiter],
|
||||
* [includeHeader], [quoteMode], and [headerComments].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/).
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of [DataFrame.writeTsv][writeTsv]`()`, you can write TSV to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, [DataFrame.writeTsv][writeTsv]`("output.tsv")`
|
||||
*
|
||||
* or [DataFrame.writeTsv][writeTsv]`(`[File][File]`("output.tsv"), quoteMode = `[QuoteMode.ALL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* [DataFrame.toTsvStr][toTsvStr]`(delimiter = '\t')`
|
||||
*
|
||||
* @param writer The [Appendable] to write to.
|
||||
* @param delimiter The field delimiter character. Default: '\t'.
|
||||
*
|
||||
* Ignored if [hasFixedWidthColumns] is `true`.
|
||||
* @param includeHeader Whether to include the header in the output. Default: `true`.
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
* @param quoteMode The [QuoteMode][org.jetbrains.kotlinx.dataframe.io.QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL][org.jetbrains.kotlinx.dataframe.io.QuoteMode.MINIMAL].
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE][org.jetbrains.kotlinx.dataframe.io.QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
* @param recordSeparator The character that separates records in a CSV / TSV file.
|
||||
* Default: `'\n'`, a Unix-newline.
|
||||
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV writing options.
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    writer: Appendable,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Pure pass-through: the receiver and every argument, including the caller's format hook,
    // are handed to the shared implementation as given.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
|
||||
+30
@@ -0,0 +1,30 @@
|
||||
@file:JvmName("CsvDeprecationMessagesKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.util
|
||||
|
||||
/*
 * Deprecation messages used by the CSV module, grouped by the release in which they escalate.
 * Review the whole file after every release:
 * Level.WARNING -> Level.ERROR
 * Level.ERROR -> Remove
 */

// region WARNING in 0.15, ERROR in 1.0

private const val MESSAGE_1_0 = "Will be ERROR in 1.0."

// The three read* messages below carry the same text, so it is spelled out once here.
private const val BINARY_COMPATIBILITY_OVERLOAD = "This overload is here to maintain binary compatibility."

internal const val READ_CSV_BINARY_COMPATIBILITY = BINARY_COMPATIBILITY_OVERLOAD
internal const val READ_TSV_BINARY_COMPATIBILITY = BINARY_COMPATIBILITY_OVERLOAD
internal const val READ_DELIM_BINARY_COMPATIBILITY = BINARY_COMPATIBILITY_OVERLOAD

// endregion

// region WARNING in 1.0, ERROR in 1.1

private const val MESSAGE_1_1 = "Will be ERROR in 1.1."

// endregion

// region keep across releases

// endregion
|
||||
+54
@@ -0,0 +1,54 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.openjdk.jmh.annotations.Benchmark
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode
|
||||
import org.openjdk.jmh.annotations.Measurement
|
||||
import org.openjdk.jmh.annotations.Mode
|
||||
import org.openjdk.jmh.annotations.Param
|
||||
import org.openjdk.jmh.annotations.Scope
|
||||
import org.openjdk.jmh.annotations.Setup
|
||||
import org.openjdk.jmh.annotations.State
|
||||
import org.openjdk.jmh.annotations.TearDown
|
||||
import org.openjdk.jmh.annotations.Warmup
|
||||
import java.io.File
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
/**
 * JMH benchmark that reads one CSV resource through both `DataFrame.readCSV` and `DataFrame.readCsv`.
 *
 * The resource is chosen by [type]; each value maps to a file under `src/test/resources/`.
 */
@BenchmarkMode(Mode.SingleShotTime)
@Warmup(iterations = 10, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
open class BenchmarkTest {

    /** Benchmark parameter selecting the input file; see [setup] for the mapping. */
    @Param("small", "medium", "large")
    var type = ""

    /** Input file for the current run; assigned in [setup] and reset to `null` in [tearDown]. */
    var file: File? = null

    /** Sets the logger default level property and resolves [type] to a resource file. */
    @Setup
    fun setup() {
        System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info")

        val resourceName = when (type) {
            "small" -> "testCSV.csv"
            "medium" -> "gross-domestic-product-june-2024-quarter.csv"
            "large" -> "largeCsv.csv.gz"
            else -> throw IllegalArgumentException("Invalid type")
        }
        file = File("src/test/resources/$resourceName")
    }

    /** Drops the reference to the input file. */
    @TearDown
    fun tearDown() {
        file = null
    }

    /** Reads [file] with `DataFrame.readCSV`. */
    @Benchmark
    fun apache() {
        DataFrame.readCSV(file!!)
    }

    /** Reads [file] with `DataFrame.readCsv`. */
    @Benchmark
    fun deephaven() {
        DataFrame.readCsv(file!!)
    }
}
|
||||
+887
@@ -0,0 +1,887 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.deephaven.csv.parsers.Parsers
|
||||
import io.kotest.assertions.throwables.shouldNotThrowAny
|
||||
import io.kotest.assertions.throwables.shouldThrow
|
||||
import io.kotest.matchers.collections.shouldContainInOrder
|
||||
import io.kotest.matchers.nulls.shouldNotBeNull
|
||||
import io.kotest.matchers.shouldBe
|
||||
import io.kotest.matchers.shouldNotBe
|
||||
import kotlinx.datetime.LocalDate
|
||||
import kotlinx.datetime.LocalDateTime
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.api.allNulls
|
||||
import org.jetbrains.kotlinx.dataframe.api.convert
|
||||
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.group
|
||||
import org.jetbrains.kotlinx.dataframe.api.groupBy
|
||||
import org.jetbrains.kotlinx.dataframe.api.into
|
||||
import org.jetbrains.kotlinx.dataframe.api.isEmpty
|
||||
import org.jetbrains.kotlinx.dataframe.api.parser
|
||||
import org.jetbrains.kotlinx.dataframe.api.print
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.api.toStr
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.After
|
||||
import org.junit.Before
|
||||
import org.junit.Test
|
||||
import java.io.File
|
||||
import java.io.StringWriter
|
||||
import java.math.BigDecimal
|
||||
import java.net.URL
|
||||
import java.util.Locale
|
||||
import java.util.zip.GZIPInputStream
|
||||
import kotlin.reflect.KClass
|
||||
import kotlin.reflect.typeOf
|
||||
import kotlin.time.Instant as StdlibInstant
|
||||
import kotlinx.datetime.Instant as DeprecatedInstant
|
||||
|
||||
// can be enabled for showing logs for these tests
|
||||
// Consulted by setLogger() and restoreLogger(): while this is false, both return before touching any system property.
private const val SHOW_LOGS = false
|
||||
|
||||
@Suppress("ktlint:standard:argument-list-wrapping")
|
||||
class DelimCsvTsvTests {
|
||||
|
||||
private val logLevel = "org.slf4j.simpleLogger.log.${FastDoubleParser::class.qualifiedName}"
|
||||
private var loggerBefore: String? = null
|
||||
|
||||
@Before
fun setLogger() {
    if (SHOW_LOGS) {
        // System.setProperty returns the value it replaces (null when there was none),
        // so remembering the old level and switching to "trace" happen in one call.
        loggerBefore = System.setProperty(logLevel, "trace")
    }
}
|
||||
|
||||
@After
fun restoreLogger() {
    if (!SHOW_LOGS) return

    // Undo setLogger(): put back the remembered level, or remove the property entirely when it
    // was not set before the test. Without the second branch the "trace" value would stay in place.
    val previous = loggerBefore
    if (previous != null) {
        System.setProperty(logLevel, previous)
    } else {
        System.clearProperty(logLevel)
    }
}
|
||||
|
||||
@Test
fun readNulls() {
    // Header with two names; both data rows have a value only in the first field.
    @Language("CSV")
    val src =
        """
        first,second
        2,,
        3,,
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(src)

    // Shape of the parsed frame.
    parsed.rowsCount() shouldBe 2
    parsed.columnsCount() shouldBe 2

    // Column types: the populated column is Int, the all-null one is String?.
    val first = parsed["first"]
    val second = parsed["second"]
    first.type() shouldBe typeOf<Int>()
    second.allNulls() shouldBe true
    second.type() shouldBe typeOf<String?>()
}
|
||||
|
||||
/** Round trip: a frame written with `writeCsv` and parsed again with `readCsvStr` equals the original. */
@Test
fun write() {
    val original = dataFrameOf("col1", "col2")(
        1, null,
        2, null,
    ).convert("col2").toStr()

    val sink = StringWriter()
    original.writeCsv(sink)
    val writtenCsv = sink.buffer.toString()

    val roundTripped = DataFrame.readCsvStr(writtenCsv)
    roundTripped shouldBe original
}
|
||||
|
||||
/** Reads the simple CSV fixture through the generic `DataFrame.read` and checks its shape, names and types. */
@Test
fun readCsv() {
    val frame = DataFrame.read(simpleCsv)

    // shape
    frame.columnsCount() shouldBe 11
    frame.rowsCount() shouldBe 5

    // duplicated header names got a numeric suffix
    frame.columnNames()[5] shouldBe "duplicate1"
    frame.columnNames()[6] shouldBe "duplicate11"

    // inferred column types
    frame["duplicate1"].type() shouldBe typeOf<Char?>()
    frame["double"].type() shouldBe typeOf<Double?>()
    frame["number"].type() shouldBe typeOf<Double>()
    frame["time"].type() shouldBe typeOf<LocalDateTime>()

    frame.print(columnTypes = true, borders = true, title = true)
}
|
||||
|
||||
/**
 * The UTF-16LE fixture must equal the plain fixture when read without an explicit charset
 * or with UTF-16LE, and must differ when read as UTF-16BE or UTF-8.
 */
@Test
fun `readCsv different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)
    val encoded = simpleCsvUtf16le

    DataFrame.readCsv(encoded) shouldBe expected
    DataFrame.readCsv(encoded, charset = Charsets.UTF_16LE) shouldBe expected

    listOf(Charsets.UTF_16BE, Charsets.UTF_8).forEach { wrongCharset ->
        DataFrame.readCsv(encoded, charset = wrongCharset) shouldNotBe expected
    }
}
|
||||
|
||||
/**
 * Same charset expectations as for the uncompressed UTF-16LE fixture,
 * but reading the `.csv.gz` variant of it.
 */
@Test
fun `readCsv gz compressed different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)
    val encoded = simpleCsvUtf16leGz

    DataFrame.readCsv(encoded) shouldBe expected
    DataFrame.readCsv(encoded, charset = Charsets.UTF_16LE) shouldBe expected

    listOf(Charsets.UTF_16BE, Charsets.UTF_8).forEach { wrongCharset ->
        DataFrame.readCsv(encoded, charset = wrongCharset) shouldNotBe expected
    }
}
|
||||
|
||||
/**
 * Same charset expectations as for the uncompressed UTF-16LE fixture,
 * but reading the `.zip` variant of it.
 */
@Test
fun `readCsv zip compressed different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)
    val encoded = simpleCsvUtf16leZip

    DataFrame.readCsv(encoded) shouldBe expected
    DataFrame.readCsv(encoded, charset = Charsets.UTF_16LE) shouldBe expected

    listOf(Charsets.UTF_16BE, Charsets.UTF_8).forEach { wrongCharset ->
        DataFrame.readCsv(encoded, charset = wrongCharset) shouldNotBe expected
    }
}
|
||||
|
||||
/** The zipped fixture reads to the same frame as the plain one; the "not-csv.zip" fixture is rejected. */
@Test
fun `read ZIP Csv`() {
    val fromZip = DataFrame.readCsv(simpleCsvZip)
    val fromPlainFile = DataFrame.readCsv(simpleCsv)
    fromZip shouldBe fromPlainFile

    shouldThrow<IllegalStateException> { DataFrame.readCsv(notCsv) }
}
|
||||
|
||||
/** The gzipped fixture reads to the same frame as the plain one. */
@Test
fun `read GZ Csv`() {
    val fromGz = DataFrame.readCsv(simpleCsvGz)
    val fromPlainFile = DataFrame.readCsv(simpleCsv)
    fromGz shouldBe fromPlainFile
}
|
||||
|
||||
/** Reading the gzipped fixture through an explicitly supplied [Compression] matches the plain fixture. */
@Test
fun `read custom compression Csv`() {
    val viaCustomCompression = DataFrame.readCsv(simpleCsvGz, compression = Compression(::GZIPInputStream))

    viaCustomCompression shouldBe DataFrame.readCsv(simpleCsv)
}
|
||||
|
||||
/** Reading the "two csvs.zip" fixture is expected to fail with an [IllegalArgumentException]. */
@Test
fun `read 2 compressed Csv`() {
    shouldThrow<IllegalArgumentException> {
        DataFrame.readCsv(twoCsvsZip)
    }
}
|
||||
|
||||
/** Reads the French-locale fixture with `;` as delimiter and checks its shape, names and types. */
@Test
fun readCsvWithFrenchLocaleAndAlternativeDelimiter() {
    val frenchOptions = ParserOptions(locale = Locale.FRENCH)
    val frame = DataFrame.readCsv(url = csvWithFrenchLocale, delimiter = ';', parserOptions = frenchOptions)

    // shape
    frame.columnsCount() shouldBe 11
    frame.rowsCount() shouldBe 5

    // duplicated header names got a numeric suffix
    frame.columnNames()[5] shouldBe "duplicate1"
    frame.columnNames()[6] shouldBe "duplicate11"

    // inferred column types
    frame["duplicate1"].type() shouldBe typeOf<Char?>()
    frame["double"].type() shouldBe typeOf<Double?>()
    frame["number"].type() shouldBe typeOf<Double>()
    frame["time"].type() shouldBe typeOf<LocalDateTime>()

    println(frame)
}
|
||||
|
||||
/** Asserts that [schema] contains a column called [columnName] whose type classifier is [kClass]. */
private fun assertColumnType(columnName: String, kClass: KClass<*>, schema: DataFrameSchema) {
    val columnSchema = schema.columns[columnName].shouldNotBeNull()
    columnSchema.type.classifier shouldBe kClass
}
|
||||
|
||||
/** Checks the inferred classifiers of three columns of the `;`-delimited wine fixture. */
@Test
fun readCsvWithFloats() {
    val schema = DataFrame.readCsv(wineCsv, delimiter = ';').schema()

    listOf(
        "citric acid" to Double::class,
        "alcohol" to Double::class,
        "quality" to Int::class,
    ).forEach { (columnName, expectedClass) ->
        assertColumnType(columnName, expectedClass, schema)
    }
}
|
||||
|
||||
/**
 * Same checks as `readCsvWithFloats`, but with the JVM default locale temporarily set to `ru-RU`.
 * The previous default locale is restored afterwards.
 */
@Test
fun `read standard CSV with floats when user has alternative locale`() {
    val previousDefault = Locale.getDefault()
    try {
        Locale.setDefault(Locale.forLanguageTag("ru-RU"))

        val schema = DataFrame.readCsv(wineCsv, delimiter = ';').schema()

        listOf(
            "citric acid" to Double::class,
            "alcohol" to Double::class,
            "quality" to Int::class,
        ).forEach { (columnName, expectedClass) ->
            assertColumnType(columnName, expectedClass, schema)
        }
    } finally {
        Locale.setDefault(previousDefault)
    }
}
|
||||
|
||||
/** Supplies custom headers (one with 11 names, one with only 5) while skipping the file's own header line. */
@Test
fun `read with custom header`() {
    val fullHeader = ('A'..'K').map { letter -> letter.toString() }
    val withFullHeader = DataFrame.readCsv(simpleCsv, header = fullHeader, skipLines = 1)
    withFullHeader.columnNames() shouldBe fullHeader
    withFullHeader["B"].type() shouldBe typeOf<Int>()

    val shortHeader = ('A'..'E').map { letter -> letter.toString() }
    val withShortHeader = DataFrame.readCsv(simpleCsv, header = shortHeader, skipLines = 1)
    withShortHeader.columnsCount() shouldBe 5
    withShortHeader.columnNames() shouldBe shortHeader
}
|
||||
|
||||
/** Exercises `readLines` with 0, 3 and 10 against the simple fixture and checks the resulting row counts. */
@Test
fun `read first rows`() {
    val expectedNames = listOf(
        "untitled",
        "user_id",
        "name",
        "duplicate",
        "username",
        "duplicate1",
        "duplicate11",
        "double",
        "number",
        "time",
        "empty",
    )

    // readLines = 0: header only
    val headerOnly = DataFrame.readCsv(simpleCsv, readLines = 0)
    headerOnly.rowsCount() shouldBe 0
    headerOnly.columnNames() shouldBe expectedNames

    // readLines = 3
    DataFrame.readCsv(simpleCsv, readLines = 3).rowsCount() shouldBe 3

    // readLines = 10 yields 5 rows
    DataFrame.readCsv(simpleCsv, readLines = 10).rowsCount() shouldBe 5
}
|
||||
|
||||
/** Columns mixing plain numbers with values like "12 min" or "0.9 parsec" must stay `String` columns. */
@Test
fun `if string starts with a number, it should be parsed as a string anyway`() {
    @Language("CSV")
    val csvText =
        """
        duration,floatDuration
        12 min,1.0
        15,12.98 sec
        1 Season,0.9 parsec
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(csvText)

    listOf("duration", "floatDuration").forEach { columnName ->
        parsed[columnName].type() shouldBe typeOf<String>()
    }
}
|
||||
|
||||
/** A record that is shorter than the header must not throw; the missing cell becomes `null`. */
@Test
fun `if record has fewer columns than header then pad it with nulls`() {
    @Language("CSV")
    val raggedCsv =
        """
        col1,col2,col3
        568,801,587
        780,588
        """.trimIndent()

    val parsed = shouldNotThrowAny { DataFrame.readCsvStr(raggedCsv) }

    val expected = dataFrameOf("col1", "col2", "col3")(
        568, 801, 587,
        780, 588, null,
    )
    parsed shouldBe expected
}
|
||||
|
||||
/** Round trip of a frame produced by `groupBy("a").into("g")` through `toCsvStr` / `readCsvStr`. */
@Test
fun `write and read frame column`() {
    val source = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
        2, 1, 3,
    )
    val grouped = source.groupBy("a").into("g")

    val csvText = grouped.toCsvStr(escapeChar = null)
    val roundTripped = DataFrame.readCsvStr(csvText, quote = '"')

    roundTripped shouldBe grouped
}
|
||||
|
||||
/** Round trip of a frame produced by `group("b", "c").into("d")` through `toCsvStr` / `readCsvStr`. */
@Test
fun `write and read column group`() {
    val source = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    )
    val grouped = source.group("b", "c").into("d")

    val roundTripped = DataFrame.readCsvStr(grouped.toCsvStr())

    roundTripped shouldBe grouped
}
|
||||
|
||||
/** The very first character of the produced CSV text must be the column name "a". */
@Test
fun `CSV String of saved dataframe starts with column name`() {
    val csvText = dataFrameOf("a")(1).toCsvStr()

    csvText.first() shouldBe 'a'
}
|
||||
|
||||
/** The generic `DataFrame.read` must handle the "abc.tsv" resource: 3 columns, 2 rows. */
@Test
fun `guess tsv`() {
    val tsvUrl = testResource("abc.tsv")
    val frame = DataFrame.read(tsvUrl)

    frame.columnsCount() shouldBe 3
    frame.rowsCount() shouldBe 2
}
|
||||
|
||||
/**
 * Writes a frame without its header line and with "\r\n" record separators,
 * then checks the exact file contents.
 *
 * The produced file lives in the test resources folder, so it is removed in a `finally`
 * block: a failing assertion must not leave a stray file behind in the source tree.
 */
@Test
fun `write csv without header produce correct file`() {
    val df = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    )
    val outputPath = "src/test/resources/without_header.csv"
    val producedFile = File(outputPath)

    try {
        df.writeCsv(
            path = outputPath,
            includeHeader = false,
            recordSeparator = "\r\n",
        )

        producedFile.exists() shouldBe true
        producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n"
    } finally {
        producedFile.delete()
    }
}
|
||||
|
||||
/**
 * Verifies the two "jetbrains repositories" example files: the variant with a blank first line
 * can only be read when that line is skipped manually, and then equals the variant without it.
 */
@Test
fun `check integrity of example data`() {
    val withBlankFirstLine = "../data/jetbrains repositories.csv"
    val withoutBlankFirstLine = "../data/jetbrains_repositories.csv"

    // cannot read file with blank line at the start
    shouldThrow<IllegalStateException> {
        DataFrame.readCsv(withBlankFirstLine)
    }
    // ignoreEmptyLines only ignores intermediate empty lines
    shouldThrow<IllegalStateException> {
        DataFrame.readCsv(withBlankFirstLine, ignoreEmptyLines = true)
    }

    // we need to skip the empty lines manually
    val repositories = DataFrame.readCsv(withBlankFirstLine, skipLines = 1)

    repositories.columnNames() shouldBe listOf("full_name", "html_url", "stargazers_count", "topics", "watchers")
    repositories.columnTypes() shouldBe listOf(
        typeOf<String>(),
        typeOf<URL>(),
        typeOf<Int>(),
        typeOf<String>(),
        typeOf<Int>(),
    )

    // same file without empty line at the beginning
    repositories shouldBe DataFrame.readCsv(withoutBlankFirstLine)
}
|
||||
|
||||
/**
 * Passing '\t' as the delimiter to `readCsvStr` must split tab-separated text into columns.
 *
 * The sample is written with explicit `\t` escapes rather than literal tab characters in a raw
 * string: literal tabs are invisible and are easily turned into spaces by editors or formatters,
 * which would silently break this test.
 */
@Test
fun `readCsvStr delimiter`() {
    @Language("TSV")
    val tsv = "a\tb\tc\n1\t2\t3"

    val df = DataFrame.readCsvStr(tsv, '\t')

    df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
|
||||
|
||||
/** The column names read from the "with-bom" fixture must be exactly "Column1" and "Column2". */
@Test
fun `file with BOM`() {
    val names = DataFrame.readCsv(withBomCsv, delimiter = ';').columnNames()

    names shouldBe listOf("Column1", "Column2")
}
|
||||
|
||||
/**
 * Reading empty input (empty string, empty file, header-only file) must produce an empty frame,
 * keeping the column names when a header is known.
 *
 * Temp files are created with a proper ".csv"/".tsv" suffix (`File.createTempFile` does not add
 * the dot itself) and are registered for deletion so the test does not litter the temp directory.
 */
@Test
fun `read empty CSV`() {
    // Creates an empty temp file with the given suffix that is removed when the JVM exits.
    fun emptyTempFile(suffix: String): File =
        File.createTempFile("empty", suffix).apply { deleteOnExit() }

    val emptyDelimStr = DataFrame.readCsvStr("")
    emptyDelimStr shouldBe DataFrame.empty()

    val emptyWidthStr = DataFrame.readCsvStr("", hasFixedWidthColumns = true)
    emptyWidthStr shouldBe DataFrame.empty()

    val emptyCsvFile = DataFrame.readCsv(emptyTempFile(".csv"))
    emptyCsvFile shouldBe DataFrame.empty()

    val emptyCsvFileManualHeader = DataFrame.readCsv(
        file = emptyTempFile(".csv"),
        header = listOf("a", "b", "c"),
    )
    emptyCsvFileManualHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    val emptyCsvFileWithHeader = DataFrame.readCsv(
        file = emptyTempFile(".csv").also { it.writeText("a,b,c") },
    )
    emptyCsvFileWithHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    val emptyTsvStr = DataFrame.readTsv(emptyTempFile(".tsv"))
    emptyTsvStr shouldBe DataFrame.empty()
}
|
||||
|
||||
/** A leading comment line is dropped by passing `skipLines = 1L`. */
@Test
fun `read Csv with comments`() {
    @Language("CSV")
    val commentedCsv =
        """
        # This is a comment
        a,b,c
        1,2,3
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(commentedCsv, skipLines = 1L)

    val expected = dataFrameOf("a", "b", "c")(1, 2, 3)
    parsed shouldBe expected
}
|
||||
|
||||
/**
 * An empty line between records becomes an all-null row by default, is dropped with
 * `ignoreEmptyLines = true`, and makes reading fail with `allowMissingColumns = false`.
 */
@Test
fun `csv with empty lines`() {
    @Language("CSV")
    val csvWithGap =
        """
        a,b,c
        1,2,3

        4,5,6
        """.trimIndent()

    // default settings
    val withNullRow = DataFrame.readCsvStr(csvWithGap)
    withNullRow shouldBe dataFrameOf("a", "b", "c")(
        1, 2, 3,
        null, null, null,
        4, 5, 6,
    )

    // ignoreEmptyLines = true
    val withoutNullRow = DataFrame.readCsvStr(csvWithGap, ignoreEmptyLines = true)
    withoutNullRow shouldBe dataFrameOf("a", "b", "c")(
        1, 2, 3,
        4, 5, 6,
    )

    // allowMissingColumns = false
    shouldThrow<IllegalStateException> {
        DataFrame.readCsvStr(csvWithGap, allowMissingColumns = false)
    }
}
|
||||
|
||||
/** Both an empty path and a non-existent path must be rejected with an [IllegalArgumentException]. */
@Test
fun `don't read folder`() {
    listOf("", "NON EXISTENT FILE").forEach { badPath ->
        shouldThrow<IllegalArgumentException> { DataFrame.readCsv(badPath) }
    }
}
|
||||
|
||||
/**
 * Dates written as `dd/MM/yyyy` are not recognised without an explicit pattern:
 * the "date" column stays `String?` both with default options and with an explicit locale.
 */
@Test
fun `cannot auto-parse specific date string`() {
    @Language("csv")
    val frenchCsv =
        """
        name; price; date;
        a;12,45; 05/06/2021;
        b;-13,35;14/07/2025;
        c;100 123,35;;
        d;-204 235,23;;
        e;NaN;;
        f;null;;
        """.trimIndent()

    val withDefaults = DataFrame.readCsvStr(text = frenchCsv, delimiter = ';')

    // could not parse, remains String
    withDefaults["date"].type() shouldBe typeOf<String?>()

    // setting any locale skips deephaven's date parsing
    val rootLocaleOptions = ParserOptions(locale = Locale.ROOT)
    val withExplicitLocale = DataFrame.readCsvStr(
        text = frenchCsv,
        delimiter = ';',
        parserOptions = rootLocaleOptions,
    )

    // could not parse, remains String
    withExplicitLocale["date"].type() shouldBe typeOf<String?>()
}
|
||||
|
||||
/**
 * Reads `;`-delimited samples with French, Dutch and (outside Windows) Arabic parser locales
 * and checks which column types are inferred.
 */
@Test
fun `parse with other locales`() {
    // French sample: comma decimals, space-grouped thousands, dd/MM/yyyy dates
    @Language("csv")
    val frenchCsv =
        """
        name; price; date;
        a;12,45; 05/06/2021;
        b;-13,35;14/07/2025;
        c;100 123,35;;
        d;-204 235,23;;
        e;NaN;;
        f;null;;
        """.trimIndent()

    // the date pattern is supplied explicitly, together with the French locale
    val frenchDf = DataFrame.readCsvStr(
        text = frenchCsv,
        delimiter = ';',
        parserOptions = ParserOptions(
            dateTimePattern = "dd/MM/yyyy",
            locale = Locale.FRENCH,
        ),
    )

    frenchDf["price"].type() shouldBe typeOf<Double?>()
    frenchDf["date"].type() shouldBe typeOf<LocalDate?>()

    // Dutch sample: comma decimals, dot-grouped thousands
    @Language("csv")
    val dutchCsv =
        """
        name; price;
        a;12,45;
        b;-13,35;
        c;100.123,35;
        d;-204.235,23;
        e;NaN;
        f;null;
        """.trimIndent()

    val dutchDf = DataFrame.readCsvStr(
        text = dutchCsv,
        delimiter = ';',
        parserOptions = ParserOptions(
            locale = Locale.forLanguageTag("nl-NL"),
        ),
    )

    dutchDf["price"].type() shouldBe typeOf<Double?>()

    // skipping this test on windows due to lack of support for Arabic locales
    if (!System.getProperty("os.name").startsWith("Windows")) {
        // while negative numbers in RTL languages cannot be parsed thanks to Java, others work
        @Language("csv")
        val arabicCsv =
            """
            الاسم; السعر;
            أ;١٢٫٤٥;
            ب;١٣٫٣٥;
            ج;١٠٠٫١٢٣;
            د;٢٠٤٫٢٣٥;
            هـ;ليس رقم;
            و;null;
            """.trimIndent()

        val easternArabicDf = DataFrame.readCsvStr(
            arabicCsv,
            delimiter = ';',
            parserOptions = ParserOptions(
                locale = Locale.forLanguageTag("ar-001"),
            ),
        )

        // the two lookups below use the header names from the first line of arabicCsv
        easternArabicDf["السعر"].type() shouldBe typeOf<Double?>()
        easternArabicDf["الاسم"].type() shouldBe typeOf<String>() // apparently not a char
    }
}
|
||||
|
||||
/**
 * Prices in the sample must be read as `Double?` with the Estonian (`et-EE`) locale,
 * both when the locale is passed via [ParserOptions] and when it is set on the global parser.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the Estonian locale into other tests.
 */
@Test
fun `handle slightly mixed locales`() {
    @Language("csv")
    val estonianWrongMinus =
        """
        name; price;
        a;12,45;
        b;-13,35;
        c;100 123,35;
        d;-204 235,23;
        e;NaN;
        f;null;
        """.trimIndent()

    val estonianDf1 = DataFrame.readCsvStr(
        text = estonianWrongMinus,
        delimiter = ';',
        parserOptions = ParserOptions(
            locale = Locale.forLanguageTag("et-EE"),
        ),
    )

    estonianDf1["price"].type() shouldBe typeOf<Double?>()

    // also test the global setting
    try {
        DataFrame.parser.locale = Locale.forLanguageTag("et-EE")

        val estonianDf2 = DataFrame.readCsvStr(
            text = estonianWrongMinus,
            delimiter = ';',
        )
        estonianDf2 shouldBe estonianDf1
    } finally {
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
/**
 * Reads the msleep fixture with the default null strings plus "nothing" and checks the inferred
 * column types, first via [ParserOptions] and then via the global parser settings.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the extra null strings into other tests.
 */
@Test
fun `NA and custom null string in double column`() {
    val df1 = DataFrame.readCsv(
        msleepCsv,
        parserOptions = ParserOptions(
            nullStrings = DEFAULT_DELIM_NULL_STRINGS + "nothing",
        ),
    )

    df1["name"].type() shouldBe typeOf<String>()
    df1["genus"].type() shouldBe typeOf<String>()
    df1["vore"].type() shouldBe typeOf<String?>()
    df1["order"].type() shouldBe typeOf<String>()
    df1["conservation"].type() shouldBe typeOf<String?>()
    df1["sleep_total"].type() shouldBe typeOf<Double>()
    df1["sleep_rem"].type() shouldBe typeOf<Double?>()
    df1["sleep_cycle"].type() shouldBe typeOf<Double?>()
    df1["awake"].type() shouldBe typeOf<Double>()
    df1["brainwt"].type() shouldBe typeOf<Double?>()
    df1["bodywt"].type() shouldBe typeOf<Double?>()

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("nothing")
        DEFAULT_DELIM_NULL_STRINGS.forEach {
            DataFrame.parser.addNullString(it)
        }

        val df2 = DataFrame.readCsv(msleepCsv)
        df2 shouldBe df1
    } finally {
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
/**
 * Reads space-separated tabular text with `hasFixedWidthColumns = true`, once without explicit
 * widths and once with explicit `fixedColumnWidths` plus a custom header, and checks column types.
 */
@Test
fun `multiple spaces as delimiter`() {
    // NOTE(review): this sample is whitespace-sensitive. Presumably the columns are padded with
    // runs of spaces that line up with the widths passed further down; verify the padding has not
    // been collapsed by an editor or formatter.
    @Language("csv")
    val csv =
        """
        NAME STATUS AGE NUMBER LABELS
        argo-events Active 2y77d 1234 app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events
        argo-workflows Active 2y77d 1234 app.kubernetes.io/instance=argo-workflows,kubernetes.io/metadata.name=argo-workflows
        argocd Active 5y18d 1234 kubernetes.io/metadata.name=argocd
        beta Active 4y235d 1234 kubernetes.io/metadata.name=beta
        """.trimIndent()

    // no explicit widths are passed here
    val df1 = DataFrame.readCsvStr(
        text = csv,
        hasFixedWidthColumns = true,
    )

    df1["NAME"].type() shouldBe typeOf<String>()
    df1["STATUS"].type() shouldBe typeOf<String>()
    df1["AGE"].type() shouldBe typeOf<String>()
    df1["NUMBER"].type() shouldBe typeOf<Int>()
    df1["LABELS"].type() shouldBe typeOf<String>()

    // explicit widths; the original header line is skipped and replaced by a lower-case one
    val df2 = DataFrame.readCsvStr(
        text = csv,
        hasFixedWidthColumns = true,
        fixedColumnWidths = listOf(25, 9, 9, 9, 100),
        skipLines = 1,
        header = listOf("name", "status", "age", "number", "labels"),
    )

    df2["name"].type() shouldBe typeOf<String>()
    df2["status"].type() shouldBe typeOf<String>()
    df2["age"].type() shouldBe typeOf<String>()
    df2["number"].type() shouldBe typeOf<Int>()
    df2["labels"].type() shouldBe typeOf<String>()
}
|
||||
|
||||
/**
 * Combines a custom header, `skipLines` and `colTypes` with a [ColType.DEFAULT] entry:
 * columns "a" and "b" get their explicit types, all remaining columns are read as strings.
 */
@Test
fun `handle default coltype with other parameters`() {
    val customHeader = listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k")
    val requestedTypes = mapOf(
        "a" to ColType.Int,
        "b" to ColType.Double,
        ColType.DEFAULT to ColType.String,
    )

    val frame = DataFrame.readCsv(
        simpleCsv,
        header = customHeader,
        skipLines = 2,
        colTypes = requestedTypes,
    )

    frame.columnTypes().shouldContainInOrder(
        typeOf<Int>(),
        typeOf<Double>(),
        typeOf<String>(),
        typeOf<String?>(),
        typeOf<String>(),
        typeOf<String?>(),
        typeOf<String?>(),
        typeOf<String?>(),
        typeOf<String>(),
        typeOf<String>(),
        typeOf<String?>(),
    )
    frame.rowsCount() shouldBe 4
}
|
||||
|
||||
/**
 * With `Double` in `skipTypes`, only the column explicitly typed via `colTypes` is `Double`;
 * the other numeric columns are read as `BigDecimal`. Checked via [ParserOptions] and via the
 * global parser settings.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the skipped type into other tests.
 */
@Test
fun `skipping types`() {
    val df1 = DataFrame.readCsv(
        irisDataset,
        colTypes = mapOf("sepal.length" to ColType.Double),
        parserOptions = ParserOptions(
            skipTypes = setOf(typeOf<Double>()),
        ),
    )

    df1["sepal.length"].type() shouldBe typeOf<Double>()
    df1["sepal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.length"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["variety"].type() shouldBe typeOf<String>()

    // Also test the global setting
    try {
        DataFrame.parser.addSkipType(typeOf<Double>())

        val df2 = DataFrame.readCsv(
            irisDataset,
            colTypes = mapOf("sepal.length" to ColType.Double),
        )
        df2 shouldBe df1
    } finally {
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
/**
 * Issue #921: custom null strings ("noppes" and the empty string) must be honoured for columns
 * whose type is given explicitly via `colTypes`. Checked via [ParserOptions] and via the global
 * parser settings.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the custom null strings into other tests.
 */
@Test
fun `read csv with custom null strings and given type`() {
    @Language("CSV")
    val csv =
        """
        a,b
        noppes,2
        1.2,
        3,45
        ,noppes
        1.3,1
        """.trimIndent()

    val df1 = DataFrame.readCsvStr(
        csv,
        parserOptions = ParserOptions(
            nullStrings = setOf("noppes", ""),
        ),
        colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int),
    )
    df1 shouldBe dataFrameOf("a", "b")(
        null, 2,
        1.2, null,
        3.0, 45,
        null, null,
        1.3, 1,
    )

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("noppes")
        DataFrame.parser.addNullString("")

        val df2 = DataFrame.readCsvStr(
            csv,
            colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int),
        )

        df2 shouldBe df1
    } finally {
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
/**
 * Issue #1047: by default a timestamp with a zone offset is read as an instant and one without
 * as a local date-time; only when a parser is registered through `adjustCsvSpecs` is the offset
 * column read as a (shifted) local date-time.
 */
@Test
fun `Only use Deephaven datetime parser with custom csv specs`() {
    @Language("csv")
    val timestampsCsv =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    // use DFs parsers by default for datetime-like columns
    val defaultParsed = DataFrame.readCsvStr(timestampsCsv)

    val defaultWithOffset = defaultParsed["with_timezone_offset"]
    defaultWithOffset.type() shouldBe typeOf<StdlibInstant>()
    defaultWithOffset[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")

    val defaultWithoutOffset = defaultParsed["without_timezone_offset"]
    defaultWithoutOffset.type() shouldBe typeOf<LocalDateTime>()
    defaultWithoutOffset[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")

    // enable fast datetime parser for the first column with adjustCsvSpecs
    val customParsed = DataFrame.readCsv(
        inputStream = timestampsCsv.byteInputStream(),
        adjustCsvSpecs = {
            putParserForName("with_timezone_offset", Parsers.DATETIME)
        },
    )

    val customWithOffset = customParsed["with_timezone_offset"]
    customWithOffset.type() shouldBe typeOf<LocalDateTime>()
    customWithOffset[0] shouldBe LocalDateTime.parse("2024-12-12T12:00:00")

    val customWithoutOffset = customParsed["without_timezone_offset"]
    customWithoutOffset.type() shouldBe typeOf<LocalDateTime>()
    customWithoutOffset[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")
}
|
||||
|
||||
/**
 * With `parseExperimentalInstant` enabled on the global parser, a timestamp with a zone offset
 * must be read as a stdlib `kotlin.time.Instant`.
 *
 * The global parser is reset in a `finally` block so that a failing assertion cannot leak
 * the flag into other tests.
 */
@Test
fun `test parsing kotlin-time-Instant`() {
    @Language("csv")
    val csvContent =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    try {
        DataFrame.parser.parseExperimentalInstant = true

        // use DFs parsers by default for datetime-like columns
        val df1 = DataFrame.readCsvStr(csvContent)
        df1["with_timezone_offset"].let {
            it.type() shouldBe typeOf<StdlibInstant>()
            it[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")
        }
    } finally {
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
/** A column group is written as a quoted JSON object per row when converting to CSV text. */
@Test
fun `json dependency test`() {
    val people = dataFrameOf("firstName", "lastName")(
        "John", "Doe",
        "Jane", "Doe",
    )
    val grouped = people.group { "firstName" and "lastName" }.into { "name" }

    // note the blank line before the closing quotes: the output ends with a line break
    val expectedCsv =
        """
        name
        '{"firstName":"John","lastName":"Doe"}'
        '{"firstName":"Jane","lastName":"Doe"}'

        """.trimIndent()

    grouped.toCsvStr(quote = '\'') shouldBe expectedCsv
}
|
||||
|
||||
/** URLs of the test resources used by the tests in this class. */
companion object {
    // the simple CSV fixture and its compressed variants
    private val simpleCsv = testCsv("testCSV")
    private val simpleCsvZip = testResource("testCSV.zip")
    private val simpleCsvGz = testResource("testCSV.csv.gz")

    // the same fixture stored as UTF-16LE with BOM, plain and compressed
    private val simpleCsvUtf16le = testCsv("testCSV-utf-16-le-bom")
    private val simpleCsvUtf16leGz = testResource("testCSV-utf16le-bom.csv.gz")
    private val simpleCsvUtf16leZip = testResource("testCSV-utf-16-le-bom.zip")

    // archives that are expected to be rejected
    private val twoCsvsZip = testResource("two csvs.zip")
    private val notCsv = testResource("not-csv.zip")

    // other CSV fixtures
    private val irisDataset = testCsv("irisDataset")
    private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale")
    private val wineCsv = testCsv("wine")
    private val withBomCsv = testCsv("with-bom")
    private val msleepCsv = testCsv("msleep")
}
|
||||
}
|
||||
|
||||
/**
 * Looks up [resourcePath] through the class loader of [DelimCsvTsvTests] and returns its [URL].
 *
 * @throws IllegalArgumentException if no such resource exists; the message names the missing path.
 */
fun testResource(resourcePath: String): URL =
    requireNotNull(DelimCsvTsvTests::class.java.classLoader.getResource(resourcePath)) {
        "Test resource not found on the classpath: $resourcePath"
    }
|
||||
|
||||
/** Shorthand for [testResource] that appends the `.csv` extension to [csvName]. */
fun testCsv(csvName: String): URL = testResource("$csvName.csv")
|
||||
+134
@@ -0,0 +1,134 @@
|
||||
@file:ExcludeFromSources
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
|
||||
import org.jetbrains.kotlinx.dataframe.io.ColType
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.net.URL
|
||||
|
||||
/**
 * ### Read $[FILE_TYPE_TITLE] $[DATA_TITLE] to [DataFrame]
 *
 * Reads any $[FILE_TYPE] $[DATA] to a [DataFrame][DataFrame].
 *
 * Parameters you can use to customize the reading process include, for instance, \[delimiter\],
 * \[header\], \[colTypes\], \[readLines\], and \[parserOptions\].
 * See the param list below for all settings.
 *
 * The integration is built upon {@include [DocumentationUrls.Deephaven]}.
 *
 * ##### Similar Functions
 * With the overloads of $[FUNCTION_LINK]`()`, you can read any $[FILE_TYPE] by [File][File],
 * [Path][java.nio.file.Path], [URL][URL], or [InputStream][InputStream].
 * Reading by file path or URL can also be done by passing a [String].
 *
 * For example, $[FUNCTION_LINK]`("input.$[CommonReadDelimDocs.FILE_EXTENSION]")` or with some options:
 *
 * $[FUNCTION_LINK]`(`
 *
 * {@include [Indent]}`file = `[File][File]`("input.$[CommonReadDelimDocs.FILE_EXTENSION]"),`
 *
 * {@include [Indent]}`parserOptions = `[ParserOptions][org.jetbrains.kotlinx.dataframe.api.ParserOptions]`(locale = `[Locale][java.util.Locale]`.`[US][java.util.Locale.US]`),`
 *
 * {@include [Indent]}`colTypes = `[mapOf][mapOf]`("a" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`, `[ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.`[String][ColType.String]`),`
 *
 * {@include [Indent]}`readLines = 1000L,`
 *
 * `)`
 *
 * ZIP (.zip) or GZIP (.gz) files are supported by default. \[compression\] is automatically detected.
 *
 * You can also read "raw" $[FILE_TYPE] data from a [String] like this:
 *
 * $[STR_FUNCTION_LINK]`("a,b,c", delimiter = ',')`
 *
 * @comment Some helper arguments for the function links
 * @set [FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}\]\[${[FUNCTION_NAME]}\]
 * @set [STR_FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}Str\]\[${[FUNCTION_NAME]}Str\]
 * @set [OLD_FUNCTION_LINK] \[DataFrame.${[OLD_FUNCTION_NAME]}\]\[org.jetbrains.kotlinx.dataframe.io.${[OLD_FUNCTION_NAME]}\]
 */
@Suppress("ClassName")
internal interface CommonReadDelimDocs {

    /**
     * @include [CommonReadDelimDocs]
     * @set [FILE_TYPE_TITLE] CSV
     * @set [FILE_TYPE] CSV
     * @set [FILE_EXTENSION] csv
     * @set [FUNCTION_NAME] readCsv
     * @set [OLD_FUNCTION_NAME] readCSV
     */
    typealias CsvDocs = Nothing

    /**
     * @include [CommonReadDelimDocs]
     * @set [FILE_TYPE_TITLE] TSV
     * @set [FILE_TYPE] TSV
     * @set [FILE_EXTENSION] tsv
     * @set [FUNCTION_NAME] readTsv
     * @set [OLD_FUNCTION_NAME] readTSV
     */
    typealias TsvDocs = Nothing

    /**
     * @include [CommonReadDelimDocs]
     * @set [FILE_TYPE_TITLE] Delimiter-Separated Text
     * @set [FILE_TYPE] delimiter-separated text
     * @set [FILE_EXTENSION] txt
     * @set [FUNCTION_NAME] readDelim
     * @set [OLD_FUNCTION_NAME] readDelim{@comment cannot differentiate between old and new}
     */
    typealias DelimDocs = Nothing

    /**
     * @include [CHARSET]
     * @include [DelimParams.HEADER]
     * @include [DelimParams.HAS_FIXED_WIDTH_COLUMNS]
     * @include [DelimParams.FIXED_COLUMN_WIDTHS]
     * @include [DelimParams.COL_TYPES]
     * @include [DelimParams.SKIP_LINES]
     * @include [DelimParams.READ_LINES]
     * @include [DelimParams.PARSER_OPTIONS]
     * @include [DelimParams.IGNORE_EMPTY_LINES]
     * @include [DelimParams.ALLOW_MISSING_COLUMNS]
     * @include [DelimParams.IGNORE_EXCESS_COLUMNS]
     * @include [DelimParams.QUOTE]
     * @include [DelimParams.IGNORE_SURROUNDING_SPACES]
     * @include [DelimParams.TRIM_INSIDE_QUOTED]
     * @include [DelimParams.PARSE_PARALLEL]
     */
    typealias CommonReadParams = Nothing

    // The aliases below are placeholder keys referenced as $[KEY] / ${[KEY]} in the KDoc of this interface.

    // Used in the title line; something like "File" or "File/URL"
    typealias DATA_TITLE = Nothing

    // Used in the summary sentence; something like "file" or "file or url"
    typealias DATA = Nothing

    // Like "CSV" or "TSV", capitalized
    typealias FILE_TYPE_TITLE = Nothing

    // Like "CSV" or "TSV"
    typealias FILE_TYPE = Nothing

    // File extension without the dot, like "csv" or "txt"
    typealias FILE_EXTENSION = Nothing

    // Function name, like "readCsv"
    typealias FUNCTION_NAME = Nothing

    // Old function name, like "readCSV"
    typealias OLD_FUNCTION_NAME = Nothing

    // A link to the main function, built from FUNCTION_NAME by the @set in the KDoc of this interface
    typealias FUNCTION_LINK = Nothing

    // A link to the str function, built from FUNCTION_NAME by the @set in the KDoc of this interface
    typealias STR_FUNCTION_LINK = Nothing

    // A link to the old function, built from OLD_FUNCTION_NAME by the @set in the KDoc of this interface
    typealias OLD_FUNCTION_LINK = Nothing
}
|
||||
+109
@@ -0,0 +1,109 @@
|
||||
@file:ExcludeFromSources
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.io.QuoteMode.ALL
|
||||
import java.io.File
|
||||
|
||||
/**
|
||||
* ### $[WRITE_OR_CONVERT] [DataFrame] to $[FILE_TYPE_TITLE] $[DATA_TITLE]
|
||||
*
|
||||
* ${[WRITE_OR_CONVERT]}s \[this\]\[this\] [DataFrame][DataFrame] to a $[FILE_TYPE] $[DATA].
|
||||
*
|
||||
* Parameters you can use to customize the process include, for instance, \[delimiter\],
|
||||
* \[includeHeader\], \[quoteMode\], and \[headerComments\].
|
||||
* See the param list below for all settings.
|
||||
*
|
||||
* The integration is built upon {@include [DocumentationUrls.ApacheCsv]}.
|
||||
*
|
||||
* ##### Similar Functions
|
||||
* With overloads of $[FUNCTION_LINK]`()`, you can write $[FILE_TYPE] to [File][File], [Path][java.nio.file.Path],
|
||||
* [Appendable], or [String].
|
||||
*
|
||||
* For example, $[FUNCTION_LINK]`("output.$[CommonWriteDelimDocs.FILE_EXTENSION]")`
|
||||
*
|
||||
* or $[FUNCTION_LINK]`(`[File][File]`("output.$[CommonWriteDelimDocs.FILE_EXTENSION]"), quoteMode = `[QuoteMode.ALL][ALL]`)`
|
||||
*
|
||||
* Converting to a [String] can be done like this:
|
||||
*
|
||||
* $[TO_STR_FUNCTION_LINK]`(delimiter = ",")`
|
||||
*
|
||||
* @comment Some helper arguments for the function links
|
||||
* @set [FUNCTION_LINK] \[DataFrame.${[FUNCTION_NAME]}\]\[${[FUNCTION_NAME]}\]
|
||||
* @set [TO_STR_FUNCTION_LINK] \[DataFrame.${[TO_STR_FUNCTION_NAME]}\]\[${[TO_STR_FUNCTION_NAME]}\]
|
||||
*/
|
||||
@Suppress("ClassName")
|
||||
internal interface CommonWriteDelimDocs {
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs]
|
||||
* @set [FILE_TYPE_TITLE] CSV
|
||||
* @set [FILE_TYPE] CSV
|
||||
* @set [FILE_EXTENSION] csv
|
||||
* @set [FUNCTION_NAME] writeCsv
|
||||
* @set [TO_STR_FUNCTION_NAME] toCsvStr
|
||||
*/
|
||||
typealias CsvDocs = Nothing
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs]
|
||||
* @set [FILE_TYPE_TITLE] TSV
|
||||
* @set [FILE_TYPE] TSV
|
||||
* @set [FILE_EXTENSION] tsv
|
||||
* @set [FUNCTION_NAME] writeTsv
|
||||
* @set [TO_STR_FUNCTION_NAME] toTsvStr
|
||||
*/
|
||||
typealias TsvDocs = Nothing
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs]
|
||||
* @set [FILE_TYPE_TITLE] Delimiter-Separated Text
|
||||
* @set [FILE_TYPE] delimiter-separated text
|
||||
* @set [FILE_EXTENSION] txt
|
||||
* @set [FUNCTION_NAME] writeDelim
|
||||
* @set [TO_STR_FUNCTION_NAME] toDelimStr
|
||||
*/
|
||||
typealias DelimDocs = Nothing
|
||||
|
||||
/**
|
||||
* @include [DelimParams.INCLUDE_HEADER]
|
||||
* @include [DelimParams.QUOTE]
|
||||
* @include [DelimParams.QUOTE_MODE]
|
||||
* @include [DelimParams.ESCAPE_CHAR]
|
||||
* @include [DelimParams.COMMENT_CHAR]
|
||||
* @include [DelimParams.HEADER_COMMENTS]
|
||||
* @include [DelimParams.RECORD_SEPARATOR]
|
||||
*/
|
||||
typealias CommonWriteParams = Nothing
|
||||
|
||||
// something like "Write" or "Convert"
|
||||
typealias WRITE_OR_CONVERT = Nothing
|
||||
|
||||
// Like "CSV" or "TSV", capitalized
|
||||
typealias FILE_TYPE_TITLE = Nothing
|
||||
|
||||
// something like "File" or "String"
|
||||
typealias DATA_TITLE = Nothing
|
||||
|
||||
// something like "file" or "text"
|
||||
typealias DATA = Nothing
|
||||
|
||||
// Like "CSV" or "TSV"
|
||||
typealias FILE_TYPE = Nothing
|
||||
|
||||
// like "csv" or "txt"
|
||||
typealias FILE_EXTENSION = Nothing
|
||||
|
||||
// Function name, like "writeCsv"
|
||||
typealias FUNCTION_NAME = Nothing
|
||||
|
||||
// Function name, like "toCsvStr"
|
||||
typealias TO_STR_FUNCTION_NAME = Nothing
|
||||
|
||||
// A link to the main function, set by WriteDelim itself
|
||||
typealias FUNCTION_LINK = Nothing
|
||||
|
||||
// A link to the str function, set by WriteDelim itself
|
||||
typealias TO_STR_FUNCTION_LINK = Nothing
|
||||
}
|
||||
+274
@@ -0,0 +1,274 @@
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
import io.deephaven.csv.CsvSpecs
|
||||
import org.apache.commons.csv.CSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
|
||||
import org.jetbrains.kotlinx.dataframe.io.ColType
|
||||
import org.jetbrains.kotlinx.dataframe.io.Compression
|
||||
import org.jetbrains.kotlinx.dataframe.io.DefaultNullStringsContentLink
|
||||
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
|
||||
import java.nio.charset.Charset
|
||||
|
||||
/**
|
||||
* Contains both the default values of csv/tsv parameters and the parameter KDocs.
|
||||
*/
|
||||
@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference")
|
||||
internal object DelimParams {
|
||||
|
||||
/**
|
||||
* @param path The file path to read.
|
||||
* Use [charset\] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias PATH_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param file The file to read.
|
||||
* Use [charset\] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias FILE_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param url The URL from which to fetch the data.
|
||||
* Use [charset\] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias URL_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param fileOrUrl The file path or URL to read the data from.
|
||||
* Use [charset\] to specify the encoding.
|
||||
* Can also be compressed as `.gz` or `.zip`, see [Compression][Compression].
|
||||
*/
|
||||
typealias FILE_OR_URL_READ = Nothing
|
||||
|
||||
/**
|
||||
* @param inputStream Represents the file to read.
|
||||
* Use [charset\] to specify the encoding.
|
||||
*/
|
||||
typealias INPUT_STREAM_READ = Nothing
|
||||
|
||||
/** @param text The raw data to read in the form of a [String]. */
|
||||
typealias TEXT_READ = Nothing
|
||||
|
||||
/** @param file The file to write to. */
|
||||
typealias FILE_WRITE = Nothing
|
||||
|
||||
/** @param path The path pointing to a file to write to. */
|
||||
typealias PATH_WRITE = Nothing
|
||||
|
||||
/** @param writer The [Appendable] to write to. */
|
||||
typealias WRITER_WRITE = Nothing
|
||||
|
||||
/**
|
||||
* @param charset The [character set][java.nio.charset.Charset] the input is encoded in.
|
||||
* Default: `null`
|
||||
*
|
||||
* If `null`, the Charset will be read from the BOM of the provided input,
|
||||
* defaulting to [UTF-8][Charsets.UTF_8] if no BOM is found.
|
||||
*/
|
||||
val CHARSET: Charset? = null
|
||||
|
||||
/**
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if \[hasFixedWidthColumns\] is `true`.
|
||||
*/
|
||||
const val CSV_DELIMITER: Char = ','
|
||||
|
||||
/**
|
||||
* @param delimiter The field delimiter character. Default: '\\t'.
|
||||
*
|
||||
* Ignored if \[hasFixedWidthColumns\] is `true`.
|
||||
*/
|
||||
const val TSV_DELIMITER: Char = '\t'
|
||||
|
||||
/**
|
||||
* @param delimiter The field delimiter character. Default: ','.
|
||||
*
|
||||
* Ignored if \[hasFixedWidthColumns\] is `true`.
|
||||
*/
|
||||
const val DELIM_DELIMITER: Char = ','
|
||||
|
||||
/**
|
||||
* @param header Optional column titles. Default: empty list.
|
||||
*
|
||||
* If non-empty, the data will be read with \[header\] as the column titles
|
||||
* (use \[skipLines\] if there's a header in the data).
|
||||
* If empty (default), the header will be read from the data.
|
||||
*/
|
||||
val HEADER: List<String> = emptyList()
|
||||
|
||||
/**
|
||||
* @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter.
|
||||
* Default: `false`.
|
||||
*
|
||||
* Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated
|
||||
* by multiple spaces instead of a single delimiter, so columns are visually aligned.
|
||||
* Column widths are determined by the header in the data (if present), or manually by setting
|
||||
* \[fixedColumnWidths\].
|
||||
*/
|
||||
const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false
|
||||
|
||||
/**
|
||||
* @param fixedColumnWidths The fixed column widths. Default: empty list.
|
||||
*
|
||||
* Requires \[hasFixedWidthColumns\]. If empty, the column widths will be determined by the header in the data
|
||||
* (if present), else, this manually sets the column widths.
|
||||
* The number of widths should match the number of columns.
|
||||
*/
|
||||
val FIXED_COLUMN_WIDTHS: List<Int> = emptyList()
|
||||
|
||||
/**
|
||||
* @param compression The compression of the data.
|
||||
* Default: [Compression.None], unless detected otherwise from the input file or url.
|
||||
*/
|
||||
val COMPRESSION: Compression<*> = Compression.None
|
||||
|
||||
/**
|
||||
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
|
||||
*
|
||||
* If supplied for a certain column name (inferred from data or given by \[header\]),
|
||||
* the parser will parse the column with the specified name as the specified type, else it will infer the type.
|
||||
*
|
||||
* e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`.
|
||||
* You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X`
|
||||
* to set a _default_ column type, like [ColType.String].
|
||||
*/
|
||||
val COL_TYPES: Map<String, ColType> = emptyMap()
|
||||
|
||||
/**
|
||||
* @param skipLines The number of lines to skip before reading the header and data. Default: `0`.
|
||||
*
|
||||
* Useful for files with metadata, or comments at the beginning, or to give a custom \[header\].
|
||||
*/
|
||||
const val SKIP_LINES: Long = 0L
|
||||
|
||||
/**
|
||||
* @param readLines The maximum number of lines to read from the data. Default: `null`.
|
||||
*
|
||||
* If `null`, all lines will be read.
|
||||
*/
|
||||
val READ_LINES: Long? = null
|
||||
|
||||
/**
|
||||
* @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String].
|
||||
* Default: `null`.
|
||||
*
|
||||
* Can configure locale, date format, double parsing, skipping types, etc.
|
||||
*
|
||||
* If [parserOptions\] or any of the arguments are `null`, the global parser configuration
|
||||
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
|
||||
*
|
||||
* The only exceptions are:
|
||||
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
|
||||
* will take the global setting + {@include [DefaultNullStringsContentLink]}.
|
||||
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
|
||||
* the given types or the global setting.
|
||||
*/
|
||||
val PARSER_OPTIONS: ParserOptions? = null
|
||||
|
||||
/**
|
||||
* @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`.
|
||||
*
|
||||
* If `false`, empty lines will be interpreted as having _empty_ values if \[allowMissingColumns\].
|
||||
*/
|
||||
const val IGNORE_EMPTY_LINES: Boolean = false
|
||||
|
||||
/**
|
||||
* @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too short will be interpreted as _empty_ values.
|
||||
*/
|
||||
const val ALLOW_MISSING_COLUMNS: Boolean = true
|
||||
|
||||
/**
|
||||
* @param ignoreExcessColumns Whether to ignore rows with more columns than the header. Default: `true`.
|
||||
*
|
||||
* If `true`, rows that are too long will have those columns dropped.
|
||||
*/
|
||||
const val IGNORE_EXCESS_COLUMNS: Boolean = true
|
||||
|
||||
/**
|
||||
* @param quote The quote character. Default: `"`.
|
||||
*
|
||||
* Used when field- or line delimiters should be interpreted as literal text.
|
||||
*
|
||||
* For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`.
|
||||
*/
|
||||
const val QUOTE: Char = '"'
|
||||
|
||||
/**
|
||||
* @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields.
|
||||
* Default: `true`.
|
||||
*/
|
||||
const val IGNORE_SURROUNDING_SPACES: Boolean = true
|
||||
|
||||
/**
|
||||
* @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields.
|
||||
* Default: `false`.
|
||||
*/
|
||||
const val TRIM_INSIDE_QUOTED: Boolean = false
|
||||
|
||||
/**
|
||||
* @param parseParallel Whether to parse the data in parallel. Default: `true`.
|
||||
*
|
||||
* If `true`, the data will be read and parsed in parallel by the Deephaven parser.
|
||||
* This is usually faster but can be turned off for debugging.
|
||||
*/
|
||||
const val PARSE_PARALLEL: Boolean = true
|
||||
|
||||
/**
|
||||
* @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV parsing options.
|
||||
*/
|
||||
val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it }
|
||||
|
||||
/** @param includeHeader Whether to include the header in the output. Default: `true`. */
|
||||
const val INCLUDE_HEADER: Boolean = true
|
||||
|
||||
/**
|
||||
* @param quoteMode The [QuoteMode] to use when writing CSV / TSV files.
|
||||
* Default: [QuoteMode.MINIMAL].
|
||||
*/
|
||||
val QUOTE_MODE: QuoteMode = QuoteMode.MINIMAL
|
||||
|
||||
/**
|
||||
* @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE].
|
||||
* Default: `null`. This will double-quote the value.
|
||||
*/
|
||||
val ESCAPE_CHAR: Char? = null
|
||||
|
||||
/**
|
||||
* @param commentChar The character that indicates a comment line in a CSV / TSV file.
|
||||
* Default: `'#'`.
|
||||
*/
|
||||
const val COMMENT_CHAR: Char = '#'
|
||||
|
||||
/**
|
||||
* @param recordSeparator The string that separates records in a CSV / TSV file.
|
||||
* Default: `"\\n"`, a Unix-newline.
|
||||
*/
|
||||
const val RECORD_SEPARATOR: String = "\n"
|
||||
|
||||
/**
|
||||
* @param headerComments A list of comments to include at the beginning of the CSV / TSV file.
|
||||
* Default: empty list.
|
||||
*/
|
||||
val HEADER_COMMENTS: List<String> = emptyList()
|
||||
|
||||
/**
|
||||
* @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`.
|
||||
*
|
||||
* Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda.
|
||||
* This will allow you to configure/overwrite any CSV / TSV writing options.
|
||||
*/
|
||||
val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it }
|
||||
}
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
@file:ExcludeFromSources
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
// Holder for reusable KDoc link snippets. Each nested alias carries no runtime meaning; it exists only
// so that its KDoc can be pulled into other KDocs, e.g. `{@include [DocumentationUrls.ApacheCsv]}`.
internal interface DocumentationUrls {

    /** [Deephaven CSV](https://github.com/deephaven/deephaven-csv) */
    typealias Deephaven = Nothing

    /** [Apache Commons CSV](https://commons.apache.org/proper/commons-csv/) */
    typealias ApacheCsv = Nothing
}
|
||||
Vendored
+101
@@ -0,0 +1,101 @@
|
||||
package org.jetbrains.kotlinx.dataframe.documentationCsv
|
||||
|
||||
import kotlin.annotation.AnnotationTarget.ANNOTATION_CLASS
|
||||
import kotlin.annotation.AnnotationTarget.CLASS
|
||||
import kotlin.annotation.AnnotationTarget.CONSTRUCTOR
|
||||
import kotlin.annotation.AnnotationTarget.FIELD
|
||||
import kotlin.annotation.AnnotationTarget.FILE
|
||||
import kotlin.annotation.AnnotationTarget.FUNCTION
|
||||
import kotlin.annotation.AnnotationTarget.LOCAL_VARIABLE
|
||||
import kotlin.annotation.AnnotationTarget.PROPERTY
|
||||
import kotlin.annotation.AnnotationTarget.PROPERTY_GETTER
|
||||
import kotlin.annotation.AnnotationTarget.PROPERTY_SETTER
|
||||
import kotlin.annotation.AnnotationTarget.TYPE
|
||||
import kotlin.annotation.AnnotationTarget.TYPEALIAS
|
||||
import kotlin.annotation.AnnotationTarget.VALUE_PARAMETER
|
||||
|
||||
/**
|
||||
*
|
||||
* {@include [Indent]}
|
||||
*
|
||||
*/
|
||||
@ExcludeFromSources
|
||||
internal typealias LineBreak = Nothing
|
||||
|
||||
/** */
|
||||
@ExcludeFromSources
|
||||
internal typealias QuarterIndent = Nothing
|
||||
|
||||
/** */
|
||||
@ExcludeFromSources
|
||||
internal typealias HalfIndent = Nothing
|
||||
|
||||
/** */
|
||||
@ExcludeFromSources
|
||||
internal typealias Indent = Nothing
|
||||
|
||||
/** */
|
||||
@ExcludeFromSources
|
||||
internal typealias DoubleIndent = Nothing
|
||||
|
||||
/** */
|
||||
@ExcludeFromSources
|
||||
internal typealias TripleIndent = Nothing
|
||||
|
||||
/** */
|
||||
@ExcludeFromSources
|
||||
internal typealias QuadrupleIndent = Nothing
|
||||
|
||||
/**
 * Any `Documentable` annotated with this annotation will be excluded from the generated sources by
 * the documentation processor.
 *
 * **NOTE: DO NOT RENAME!**
 */
@Target(
    CLASS, ANNOTATION_CLASS,
    PROPERTY, FIELD, LOCAL_VARIABLE, VALUE_PARAMETER,
    CONSTRUCTOR, FUNCTION, PROPERTY_GETTER, PROPERTY_SETTER,
    TYPE, TYPEALIAS, FILE,
)
internal annotation class ExcludeFromSources
|
||||
|
||||
/**
 * Any `Documentable` annotated with this annotation will be exported to HTML by the documentation
 * processor.
 *
 * You can use @exportAsHtmlStart and @exportAsHtmlEnd to specify a range of the doc to
 * export to HTML.
 *
 * **NOTE: DO NOT RENAME!**
 *
 * @param theme Whether to include a simple theme in the HTML file. Default is `true`.
 * @param stripReferences Whether to strip `[references]` from the HTML file. Default is `true`.
 *  This is useful when you want to include the HTML file in a website, where the references are not
 *  needed or would break.
 */
@Target(
    CLASS, ANNOTATION_CLASS,
    PROPERTY, FIELD, LOCAL_VARIABLE, VALUE_PARAMETER,
    CONSTRUCTOR, FUNCTION, PROPERTY_GETTER, PROPERTY_SETTER,
    TYPE, TYPEALIAS, FILE,
)
internal annotation class ExportAsHtml(
    val theme: Boolean = true,
    val stripReferences: Boolean = true,
)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import io.deephaven.csv.containers.ByteSlice
|
||||
import io.deephaven.csv.tokenization.Tokenizer.CustomDoubleParser
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
|
||||
/**
 * Adapter that lets Deephaven's CSV tokenizer parse doubles through [FastDoubleParser].
 *
 * Both overloads signal an unparsable input by throwing a [NumberFormatException].
 *
 * @param parserOptions Forwarded as-is to the [FastDoubleParser] constructor.
 */
internal class DataFrameCustomDoubleParser(parserOptions: ParserOptions? = null) : CustomDoubleParser {

    private val delegate = FastDoubleParser(parserOptions)

    /**
     * Parses the byte range described by [bs].
     * Any exception raised while parsing is treated the same as an unparsable value.
     */
    override fun parse(bs: ByteSlice): Double {
        val parsed = try {
            delegate.parseOrNull(bs.data(), bs.begin(), bs.size())
        } catch (_: Exception) {
            null
        }
        return parsed ?: throw NumberFormatException()
    }

    /**
     * Parses the textual content of [cs].
     * Exceptions thrown by the underlying parser are not intercepted here.
     */
    override fun parse(cs: CharSequence): Double {
        val text = cs.toString()
        return delegate.parseOrNull(text) ?: throw NumberFormatException()
    }
}
|
||||
Vendored
+204
@@ -0,0 +1,204 @@
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import io.deephaven.csv.parsers.DataType
|
||||
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
|
||||
import io.deephaven.csv.parsers.DataType.BYTE
|
||||
import io.deephaven.csv.parsers.DataType.CHAR
|
||||
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
|
||||
import io.deephaven.csv.parsers.DataType.DOUBLE
|
||||
import io.deephaven.csv.parsers.DataType.FLOAT
|
||||
import io.deephaven.csv.parsers.DataType.INT
|
||||
import io.deephaven.csv.parsers.DataType.LONG
|
||||
import io.deephaven.csv.parsers.DataType.SHORT
|
||||
import io.deephaven.csv.parsers.DataType.STRING
|
||||
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
|
||||
import io.deephaven.csv.sinks.Sink
|
||||
import io.deephaven.csv.sinks.SinkFactory
|
||||
import io.deephaven.csv.sinks.Source
|
||||
import kotlinx.datetime.toKotlinLocalDateTime
|
||||
import java.time.LocalDateTime
|
||||
import java.time.ZoneOffset
|
||||
import kotlin.time.Duration.Companion.nanoseconds
|
||||
|
||||
/**
 * Convenience type for objects that are both a Deephaven [Sink] and a Deephaven [Source]
 * over the same chunk type [T].
 */
internal interface SinkSource<T : Any> :
    Sink<T>,
    Source<T>
|
||||
|
||||
/**
 * Implementation of Deephaven's [Sink] and [Source] that stores data in an [ArrayList].
 *
 * The implementation is based on [Writing Your Own Data Sinks](https://github.com/deephaven/deephaven-csv/blob/main/ADVANCED.md).
 *
 * Incoming chunks are converted element by element according to [dataType] and stored boxed:
 * `BOOLEAN_AS_BYTE` becomes a [Boolean], `DATETIME_AS_LONG` / `TIMESTAMP_AS_LONG` (epoch nanoseconds)
 * become a kotlinx-datetime `LocalDateTime` in UTC, all other supported types are stored as-is.
 *
 * If we ever store column data unboxed / primitively, this needs to be modified.
 *
 * @param columnIndex The value handed to the sink supplier by the [SinkFactory] when this sink was created.
 * @param dataType The Deephaven [DataType] of the chunks written to (and read back from) this sink.
 */
@Suppress("UNCHECKED_CAST")
internal class ListSink(val columnIndex: Int, val dataType: DataType) : SinkSource<Any> {

    @Suppress("ktlint:standard:comment-wrapping", "ktlint:standard:no-consecutive-comments")
    companion object {
        /** Number of nanoseconds in one second. */
        private const val NANOS_PER_SECOND: Int = 1_000_000_000

        val SINK_FACTORY: SinkFactory = SinkFactory.of(
            // unused in Parsers.DEFAULT:
            /* byteSinkSupplier = */ { ListSink(it, BYTE) as SinkSource<ByteArray> },
            /* shortSinkSupplier = */ { ListSink(it, SHORT) as SinkSource<ShortArray> },
            /* intSinkSupplier = */ { ListSink(it, INT) as SinkSource<IntArray> },
            /* longSinkSupplier = */ { ListSink(it, LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* floatSinkSupplier = */ { ListSink(it, FLOAT) as SinkSource<FloatArray> },
            /* doubleSinkSupplier = */ { ListSink(it, DOUBLE) as SinkSource<DoubleArray> },
            /* booleanAsByteSinkSupplier = */ { ListSink(it, BOOLEAN_AS_BYTE) as SinkSource<ByteArray> },
            /* charSinkSupplier = */ { ListSink(it, CHAR) as SinkSource<CharArray> },
            /* stringSinkSupplier = */ { ListSink(it, STRING) as SinkSource<Array<String>> },
            /* dateTimeAsLongSinkSupplier = */ { ListSink(it, DATETIME_AS_LONG) as SinkSource<LongArray> },
            // unused in Parsers.COMPLETE and Parsers.DEFAULT:
            /* timestampAsLongSinkSupplier = */ { ListSink(it, TIMESTAMP_AS_LONG) as SinkSource<LongArray> },
        )
    }

    private val _data: MutableList<Any?> = ArrayList(1000)

    /** Read-only view of everything written to this sink so far. */
    val data: List<Any?>
        get() = _data

    /** Becomes `true` as soon as a `null` is stored, either written explicitly or inserted as padding. */
    var hasNulls: Boolean = false
        private set

    /**
     * Converts epoch nanoseconds into a kotlinx-datetime `LocalDateTime` in UTC.
     *
     * [kotlin.time.Duration.toComponents] yields components that carry the sign of the duration, whereas
     * [LocalDateTime.ofEpochSecond] only accepts a nano-of-second in `0..999_999_999`. For instants before
     * the epoch with a fractional second, one second is therefore borrowed so the nano part is non-negative.
     */
    private fun epochNanosToLocalDateTime(epochNanos: Long) =
        epochNanos.nanoseconds
            .toComponents { seconds, nanos ->
                if (nanos < 0) {
                    LocalDateTime.ofEpochSecond(seconds - 1, nanos + NANOS_PER_SECOND, ZoneOffset.UTC)
                } else {
                    LocalDateTime.ofEpochSecond(seconds, nanos, ZoneOffset.UTC)
                }
            }.toKotlinLocalDateTime()

    /**
     * Returns the boxed value at [srcIndex] of the chunk [src], or `null` (also flagging [hasNulls])
     * when [isNull] marks that position as missing.
     */
    private fun getValue(src: Any, srcIndex: Int, isNull: BooleanArray): Any? =
        if (isNull[srcIndex]) {
            hasNulls = true
            null
        } else {
            when (dataType) {
                BOOLEAN_AS_BYTE -> (src as ByteArray)[srcIndex] == 1.toByte()

                // unused in Parsers.DEFAULT
                BYTE -> (src as ByteArray)[srcIndex]

                // unused in Parsers.DEFAULT
                SHORT -> (src as ShortArray)[srcIndex]

                INT -> (src as IntArray)[srcIndex]

                LONG -> (src as LongArray)[srcIndex]

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                FLOAT -> (src as FloatArray)[srcIndex]

                DOUBLE -> (src as DoubleArray)[srcIndex]

                CHAR -> (src as CharArray)[srcIndex]

                STRING -> (src as Array<String>)[srcIndex]

                DATETIME_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                // unused in Parsers.COMPLETE and Parsers.DEFAULT
                TIMESTAMP_AS_LONG -> epochNanosToLocalDateTime((src as LongArray)[srcIndex])

                else -> error("unsupported parser")
            }
        }

    /**
     * Appends the first `destEnd - destBegin` elements of [src] to the list.
     * If the list is shorter than [destBegin], the gap is padded with `null`s first.
     */
    private fun writeAppending(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        while (data.size < destBegin) {
            _data += null
            hasNulls = true
        }
        // the chunk is always consumed from its own index 0
        for (srcIndex in 0..<(destEnd - destBegin)) {
            _data += getValue(src, srcIndex, isNull)
        }
    }

    /** Overwrites list positions `destBegin..<destEnd` with the first `destEnd - destBegin` elements of [src]. */
    private fun writeReplacing(
        src: Any,
        destBegin: Int,
        destEnd: Int,
        isNull: BooleanArray,
    ) {
        // withIndex() yields (0-based position in the chunk, absolute position in the list)
        for ((srcIndex, destIndex) in (destBegin..<destEnd).withIndex()) {
            _data[destIndex] = getValue(src, srcIndex, isNull)
        }
    }

    /**
     * Stores a chunk of parsed values at list positions `destBegin..<destEnd`.
     *
     * @param src Primitive (or `String`) array matching [dataType]; read starting at index 0.
     * @param isNull Parallel array marking which chunk elements are missing.
     * @param appending `true` to append to the end of the list, `false` to overwrite existing positions.
     */
    override fun write(
        src: Any,
        isNull: BooleanArray,
        destBegin: Long,
        destEnd: Long,
        appending: Boolean,
    ) {
        if (destBegin == destEnd) return
        val destBeginAsInt = destBegin.toInt()
        val destEndAsInt = destEnd.toInt()
        if (appending) {
            writeAppending(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        } else {
            writeReplacing(src = src, destBegin = destBeginAsInt, destEnd = destEndAsInt, isNull = isNull)
        }
    }

    /**
     * Copies list positions `srcBegin..<srcEnd` into a chunk, writing the chunk from index 0.
     *
     * For every copied position, [isNull] is set to whether the stored value is `null`;
     * [store] is only invoked for non-null values, leaving the chunk slot untouched otherwise.
     */
    private inline fun <reified T : Any> copyOut(
        isNull: BooleanArray,
        srcBegin: Int,
        srcEnd: Int,
        store: (destIndex: Int, value: T) -> Unit,
    ) {
        // withIndex() yields (0-based position in the chunk, absolute position in the list);
        // the list must be indexed with the absolute position and the chunk with the 0-based one.
        for ((destIndex, srcIndex) in (srcBegin..<srcEnd).withIndex()) {
            val value = _data[srcIndex] as T?
            if (value != null) store(destIndex, value)
            isNull[destIndex] = value == null
        }
    }

    /**
     * Reads previously written values back into a chunk (see Deephaven's `Source` contract).
     *
     * @param dest Primitive array matching [dataType]; filled starting at index 0.
     * @param isNull Parallel array that receives the null flags, also starting at index 0.
     * @param srcBegin Inclusive start position in this sink.
     * @param srcEnd Exclusive end position in this sink.
     */
    override fun read(
        dest: Any,
        isNull: BooleanArray,
        srcBegin: Long,
        srcEnd: Long,
    ) {
        if (srcBegin == srcEnd) return
        val srcBeginAsInt = srcBegin.toInt()
        val srcEndAsInt = srcEnd.toInt()

        when (dataType) {
            BYTE -> {
                dest as ByteArray
                copyOut<Byte>(isNull, srcBeginAsInt, srcEndAsInt) { destIndex, value -> dest[destIndex] = value }
            }

            SHORT -> {
                dest as ShortArray
                copyOut<Short>(isNull, srcBeginAsInt, srcEndAsInt) { destIndex, value -> dest[destIndex] = value }
            }

            INT -> {
                dest as IntArray
                copyOut<Int>(isNull, srcBeginAsInt, srcEndAsInt) { destIndex, value -> dest[destIndex] = value }
            }

            LONG -> {
                dest as LongArray
                copyOut<Long>(isNull, srcBeginAsInt, srcEndAsInt) { destIndex, value -> dest[destIndex] = value }
            }

            // Deephaven's fast path for numeric type inference supports only byte, short, int, and long
            // so this should never be reached
            else -> error("unsupported sink state")
        }
    }

    override fun getUnderlying(): ListSink = this
}
|
||||
Vendored
+387
@@ -0,0 +1,387 @@
|
||||
@file:JvmName("ReadDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import io.deephaven.csv.CsvSpecs
|
||||
import io.deephaven.csv.parsers.DataType
|
||||
import io.deephaven.csv.parsers.DataType.BOOLEAN_AS_BYTE
|
||||
import io.deephaven.csv.parsers.DataType.BYTE
|
||||
import io.deephaven.csv.parsers.DataType.CHAR
|
||||
import io.deephaven.csv.parsers.DataType.DATETIME_AS_LONG
|
||||
import io.deephaven.csv.parsers.DataType.DOUBLE
|
||||
import io.deephaven.csv.parsers.DataType.FLOAT
|
||||
import io.deephaven.csv.parsers.DataType.INT
|
||||
import io.deephaven.csv.parsers.DataType.LONG
|
||||
import io.deephaven.csv.parsers.DataType.SHORT
|
||||
import io.deephaven.csv.parsers.DataType.STRING
|
||||
import io.deephaven.csv.parsers.DataType.TIMESTAMP_AS_LONG
|
||||
import io.deephaven.csv.parsers.Parser
|
||||
import io.deephaven.csv.parsers.Parsers
|
||||
import io.deephaven.csv.reading.CsvReader
|
||||
import io.deephaven.csv.util.CsvReaderException
|
||||
import kotlinx.datetime.LocalDate
|
||||
import kotlinx.datetime.LocalDateTime
|
||||
import kotlinx.datetime.LocalTime
|
||||
import org.apache.commons.io.input.BOMInputStream
|
||||
import org.jetbrains.kotlinx.dataframe.DataColumn
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.DataRow
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.api.convertTo
|
||||
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.parse
|
||||
import org.jetbrains.kotlinx.dataframe.api.parser
|
||||
import org.jetbrains.kotlinx.dataframe.api.tryParse
|
||||
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs
|
||||
import org.jetbrains.kotlinx.dataframe.io.ColType
|
||||
import org.jetbrains.kotlinx.dataframe.io.Compression
|
||||
import org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS
|
||||
import org.jetbrains.kotlinx.dataframe.io.skippingBomCharacters
|
||||
import org.jetbrains.kotlinx.dataframe.io.toKType
|
||||
import org.jetbrains.kotlinx.dataframe.io.useDecompressed
|
||||
import java.io.InputStream
|
||||
import java.math.BigDecimal
|
||||
import java.math.BigInteger
|
||||
import java.net.URL
|
||||
import java.nio.charset.Charset
|
||||
import kotlin.reflect.KType
|
||||
import kotlin.reflect.full.withNullability
|
||||
import kotlin.reflect.typeOf
|
||||
import kotlin.time.Duration
|
||||
import kotlin.time.Instant as StdlibInstant
|
||||
import kotlinx.datetime.Instant as DeprecatedInstant
|
||||
|
||||
/**
 * Reads delimiter-separated data from an [InputStream] into a [DataFrame] using the Deephaven CSV library.
 *
 * If the header has to be taken from the input but the input is empty (or shorter than the lines to skip),
 * an empty [DataFrame] is returned. Any other [CsvReaderException] is rethrown wrapped in an
 * [IllegalStateException].
 *
 * @include [INPUT_STREAM_READ]
 * @include [CHARSET]
 * @param delimiter The field delimiter character. The default is ',' for CSV, '\t' for TSV.
 * @include [HEADER]
 * @include [COL_TYPES]
 * @include [SKIP_LINES]
 * @include [READ_LINES]
 * @include [HAS_FIXED_WIDTH_COLUMNS]
 * @include [FIXED_COLUMN_WIDTHS]
 * @include [PARSER_OPTIONS]
 * @include [IGNORE_EMPTY_LINES]
 * @include [ALLOW_MISSING_COLUMNS]
 * @include [IGNORE_EXCESS_COLUMNS]
 * @include [QUOTE]
 * @include [IGNORE_SURROUNDING_SPACES]
 * @include [TRIM_INSIDE_QUOTED]
 * @include [PARSE_PARALLEL]
 * @include [COMPRESSION]
 * @include [ADJUST_CSV_SPECS]
 */
internal fun readDelimImpl(
    inputStream: InputStream,
    delimiter: Char,
    header: List<String>,
    charset: Charset?,
    hasFixedWidthColumns: Boolean,
    fixedColumnWidths: List<Int>,
    colTypes: Map<String, ColType>,
    skipLines: Long,
    readLines: Long?,
    parserOptions: ParserOptions?,
    ignoreEmptyLines: Boolean,
    allowMissingColumns: Boolean,
    ignoreExcessColumns: Boolean,
    quote: Char,
    ignoreSurroundingSpaces: Boolean,
    trimInsideQuoted: Boolean,
    parseParallel: Boolean,
    compression: Compression<*>,
    adjustCsvSpecs: AdjustCsvSpecs,
): DataFrame<*> {
    // translate the function arguments into Deephaven csv specs
    val specsBuilder = CsvSpecs.builder().apply {
        customDoubleParser(DataFrameCustomDoubleParser(parserOptions))

        // explicitly given nullStrings win, otherwise take the global ones + some extras
        nullValueLiterals(parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS))
        headerLegalizer(::legalizeHeader)
        numRows(readLines ?: Long.MAX_VALUE)
        ignoreEmptyLines(ignoreEmptyLines)
        allowMissingColumns(allowMissingColumns)
        ignoreExcessColumns(ignoreExcessColumns)
        if (!hasFixedWidthColumns) delimiter(delimiter)
        quote(quote)
        ignoreSurroundingSpaces(ignoreSurroundingSpaces)
        trim(trimInsideQuoted)
        concurrent(parseParallel)
        header(header)
        hasFixedWidthColumns(hasFixedWidthColumns)
        if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) fixedColumnWidths(fixedColumnWidths)
        skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines)
        parsers(parserOptions, colTypes)
    }

    // the caller gets the last word on the specs before they are built
    val csvSpecs = adjustCsvSpecs(specsBuilder, specsBuilder).build()

    val csvReaderResult = inputStream.useDecompressed(compression) { decompressedStream ->
        try {
            val bomFreeStream = decompressedStream.skippingBomCharacters()

            // charset precedence: explicitly provided -> detected from BOM -> UTF-8
            val streamCharset = charset
                ?: (bomFreeStream as? BOMInputStream)?.bom?.let { Charset.forName(it.charsetName) }
                ?: Charsets.UTF_8

            @Suppress("ktlint:standard:comment-wrapping")
            CsvReader.read(
                /* specs = */ csvSpecs,
                /* stream = */ bomFreeStream,
                /* streamCharset = */ streamCharset,
                /* sinkFactory = */ ListSink.SINK_FACTORY,
            )
        } catch (e: CsvReaderException) {
            // an empty input whose header had to be inferred is not an error: it yields an empty DataFrame
            val isEmptyInput = e.message ==
                "Can't proceed because hasHeaderRow is set but input file is empty or shorter than skipHeaderRows"
            if (isEmptyInput) return@readDelimImpl DataFrame.empty()

            throw IllegalStateException(
                "Could not read delimiter-separated data: CsvReaderException: ${e.message}: ${e.cause?.message ?: ""}",
                e,
            )
        }
    }

    // column type to fall back on for columns without an explicit entry in colTypes
    val fallbackColType = colTypes[ColType.DEFAULT]

    // turn each ResultColumn into a DataColumn
    val columns = csvReaderResult.map { resultColumn ->
        resultColumn.toDataColumn(
            parserOptions = parserOptions,
            desiredColType = colTypes[resultColumn.name()] ?: fallbackColType,
        )
    }

    return dataFrameOf(columns)
}
|
||||
|
||||
/**
 * Converts a Deephaven [CsvReader.ResultColumn] into a [DataColumn].
 *
 * Columns Deephaven did not leave as `STRING` are returned as they are.
 * String columns are either converted to [desiredColType] (when given) or run through
 * DataFrame's own String parsers, skipping the types Deephaven already tried.
 *
 * @param parserOptions Optional parsing settings passed on to the conversion / parsing step.
 * @param desiredColType The type requested for this column, or `null` to let DataFrame infer it.
 */
@Suppress("UNCHECKED_CAST")
private fun CsvReader.ResultColumn.toDataColumn(
    parserOptions: ParserOptions?,
    desiredColType: ColType?,
): DataColumn<*> {
    val sink = data()!! as ListSink
    val sinkValues = sink.data
    val sinkDataType = sink.dataType
    val sinkHasNulls = sink.hasNulls
    val columnType = dataType().toKType().withNullability(sinkHasNulls)

    val rawColumn = DataColumn.createValueColumn(
        name = name(),
        values = sinkValues,
        type = columnType,
    )

    // anything but String has already been parsed by Deephaven
    if (sinkDataType != STRING) return rawColumn

    // from here on the column is known to hold (nullable) Strings
    rawColumn as ValueColumn<String?>

    if (desiredColType != null) {
        return rawColumn.convertTo(
            newType = desiredColType.toKType().withNullability(true),
            parserOptions = parserOptions,
        )
    }

    // attempt additional parsing; the column remains String if that fails
    val givenSkipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
    // no need to check for types that Deephaven already parses, skip those too
    val adjustedParserOptions = (parserOptions ?: ParserOptions())
        .copy(skipTypes = givenSkipTypes + typesDeephavenAlreadyParses)

    return rawColumn.tryParse(adjustedParserOptions)
}
|
||||
|
||||
/**
 * Maps a Deephaven [DataType] to the matching Kotlin [KType].
 *
 * Fails with an [IllegalStateException] for [DataType.CUSTOM] and for a `null` data type.
 */
private fun DataType?.toKType(): KType {
    return when (this) {
        // unsupported inputs
        null -> error("null data type")

        DataType.CUSTOM -> error("custom data type")

        BOOLEAN_AS_BYTE -> typeOf<Boolean>()

        CHAR -> typeOf<Char>()

        STRING -> typeOf<String>()

        INT -> typeOf<Int>()

        LONG -> typeOf<Long>()

        DOUBLE -> typeOf<Double>()

        // unused in Parsers.DEFAULT
        BYTE -> typeOf<Byte>()

        // unused in Parsers.DEFAULT
        SHORT -> typeOf<Short>()

        // unused in Parsers.COMPLETE and Parsers.DEFAULT
        FLOAT -> typeOf<Float>()

        // TIMESTAMP_AS_LONG is unused in Parsers.COMPLETE and Parsers.DEFAULT
        DATETIME_AS_LONG, TIMESTAMP_AS_LONG -> typeOf<LocalDateTime>()
    }
}
|
||||
|
||||
/**
 * Header legalizer handed to Deephaven: passes every name in [header], in order,
 * through one shared [ColumnNameGenerator] and returns the names it hands back.
 */
private fun legalizeHeader(header: Array<String>): Array<String> {
    val uniqueNames = ColumnNameGenerator()
    return Array(header.size) { index -> uniqueNames.addUnique(header[index]) }
}
|
||||
|
||||
/**
 * Configures how many lines to skip at the start of the input.
 *
 * When the header is taken from the csv ([takeHeaderFromCsv]), the lines are skipped through
 * `skipHeaderRows`; otherwise they are skipped through `skipRows`.
 */
private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Long): CsvSpecs.Builder =
    when {
        takeHeaderFromCsv -> skipHeaderRows(skipLines)
        else -> skipRows(skipLines)
    }
|
||||
|
||||
/**
 * Sets the correct parsers for the csv, based on [colTypes] and [ParserOptions.skipTypes].
 * If [ColType.DEFAULT] is present, it sets the default parser.
 *
 * Logic overview:
 *
 * - if no [colTypes] are given
 *     - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *     - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 * - if [colTypes] are supplied
 *     - if [ColType.DEFAULT] is among the values
 *         - set the parser for each supplied column+colType
 *         - let deephaven use _only_ the parser given as [ColType.DEFAULT] type
 *           (plus [Parsers.STRING] to fall back on)
 *     - if [ColType.DEFAULT] is not among the values
 *         - set the parser for each supplied column+colType
 *         - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
 *         - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
 *
 * We will not use [Deephaven's DateTime parser][Parsers.DATETIME].
 * This is done to avoid different behavior compared to [DataFrame.parse];
 * Deephaven parses [Instant] as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047)
 *
 * Note that `skipTypes` will never skip a type explicitly set by `colTypes`.
 * This is intended.
 *
 * @param parserOptions Optional parser options; only its `skipTypes` are read here.
 *   When `null`, the global `DataFrame.parser.skipTypes` are used.
 * @param colTypes Column name -> requested type; the [ColType.DEFAULT] key holds the default type.
 * @return This builder, to allow chaining.
 */
private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map<String, ColType>): CsvSpecs.Builder {
    // explicit per-column parsers; the DEFAULT entry is not a real column
    for ((colName, colType) in colTypes) {
        if (colName == ColType.DEFAULT) continue
        putParserForName(colName, colType.toCsvParser())
    }

    // looked up once; `null` means no default column type was supplied
    val defaultColType = colTypes[ColType.DEFAULT]

    // BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING
    val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME
    val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes

    val parsersToUse = when {
        defaultColType != null ->
            listOf(defaultColType.toCsvParser(), Parsers.STRING)

        skipTypes.isNotEmpty() -> {
            // skip types without a direct Deephaven parser have nothing to remove here
            val parsersToSkip = skipTypes
                .mapNotNull { it.toColType().toCsvParserOrNull() }
            defaultParsers.toSet() - parsersToSkip.toSet()
        }

        else -> defaultParsers
    }
    parsers(parsersToUse)
    return this
}
|
||||
|
||||
/**
 * Configures where the column names come from.
 *
 * An empty [header] means the header row is taken from the csv itself;
 * otherwise the csv is declared to have no header row and [header] supplies the names.
 */
private fun CsvSpecs.Builder.header(header: List<String>): CsvSpecs.Builder {
    val takeHeaderFromCsv = header.isEmpty()
    val builder = hasHeaderRow(takeHeaderFromCsv)
    return if (takeHeaderFromCsv) builder else builder.headers(header)
}
|
||||
|
||||
/**
 * Looks up the Deephaven CSV [Parser] that directly corresponds to this [ColType].
 *
 * @return the matching [Parser], or `null` if Deephaven has no direct parser for this type.
 */
internal fun ColType.toCsvParserOrNull(): Parser<*>? {
    return when (this) {
        ColType.Boolean -> Parsers.BOOLEAN
        ColType.Char -> Parsers.CHAR
        ColType.String -> Parsers.STRING
        ColType.Int -> Parsers.INT
        ColType.Long -> Parsers.LONG
        ColType.Double -> Parsers.DOUBLE
        else -> null
    }
}
|
||||
|
||||
/**
 * Looks up the Deephaven CSV [Parser] for this [ColType].
 *
 * Types without a direct [Parser] are read with [Parsers.STRING], leaving the actual
 * conversion to [DataFrame.parse].
 */
internal fun ColType.toCsvParser(): Parser<*> {
    return toCsvParserOrNull() ?: Parsers.STRING
}
|
||||
|
||||
/**
 * Finds the [ColType] that corresponds to this [KType], ignoring nullability.
 *
 * Types without a dedicated [ColType] fall back to [ColType.String].
 */
internal fun KType.toColType(): ColType {
    val nonNullType = withNullability(false)
    return when (nonNullType) {
        // primitives and strings
        typeOf<Int>() -> ColType.Int
        typeOf<Long>() -> ColType.Long
        typeOf<Double>() -> ColType.Double
        typeOf<Boolean>() -> ColType.Boolean
        typeOf<Char>() -> ColType.Char
        typeOf<String>() -> ColType.String

        // big numbers
        typeOf<BigDecimal>() -> ColType.BigDecimal
        typeOf<BigInteger>() -> ColType.BigInteger

        // date and time
        typeOf<LocalDate>() -> ColType.LocalDate
        typeOf<LocalTime>() -> ColType.LocalTime
        typeOf<LocalDateTime>() -> ColType.LocalDateTime
        typeOf<DeprecatedInstant>() -> ColType.DeprecatedInstant
        typeOf<StdlibInstant>() -> ColType.StdlibInstant
        typeOf<Duration>() -> ColType.Duration

        // urls and json
        typeOf<URL>() -> ColType.Url
        typeOf<DataFrame<*>>() -> ColType.JsonArray
        typeOf<DataRow<*>>() -> ColType.JsonObject

        else -> ColType.String
    }
}
|
||||
|
||||
/**
 * The types Deephaven has already attempted while reading; they are added to the skip types
 * when falling back on DataFrame's own String parsers, so they are not tried a second time.
 *
 * [LocalDateTime] and [java.time.LocalDateTime] are not included because Deephaven cannot recognize all formats.
 */
internal val typesDeephavenAlreadyParses: Set<KType> =
    setOf(typeOf<Int>(), typeOf<Long>(), typeOf<Double>(), typeOf<Char>(), typeOf<Boolean>())
|
||||
Vendored
+92
@@ -0,0 +1,92 @@
|
||||
@file:JvmName("WriteDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.impl.io
|
||||
|
||||
import org.apache.commons.csv.CSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.AnyRow
|
||||
import org.jetbrains.kotlinx.dataframe.api.forEach
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.io.QuoteMode
|
||||
import org.jetbrains.kotlinx.dataframe.io.toJson
|
||||
import org.apache.commons.csv.QuoteMode as ApacheQuoteMode
|
||||
|
||||
/**
 * Writes [df] to [writer] in a delimiter-separated format.
 *
 * The actual writing is handled by Apache Commons CSV. Values that are themselves a
 * DataRow or DataFrame are serialized to JSON first.
 *
 * @param df The data to write.
 * @include [WRITER_WRITE]
 * @include [CSV_DELIMITER]
 * @include [CommonWriteDelimDocs.CommonWriteParams]
 * @include [ADJUST_CSV_FORMAT]
 */
internal fun writeDelimImpl(
    df: AnyFrame,
    writer: Appendable,
    delimiter: Char,
    includeHeader: Boolean,
    quote: Char?,
    quoteMode: QuoteMode,
    escapeChar: Char?,
    commentChar: Char?,
    headerComments: List<String>,
    recordSeparator: String,
    adjustCsvFormat: AdjustCSVFormat,
) {
    // translate the function arguments into an Apache CSV format builder
    val configuredBuilder = CSVFormat.Builder.create(CSVFormat.DEFAULT).run {
        setDelimiter(delimiter)
        setQuote(quote)
        setSkipHeaderRecord(!includeHeader)
        setQuoteMode(quoteMode.toApache())
        setRecordSeparator(recordSeparator)
        setEscape(escapeChar)
        setCommentMarker(commentChar)
        setHeaderComments(*headerComments.toTypedArray())
    }

    // the caller gets the last word on the format before it is built
    val format = adjustCsvFormat(configuredBuilder, configuredBuilder).get()

    // let the format handle the writing, only converting AnyRow and AnyFrame to JSON
    format.print(writer).use { printer ->
        if (includeHeader) {
            printer.printRecord(df.columnNames())
        }
        df.forEach { row ->
            val cells = row.values().map { cell ->
                when (cell) {
                    is AnyRow -> try {
                        cell.toJson()
                    } catch (_: NoClassDefFoundError) {
                        error(
                            "Encountered a DataRow value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                        )
                    }

                    is AnyFrame -> try {
                        cell.toJson()
                    } catch (_: NoClassDefFoundError) {
                        error(
                            "Encountered a DataFrame value when writing to csv/tsv/delim. It must be serialized to JSON, requiring the 'dataframe-json' dependency.",
                        )
                    }

                    else -> cell
                }
            }
            printer.printRecord(cells)
        }
    }
}
|
||||
|
||||
/** Maps DataFrame's [QuoteMode] to the Apache Commons CSV quote mode of the same name. */
internal fun QuoteMode.toApache(): ApacheQuoteMode {
    return when (this) {
        QuoteMode.ALL -> ApacheQuoteMode.ALL
        QuoteMode.ALL_NON_NULL -> ApacheQuoteMode.ALL_NON_NULL
        QuoteMode.MINIMAL -> ApacheQuoteMode.MINIMAL
        QuoteMode.NON_NUMERIC -> ApacheQuoteMode.NON_NUMERIC
        QuoteMode.NONE -> ApacheQuoteMode.NONE
    }
}
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
/** The quoting strategies available when writing delimiter-separated data. */
public enum class QuoteMode {

    /** Every field is quoted. */
    ALL,

    /** Every field that is not null is quoted. */
    ALL_NON_NULL,

    /**
     * Only fields containing special characters are quoted: a field delimiter, a quote character,
     * or any character of the line separator string.
     */
    MINIMAL,

    /** Every non-numeric field is quoted. */
    NON_NUMERIC,

    /**
     * Fields are never quoted. A delimiter occurring in the data is prefixed with the escape character
     * by the printer. Without an escape character set, format validation throws an exception.
     */
    NONE,
}
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
@file:JvmName("CsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * [SupportedDataFrameFormat] for files with the `csv` extension, read through `DataFrame.readCsv`.
 *
 * @param delimiter The field delimiter passed on to every read call and to the generated default read method.
 */
public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat {

    /** Reads a [DataFrame] from [stream], using [header] as column names. */
    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> =
        DataFrame.readCsv(inputStream = stream, delimiter = delimiter, header = header)

    /** Reads a [DataFrame] from [file], using [header] as column names. */
    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> =
        DataFrame.readCsv(file = file, delimiter = delimiter, header = header)

    /** Reads a [DataFrame] from [path], using [header] as column names. */
    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> =
        DataFrame.readCsv(path = path, delimiter = delimiter, header = header)

    /** Only the exact extension `csv` is accepted. */
    override fun acceptsExtension(ext: String): Boolean = ext == "csv"

    /** Every sample is accepted; the extension check is considered sufficient. */
    override fun acceptsSample(sample: SupportedFormatSample): Boolean = true

    override val testOrder: Int = 20_000

    /** Creates the default read method, carrying the configured [delimiter] as a `Char` literal argument. */
    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadCsvMethod(
            pathRepresentation,
            MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter),
        )
}
|
||||
|
||||
/** Value handed to [AbstractDefaultReadMethod] by [DefaultReadCsvMethod]. */
private const val READ_CSV = "readCsv"

/** Default read method for csv: an [AbstractDefaultReadMethod] fixed to [READ_CSV]. */
internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
    AbstractDefaultReadMethod(path, arguments, READ_CSV)
|
||||
+516
@@ -0,0 +1,516 @@
|
||||
@file:JvmName("ReadCsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_OR_URL_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_BINARY_COMPATIBILITY
|
||||
import java.io.File
|
||||
import java.io.FileInputStream
|
||||
import java.io.InputStream
|
||||
import java.net.URL
|
||||
import java.nio.charset.Charset
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.inputStream
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File
 * @set [CommonReadDelimDocs.DATA] file
 * @include [PATH_READ]
 * @include [CSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    // the stream opened on the path is closed again once reading finishes
    path.inputStream().use { pathStream ->
        readDelimImpl(
            inputStream = pathStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File
 * @set [CommonReadDelimDocs.DATA] file
 * @include [FILE_READ]
 * @include [CSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    // the stream opened on the file is closed again once reading finishes
    FileInputStream(file).use { fileStream ->
        readDelimImpl(
            inputStream = fileStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] Url
 * @set [CommonReadDelimDocs.DATA] url
 * @include [DelimParams.URL_READ]
 * @include [CSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readCsv(
    url: URL,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    // catchHttpResponse supplies the stream to read the url's content from
    catchHttpResponse(url) { urlStream ->
        readDelimImpl(
            inputStream = urlStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File or URL
 * @set [CommonReadDelimDocs.DATA] file or url
 * @include [FILE_OR_URL_READ]
 * @include [CSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readCsv(
    fileOrUrl: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    // the String is turned into a URL first, so files and urls share one code path
    catchHttpResponse(asUrl(fileOrUrl = fileOrUrl)) { sourceStream ->
        readDelimImpl(
            inputStream = sourceStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * {@comment the only one with adjustCsvSpecs}
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] InputStream
 * @set [CommonReadDelimDocs.DATA] input stream
 * @include [INPUT_STREAM_READ]
 * @include [CSV_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 * @include [ADJUST_CSV_SPECS]
 */
public fun DataFrame.Companion.readCsv(
    inputStream: InputStream,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> {
    // all arguments, including the caller's adjustCsvSpecs, go straight to the implementation
    return readDelimImpl(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = charset,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
}
|
||||
|
||||
// region deprecations
|
||||
|
||||
// Hidden overload without the `charset` parameter; forwards to the overload that has it, passing CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    readCsv(
        path = path,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload without the `charset` parameter; forwards to the overload that has it, passing CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    readCsv(
        file = file,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    url: URL,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    readCsv(
        url = url,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    fileOrUrl: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    readCsv(
        fileOrUrl = fileOrUrl,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_CSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readCsv(
    inputStream: InputStream,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    readCsv(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
|
||||
|
||||
// endregion
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.CsvDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] String
 * @set [CommonReadDelimDocs.DATA] [String]
 * @include [TEXT_READ]
 * @include [CSV_DELIMITER]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readCsvStr(
    text: String,
    delimiter: Char = CSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> =
    readDelimImpl(
        // The text is encoded as UTF-8 bytes and decoded again with the same charset.
        inputStream = text.byteInputStream(Charsets.UTF_8),
        charset = Charsets.UTF_8,
        // Plain in-memory text: nothing to decompress.
        compression = Compression.None,
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // This overload exposes no spec-adjusting hook, so the default is always used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
|
||||
+522
@@ -0,0 +1,522 @@
|
||||
@file:JvmName("ReadDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_OR_URL_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_BINARY_COMPATIBILITY
|
||||
import java.io.File
|
||||
import java.io.FileInputStream
|
||||
import java.io.InputStream
|
||||
import java.net.URL
|
||||
import java.nio.charset.Charset
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.inputStream
|
||||
|
||||
/*
|
||||
* TODO these currently clash with :core's readDelim(Str) functions.
|
||||
* When those are deprecated, we can let users fall back to these.
|
||||
* They do the same as readCsv(Str).
|
||||
*/
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File
 * @set [CommonReadDelimDocs.DATA] file
 * @include [PATH_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    // `use` closes the stream opened on the path once reading has finished.
    path.inputStream().use { pathStream ->
        readDelimImpl(
            inputStream = pathStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload exposes no spec-adjusting hook, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File
 * @set [CommonReadDelimDocs.DATA] file
 * @include [FILE_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    // `use` closes the file stream once reading has finished.
    FileInputStream(file).use { fileStream ->
        readDelimImpl(
            inputStream = fileStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload exposes no spec-adjusting hook, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] Url
 * @set [CommonReadDelimDocs.DATA] url
 * @include [DelimParams.URL_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelim(
    url: URL,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    // The stream handed to the lambda by `catchHttpResponse` is the data source.
    catchHttpResponse(url) { responseStream ->
        readDelimImpl(
            inputStream = responseStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload exposes no spec-adjusting hook, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] File or URL
 * @set [CommonReadDelimDocs.DATA] file or url
 * @include [FILE_OR_URL_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 */
public fun DataFrame.Companion.readDelim(
    fileOrUrl: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    // The string is converted with `asUrl` first, then read like the URL overload.
    catchHttpResponse(asUrl(fileOrUrl = fileOrUrl)) { responseStream ->
        readDelimImpl(
            inputStream = responseStream,
            delimiter = delimiter,
            header = header,
            charset = charset,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            quote = quote,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            compression = compression,
            // This overload exposes no spec-adjusting hook, so the default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
|
||||
|
||||
/**
 * {@comment the only one with adjustCsvSpecs}
 * @include [CommonReadDelimDocs.DelimDocs]
 * @set [CommonReadDelimDocs.DATA_TITLE] InputStream
 * @set [CommonReadDelimDocs.DATA] input stream
 * @include [INPUT_STREAM_READ]
 * @include [DELIM_DELIMITER]
 * @include [COMPRESSION]
 * @include [CommonReadDelimDocs.CommonReadParams]
 * @include [ADJUST_CSV_SPECS]
 */
public fun DataFrame.Companion.readDelim(
    inputStream: InputStream,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    // Pure pass-through: the stream is handed on as given and is not closed here.
    readDelimImpl(
        inputStream = inputStream,
        charset = charset,
        compression = compression,
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        adjustCsvSpecs = adjustCsvSpecs,
    )
|
||||
|
||||
// region deprecations
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> =
    readDelim(
        path = path,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> =
    readDelim(
        file = file,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    url: URL,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> =
    readDelim(
        url = url,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    fileOrUrl: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> =
    readDelim(
        fileOrUrl = fileOrUrl,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
|
||||
|
||||
// Hidden overload whose signature has no `charset` parameter; it forwards to the
// overload that takes one, always supplying the default CHARSET.
@Deprecated(READ_DELIM_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readDelim(
    inputStream: InputStream,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> =
    readDelim(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
|
||||
|
||||
// endregion
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
|
||||
/**
|
||||
* @include [CommonReadDelimDocs.DelimDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] String
|
||||
* @set [CommonReadDelimDocs.DATA] [String]
|
||||
* @include [TEXT_READ]
|
||||
* @include [DELIM_DELIMITER]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
*/
|
||||
public fun DataFrame.Companion.readDelimStr(
    text: String,
    delimiter: Char = DELIM_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The text is handed to the shared reader as an in-memory byte stream.
    // `byteInputStream()` encodes as UTF-8 by default, which matches the charset passed below.
    val textStream = text.byteInputStream()

    return readDelimImpl(
        // Source: fixed for string input, so neither charset nor compression is a parameter here.
        inputStream = textStream,
        charset = Charsets.UTF_8,
        compression = Compression.None,
        // Layout of the delimited data.
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        quote = quote,
        // Which lines/columns to read and how to type them.
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        // Tolerance and trimming switches, forwarded unchanged.
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // Not exposed by this overload; the DelimParams default is always used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
|
||||
+516
@@ -0,0 +1,516 @@
|
||||
@file:JvmName("ReadTsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CHARSET
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMPRESSION
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_OR_URL_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INPUT_STREAM_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_BINARY_COMPATIBILITY
|
||||
import java.io.File
|
||||
import java.io.FileInputStream
|
||||
import java.io.InputStream
|
||||
import java.net.URL
|
||||
import java.nio.charset.Charset
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.inputStream
|
||||
|
||||
/**
|
||||
* @include [CommonReadDelimDocs.TsvDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonReadDelimDocs.DATA] file
|
||||
* @include [PATH_READ]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [COMPRESSION]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> {
    // Open the path ourselves so that `use` closes the stream once reading finishes or fails.
    val source = path.inputStream()

    return source.use { stream ->
        readDelimImpl(
            // Source and how to decode it.
            inputStream = stream,
            charset = charset,
            compression = compression,
            // Layout of the delimited data.
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            quote = quote,
            // Which lines/columns to read and how to type them.
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            // Tolerance and trimming switches, forwarded unchanged.
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // Not exposed by this overload; the DelimParams default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
|
||||
|
||||
/**
|
||||
* @include [CommonReadDelimDocs.TsvDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonReadDelimDocs.DATA] file
|
||||
* @include [FILE_READ]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [COMPRESSION]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> {
    // Open the file ourselves so that `use` closes the stream once reading finishes or fails.
    val source = FileInputStream(file)

    return source.use { stream ->
        readDelimImpl(
            // Source and how to decode it.
            inputStream = stream,
            charset = charset,
            compression = compression,
            // Layout of the delimited data.
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            quote = quote,
            // Which lines/columns to read and how to type them.
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            // Tolerance and trimming switches, forwarded unchanged.
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // Not exposed by this overload; the DelimParams default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
|
||||
|
||||
/**
|
||||
* @include [CommonReadDelimDocs.TsvDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] Url
|
||||
* @set [CommonReadDelimDocs.DATA] url
|
||||
* @include [DelimParams.URL_READ]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [COMPRESSION]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsv(
    url: URL,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> {
    // `catchHttpResponse` supplies the stream for `url`; this function only parses it.
    // NOTE(review): opening/closing of the connection is presumably handled inside
    // `catchHttpResponse` - confirm against its definition.
    return catchHttpResponse(url) { stream ->
        readDelimImpl(
            // Source and how to decode it.
            inputStream = stream,
            charset = charset,
            compression = compression,
            // Layout of the delimited data.
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            quote = quote,
            // Which lines/columns to read and how to type them.
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            // Tolerance and trimming switches, forwarded unchanged.
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // Not exposed by this overload; the DelimParams default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
|
||||
|
||||
/**
|
||||
* @include [CommonReadDelimDocs.TsvDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] File or URL
|
||||
* @set [CommonReadDelimDocs.DATA] file or url
|
||||
* @include [FILE_OR_URL_READ]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [COMPRESSION]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsv(
    fileOrUrl: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> {
    // The string is first turned into a URL by `asUrl`, then read like any other URL.
    // NOTE(review): presumably `asUrl` maps plain file paths to file URLs - confirm.
    val resolvedUrl = asUrl(fileOrUrl = fileOrUrl)

    return catchHttpResponse(resolvedUrl) { stream ->
        readDelimImpl(
            // Source and how to decode it.
            inputStream = stream,
            charset = charset,
            compression = compression,
            // Layout of the delimited data.
            delimiter = delimiter,
            header = header,
            hasFixedWidthColumns = hasFixedWidthColumns,
            fixedColumnWidths = fixedColumnWidths,
            quote = quote,
            // Which lines/columns to read and how to type them.
            colTypes = colTypes,
            skipLines = skipLines,
            readLines = readLines,
            parserOptions = parserOptions,
            // Tolerance and trimming switches, forwarded unchanged.
            ignoreEmptyLines = ignoreEmptyLines,
            allowMissingColumns = allowMissingColumns,
            ignoreExcessColumns = ignoreExcessColumns,
            ignoreSurroundingSpaces = ignoreSurroundingSpaces,
            trimInsideQuoted = trimInsideQuoted,
            parseParallel = parseParallel,
            // Not exposed by this overload; the DelimParams default is always used.
            adjustCsvSpecs = ADJUST_CSV_SPECS,
        )
    }
}
|
||||
|
||||
/**
|
||||
* {@comment the only one with adjustCsvSpecs}
|
||||
* @include [CommonReadDelimDocs.TsvDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] InputStream
|
||||
* @set [CommonReadDelimDocs.DATA] input stream
|
||||
* @include [INPUT_STREAM_READ]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [COMPRESSION]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
* @include [ADJUST_CSV_SPECS]
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsv(
    inputStream: InputStream,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    charset: Charset? = CHARSET,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> {
    // Pure pass-through to the shared reader. The caller's stream is not wrapped in `use` here.
    // NOTE(review): whether `readDelimImpl` itself closes the stream is not visible here - confirm.
    return readDelimImpl(
        // Source and how to decode it.
        inputStream = inputStream,
        charset = charset,
        compression = compression,
        // Layout of the delimited data.
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        quote = quote,
        // Which lines/columns to read and how to type them.
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        // Tolerance and trimming switches, forwarded unchanged.
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // Caller-supplied hook; this is the readTsv overload that exposes it.
        adjustCsvSpecs = adjustCsvSpecs,
    )
}
|
||||
|
||||
// region deprecations
|
||||
|
||||
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(path),
): DataFrame<*> {
    // Hidden overload without a `charset` parameter. It forwards every argument to the
    // current `readTsv(path, ...)` overload and fills in the default CHARSET.
    return readTsv(
        path = path,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
}
|
||||
|
||||
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(file),
): DataFrame<*> {
    // Hidden overload without a `charset` parameter. It forwards every argument to the
    // current `readTsv(file, ...)` overload and fills in the default CHARSET.
    return readTsv(
        file = file,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
}
|
||||
|
||||
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    url: URL,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(url),
): DataFrame<*> {
    // Hidden overload without a `charset` parameter. It forwards every argument to the
    // current `readTsv(url, ...)` overload and fills in the default CHARSET.
    return readTsv(
        url = url,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
}
|
||||
|
||||
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    fileOrUrl: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = Compression.of(fileOrUrl),
): DataFrame<*> {
    // Hidden overload without a `charset` parameter. It forwards every argument to the
    // current `readTsv(fileOrUrl, ...)` overload and fills in the default CHARSET.
    return readTsv(
        fileOrUrl = fileOrUrl,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
    )
}
|
||||
|
||||
@Deprecated(READ_TSV_BINARY_COMPATIBILITY, level = DeprecationLevel.HIDDEN)
public fun DataFrame.Companion.readTsv(
    inputStream: InputStream,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
    compression: Compression<*> = COMPRESSION,
    adjustCsvSpecs: AdjustCsvSpecs = ADJUST_CSV_SPECS,
): DataFrame<*> {
    // Hidden overload without a `charset` parameter. It forwards every argument to the
    // current `readTsv(inputStream, ...)` overload and fills in the default CHARSET.
    return readTsv(
        inputStream = inputStream,
        delimiter = delimiter,
        header = header,
        charset = CHARSET,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        quote = quote,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        compression = compression,
        adjustCsvSpecs = adjustCsvSpecs,
    )
}
|
||||
|
||||
// endregion
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonReadDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_SPECS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ALLOW_MISSING_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COL_TYPES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FIXED_COLUMN_WIDTHS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HAS_FIXED_WIDTH_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EMPTY_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_EXCESS_COLUMNS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.IGNORE_SURROUNDING_SPACES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSER_OPTIONS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PARSE_PARALLEL
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.READ_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.SKIP_LINES
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TEXT_READ
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TRIM_INSIDE_QUOTED
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
|
||||
|
||||
/**
|
||||
* @include [CommonReadDelimDocs.TsvDocs]
|
||||
* @set [CommonReadDelimDocs.DATA_TITLE] String
|
||||
* @set [CommonReadDelimDocs.DATA] [String]
|
||||
* @include [TEXT_READ]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [CommonReadDelimDocs.CommonReadParams]
|
||||
*/
|
||||
public fun DataFrame.Companion.readTsvStr(
    text: String,
    delimiter: Char = TSV_DELIMITER,
    header: List<String> = HEADER,
    hasFixedWidthColumns: Boolean = HAS_FIXED_WIDTH_COLUMNS,
    fixedColumnWidths: List<Int> = FIXED_COLUMN_WIDTHS,
    colTypes: Map<String, ColType> = COL_TYPES,
    skipLines: Long = SKIP_LINES,
    readLines: Long? = READ_LINES,
    parserOptions: ParserOptions? = PARSER_OPTIONS,
    ignoreEmptyLines: Boolean = IGNORE_EMPTY_LINES,
    allowMissingColumns: Boolean = ALLOW_MISSING_COLUMNS,
    ignoreExcessColumns: Boolean = IGNORE_EXCESS_COLUMNS,
    quote: Char = QUOTE,
    ignoreSurroundingSpaces: Boolean = IGNORE_SURROUNDING_SPACES,
    trimInsideQuoted: Boolean = TRIM_INSIDE_QUOTED,
    parseParallel: Boolean = PARSE_PARALLEL,
): DataFrame<*> {
    // The text is handed to the shared reader as an in-memory byte stream.
    // `byteInputStream()` encodes as UTF-8 by default, which matches the charset passed below.
    val textStream = text.byteInputStream()

    return readDelimImpl(
        // Source: fixed for string input, so neither charset nor compression is a parameter here.
        inputStream = textStream,
        charset = Charsets.UTF_8,
        compression = Compression.None,
        // Layout of the delimited data.
        delimiter = delimiter,
        header = header,
        hasFixedWidthColumns = hasFixedWidthColumns,
        fixedColumnWidths = fixedColumnWidths,
        quote = quote,
        // Which lines/columns to read and how to type them.
        colTypes = colTypes,
        skipLines = skipLines,
        readLines = readLines,
        parserOptions = parserOptions,
        // Tolerance and trimming switches, forwarded unchanged.
        ignoreEmptyLines = ignoreEmptyLines,
        allowMissingColumns = allowMissingColumns,
        ignoreExcessColumns = ignoreExcessColumns,
        ignoreSurroundingSpaces = ignoreSurroundingSpaces,
        trimInsideQuoted = trimInsideQuoted,
        parseParallel = parseParallel,
        // Not exposed by this overload; the DelimParams default is always used.
        adjustCsvSpecs = ADJUST_CSV_SPECS,
    )
}
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.CsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Convert
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] String
|
||||
* @set [CommonWriteDelimDocs.DATA] [String]
|
||||
* @include [CSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.toCsvStr(
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // The shared writer appends into an in-memory buffer, which is then returned as a String.
    val output = StringBuilder()

    writeDelimImpl(
        df = this,
        writer = output,
        // Formatting options, forwarded unchanged.
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Not exposed by this function; the DelimParams default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )

    return output.toString()
}
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.DelimDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Convert
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] String
|
||||
* @set [CommonWriteDelimDocs.DATA] [String]
|
||||
* @include [DELIM_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.toDelimStr(
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // The shared writer appends into an in-memory buffer, which is then returned as a String.
    val output = StringBuilder()

    writeDelimImpl(
        df = this,
        writer = output,
        // Formatting options, forwarded unchanged.
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Not exposed by this function; the DelimParams default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )

    return output.toString()
}
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.TsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Convert
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] String
|
||||
* @set [CommonWriteDelimDocs.DATA] [String]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.toTsvStr(
    includeHeader: Boolean = INCLUDE_HEADER,
    delimiter: Char = TSV_DELIMITER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): String {
    // Render the frame into an in-memory buffer and return its contents.
    val buffer = StringBuilder()
    writeDelimImpl(
        df = this,
        writer = buffer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // This overload does not expose format adjustment; the default is always used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
    return buffer.toString()
}
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
@file:JvmName("TsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.AbstractDefaultReadMethod
|
||||
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams
|
||||
import java.io.File
|
||||
import java.io.InputStream
|
||||
import java.nio.file.Path
|
||||
import kotlin.reflect.typeOf
|
||||
|
||||
/**
 * [SupportedDataFrameFormat] that reads `tsv` files through the `DataFrame.readTsv` overloads.
 *
 * @param delimiter the field delimiter forwarded to every `readTsv` call and
 *   embedded in the default read method created by [createDefaultReadMethod].
 */
public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat {

    override val testOrder: Int = 30_000

    override fun acceptsExtension(ext: String): Boolean {
        return ext == "tsv"
    }

    override fun acceptsSample(sample: SupportedFormatSample): Boolean {
        // Extension is enough
        return true
    }

    override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(inputStream = stream, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(file: File, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(file = file, header = header, delimiter = delimiter)
    }

    override fun readDataFrame(path: Path, header: List<String>): DataFrame<*> {
        return DataFrame.readTsv(path = path, header = header, delimiter = delimiter)
    }

    override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod =
        DefaultReadTsvMethod(
            pathRepresentation,
            // The configured delimiter is rendered as a quoted char literal argument.
            MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter),
        )
}
|
||||
|
||||
/** Method name handed to [AbstractDefaultReadMethod] by [DefaultReadTsvMethod]. */
private const val READ_TSV = "readTsv"

/**
 * [AbstractDefaultReadMethod] bound to [READ_TSV].
 *
 * @param path forwarded unchanged to [AbstractDefaultReadMethod]; may be `null`.
 * @param arguments forwarded unchanged to [AbstractDefaultReadMethod].
 */
internal class DefaultReadTsvMethod(
    path: String?,
    arguments: MethodArguments,
) : AbstractDefaultReadMethod(path, arguments, READ_TSV)
|
||||
+29
@@ -0,0 +1,29 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.deephaven.csv.CsvSpecs
|
||||
import org.apache.commons.csv.CSVFormat
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.ExcludeFromSources
|
||||
|
||||
/** [\["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"\]][DEFAULT_DELIM_NULL_STRINGS] */
|
||||
@ExcludeFromSources
|
||||
internal typealias DefaultNullStringsContentLink = Nothing
|
||||
|
||||
/**
|
||||
* Default strings that are considered null when reading CSV / TSV / delim files:
|
||||
*
|
||||
* @include [DefaultNullStringsContentLink]
|
||||
*/
|
||||
public val DEFAULT_DELIM_NULL_STRINGS: Set<String> =
|
||||
setOf("", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil")
|
||||
|
||||
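/**
 * Typealias for `CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder`.
 * A lambda in which you can overwrite or adjust any of the CSV specs
 * (see [io.deephaven.csv.CsvSpecs]). A `CsvSpecs.Builder` is supplied both as
 * the receiver and as the argument, and the lambda returns a `CsvSpecs.Builder`.
 */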
|
||||
public typealias AdjustCsvSpecs = CsvSpecs.Builder.(CsvSpecs.Builder) -> CsvSpecs.Builder
|
||||
|
||||
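/**
 * Typealias for `CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder`.
 * A lambda in which you can overwrite or adjust any of the CSV format options
 * (see [org.apache.commons.csv.CSVFormat]). A `CSVFormat.Builder` is supplied both as
 * the receiver and as the argument, and the lambda returns a `CSVFormat.Builder`.
 */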
|
||||
public typealias AdjustCSVFormat = CSVFormat.Builder.(CSVFormat.Builder) -> CSVFormat.Builder
|
||||
+162
@@ -0,0 +1,162 @@
|
||||
@file:JvmName("WriteCsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.CSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writer
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.CsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [PATH_WRITE]
|
||||
* @include [CSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    path: Path,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
) {
    // Open a character writer on the target path and delegate to the shared implementation.
    val target = path.writer()
    writeDelimImpl(
        df = this,
        writer = target,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
}
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.CsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [FILE_WRITE]
|
||||
* @include [CSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    file: File,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8, the same as `Path.writer()` in the Path overload.
        // `java.io.FileWriter(file)` would use the platform default charset instead,
        // so the same frame could be written in different encodings depending on the overload.
        writer = file.writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.CsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [PATH_WRITE]
|
||||
* @include [CSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    path: String,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8, the same as `Path.writer()` in the Path overload.
        // `java.io.FileWriter(path)` would use the platform default charset instead,
        // so the same frame could be written in different encodings depending on the overload.
        writer = File(path).writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* {@comment only one with adjustCsvFormat}
|
||||
* @include [CommonWriteDelimDocs.CsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] Appendable
|
||||
* @set [CommonWriteDelimDocs.DATA] [Appendable]
|
||||
* @include [WRITER_WRITE]
|
||||
* @include [CSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
* @include [ADJUST_CSV_FORMAT]
|
||||
*/
|
||||
public fun AnyFrame.writeCsv(
    writer: Appendable,
    delimiter: Char = CSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Every option, including the caller-supplied format adjustment, is passed straight through.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
|
||||
+162
@@ -0,0 +1,162 @@
|
||||
@file:JvmName("WriteDelimDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.DELIM_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writer
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.DelimDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [PATH_WRITE]
|
||||
* @include [DELIM_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    path: Path,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
) {
    // Open a character writer on the target path and delegate to the shared implementation.
    val target = path.writer()
    writeDelimImpl(
        df = this,
        writer = target,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
}
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.DelimDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [FILE_WRITE]
|
||||
* @include [DELIM_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    file: File,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8, the same as `Path.writer()` in the Path overload.
        // `java.io.FileWriter(file)` would use the platform default charset instead,
        // so the same frame could be written in different encodings depending on the overload.
        writer = file.writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.DelimDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [PATH_WRITE]
|
||||
* @include [DELIM_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    path: String,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8, the same as `Path.writer()` in the Path overload.
        // `java.io.FileWriter(path)` would use the platform default charset instead,
        // so the same frame could be written in different encodings depending on the overload.
        writer = File(path).writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* {@comment only one with adjustCsvFormat}
|
||||
* @include [CommonWriteDelimDocs.DelimDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] Appendable
|
||||
* @set [CommonWriteDelimDocs.DATA] [Appendable]
|
||||
* @include [WRITER_WRITE]
|
||||
* @include [DELIM_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
* @include [ADJUST_CSV_FORMAT]
|
||||
*/
|
||||
public fun AnyFrame.writeDelim(
    writer: Appendable,
    delimiter: Char = DELIM_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Every option, including the caller-supplied format adjustment, is passed straight through.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
|
||||
+162
@@ -0,0 +1,162 @@
|
||||
@file:JvmName("WriteTsvDeephavenKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.AnyFrame
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.CommonWriteDelimDocs
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ADJUST_CSV_FORMAT
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.COMMENT_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.ESCAPE_CHAR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.FILE_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.HEADER_COMMENTS
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.INCLUDE_HEADER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.PATH_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.QUOTE_MODE
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.RECORD_SEPARATOR
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.TSV_DELIMITER
|
||||
import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams.WRITER_WRITE
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.writeDelimImpl
|
||||
import java.io.File
|
||||
import java.io.FileWriter
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.writer
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.TsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [PATH_WRITE]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    path: Path,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
) {
    // Open a character writer on the target path and delegate to the shared implementation.
    val target = path.writer()
    writeDelimImpl(
        df = this,
        writer = target,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
}
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.TsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [FILE_WRITE]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    file: File,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8, the same as `Path.writer()` in the Path overload.
        // `java.io.FileWriter(file)` would use the platform default charset instead,
        // so the same frame could be written in different encodings depending on the overload.
        writer = file.writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* @include [CommonWriteDelimDocs.TsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] File
|
||||
* @set [CommonWriteDelimDocs.DATA] file
|
||||
* @include [PATH_WRITE]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    path: String,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
): Unit =
    writeDelimImpl(
        df = this,
        // `File.writer()` encodes as UTF-8, the same as `Path.writer()` in the Path overload.
        // `java.io.FileWriter(path)` would use the platform default charset instead,
        // so the same frame could be written in different encodings depending on the overload.
        writer = File(path).writer(),
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        // Only the Appendable overload exposes format adjustment; here the default is used.
        adjustCsvFormat = ADJUST_CSV_FORMAT,
    )
|
||||
|
||||
/**
|
||||
* {@comment only one with adjustCsvFormat}
|
||||
* @include [CommonWriteDelimDocs.TsvDocs]
|
||||
* @set [CommonWriteDelimDocs.WRITE_OR_CONVERT] Write
|
||||
* @set [CommonWriteDelimDocs.DATA_TITLE] Appendable
|
||||
* @set [CommonWriteDelimDocs.DATA] [Appendable]
|
||||
* @include [WRITER_WRITE]
|
||||
* @include [TSV_DELIMITER]
|
||||
* @include [CommonWriteDelimDocs.CommonWriteParams]
|
||||
* @include [ADJUST_CSV_FORMAT]
|
||||
*/
|
||||
public fun AnyFrame.writeTsv(
    writer: Appendable,
    delimiter: Char = TSV_DELIMITER,
    includeHeader: Boolean = INCLUDE_HEADER,
    quote: Char? = QUOTE,
    quoteMode: QuoteMode = QUOTE_MODE,
    escapeChar: Char? = ESCAPE_CHAR,
    commentChar: Char? = COMMENT_CHAR,
    headerComments: List<String> = HEADER_COMMENTS,
    recordSeparator: String = RECORD_SEPARATOR,
    adjustCsvFormat: AdjustCSVFormat = ADJUST_CSV_FORMAT,
) {
    // Every option, including the caller-supplied format adjustment, is passed straight through.
    writeDelimImpl(
        df = this,
        writer = writer,
        delimiter = delimiter,
        includeHeader = includeHeader,
        quote = quote,
        quoteMode = quoteMode,
        escapeChar = escapeChar,
        commentChar = commentChar,
        headerComments = headerComments,
        recordSeparator = recordSeparator,
        adjustCsvFormat = adjustCsvFormat,
    )
}
|
||||
Vendored
+30
@@ -0,0 +1,30 @@
|
||||
@file:JvmName("CsvDeprecationMessagesKt")
|
||||
|
||||
package org.jetbrains.kotlinx.dataframe.util
|
||||
|
||||
/*
|
||||
 * This file contains deprecation messages for the whole dataframe-csv module.
|
||||
* After each release, all messages should be reviewed and updated.
|
||||
* Level.WARNING -> Level.ERROR
|
||||
* Level.ERROR -> Remove
|
||||
*/
|
||||
|
||||
// region WARNING in 0.15, ERROR in 1.0
|
||||
|
||||
// Text announcing escalation to ERROR in 1.0.
// NOTE(review): not referenced by any message declared in the visible part of this file — confirm it is still needed.
private const val MESSAGE_1_0 = "Will be ERROR in 1.0."
|
||||
|
||||
// Message for overloads kept only for binary compatibility; presumably used on readCsv overloads — confirm at use sites.
internal const val READ_CSV_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
|
||||
// Message for overloads kept only for binary compatibility; presumably used on readTsv overloads — confirm at use sites.
internal const val READ_TSV_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
|
||||
// Message for overloads kept only for binary compatibility; presumably used on readDelim overloads — confirm at use sites.
internal const val READ_DELIM_BINARY_COMPATIBILITY = "This overload is here to maintain binary compatibility."
|
||||
|
||||
// endregion
|
||||
|
||||
// region WARNING in 1.0, ERROR in 1.1
|
||||
|
||||
// Text announcing escalation to ERROR in 1.1.
// NOTE(review): no message in this region uses it yet — confirm it is still needed.
private const val MESSAGE_1_1 = "Will be ERROR in 1.1."
|
||||
|
||||
// endregion
|
||||
|
||||
// region keep across releases
|
||||
|
||||
// endregion
|
||||
+2
@@ -0,0 +1,2 @@
|
||||
org.jetbrains.kotlinx.dataframe.io.CsvDeephaven
|
||||
org.jetbrains.kotlinx.dataframe.io.TsvDeephaven
|
||||
Vendored
+54
@@ -0,0 +1,54 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.openjdk.jmh.annotations.Benchmark
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode
|
||||
import org.openjdk.jmh.annotations.Measurement
|
||||
import org.openjdk.jmh.annotations.Mode
|
||||
import org.openjdk.jmh.annotations.Param
|
||||
import org.openjdk.jmh.annotations.Scope
|
||||
import org.openjdk.jmh.annotations.Setup
|
||||
import org.openjdk.jmh.annotations.State
|
||||
import org.openjdk.jmh.annotations.TearDown
|
||||
import org.openjdk.jmh.annotations.Warmup
|
||||
import java.io.File
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
/**
 * JMH benchmark that reads the same sample file once through `DataFrame.readCSV`
 * ([apache]) and once through `DataFrame.readCsv` ([deephaven]).
 *
 * The sample file is selected by the [type] parameter in [setup].
 */
@BenchmarkMode(Mode.SingleShotTime)
@Warmup(iterations = 10, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 10, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
open class BenchmarkTest {

    /** Which sample file to read; one of "small", "medium" or "large". */
    @Param("small", "medium", "large")
    var type = ""

    /** The file resolved from [type]; set in [setup] and reset in [tearDown]. */
    var file: File? = null

    @Setup
    fun setup() {
        System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info")

        // Map the benchmark parameter onto a file in the test resources directory.
        val resourceName = when (type) {
            "small" -> "testCSV.csv"
            "medium" -> "gross-domestic-product-june-2024-quarter.csv"
            "large" -> "largeCsv.csv.gz"
            else -> throw IllegalArgumentException("Invalid type")
        }
        file = File("src/test/resources/$resourceName")
    }

    @TearDown
    fun tearDown() {
        file = null
    }

    @Benchmark
    fun apache() {
        val input = file!!
        DataFrame.readCSV(input)
    }

    @Benchmark
    fun deephaven() {
        val input = file!!
        DataFrame.readCsv(input)
    }
}
|
||||
Vendored
+887
@@ -0,0 +1,887 @@
|
||||
package org.jetbrains.kotlinx.dataframe.io
|
||||
|
||||
import io.deephaven.csv.parsers.Parsers
|
||||
import io.kotest.assertions.throwables.shouldNotThrowAny
|
||||
import io.kotest.assertions.throwables.shouldThrow
|
||||
import io.kotest.matchers.collections.shouldContainInOrder
|
||||
import io.kotest.matchers.nulls.shouldNotBeNull
|
||||
import io.kotest.matchers.shouldBe
|
||||
import io.kotest.matchers.shouldNotBe
|
||||
import kotlinx.datetime.LocalDate
|
||||
import kotlinx.datetime.LocalDateTime
|
||||
import org.intellij.lang.annotations.Language
|
||||
import org.jetbrains.kotlinx.dataframe.DataFrame
|
||||
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
|
||||
import org.jetbrains.kotlinx.dataframe.api.allNulls
|
||||
import org.jetbrains.kotlinx.dataframe.api.convert
|
||||
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
|
||||
import org.jetbrains.kotlinx.dataframe.api.group
|
||||
import org.jetbrains.kotlinx.dataframe.api.groupBy
|
||||
import org.jetbrains.kotlinx.dataframe.api.into
|
||||
import org.jetbrains.kotlinx.dataframe.api.isEmpty
|
||||
import org.jetbrains.kotlinx.dataframe.api.parser
|
||||
import org.jetbrains.kotlinx.dataframe.api.print
|
||||
import org.jetbrains.kotlinx.dataframe.api.schema
|
||||
import org.jetbrains.kotlinx.dataframe.api.toStr
|
||||
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
|
||||
import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema
|
||||
import org.junit.After
|
||||
import org.junit.Before
|
||||
import org.junit.Test
|
||||
import java.io.File
|
||||
import java.io.StringWriter
|
||||
import java.math.BigDecimal
|
||||
import java.net.URL
|
||||
import java.util.Locale
|
||||
import java.util.zip.GZIPInputStream
|
||||
import kotlin.reflect.KClass
|
||||
import kotlin.reflect.typeOf
|
||||
import kotlin.time.Instant as StdlibInstant
|
||||
import kotlinx.datetime.Instant as DeprecatedInstant
|
||||
|
||||
// can be enabled for showing logs for these tests
|
||||
private const val SHOW_LOGS = false
|
||||
|
||||
@Suppress("ktlint:standard:argument-list-wrapping")
|
||||
class DelimCsvTsvTests {
|
||||
|
||||
private val logLevel = "org.slf4j.simpleLogger.log.${FastDoubleParser::class.qualifiedName}"
|
||||
private var loggerBefore: String? = null
|
||||
|
||||
// When SHOW_LOGS is enabled, remembers the current log-level property and switches it to "trace".
@Before
fun setLogger() {
    if (SHOW_LOGS) {
        loggerBefore = System.getProperty(logLevel)
        System.setProperty(logLevel, "trace")
    }
}
|
||||
|
||||
// When SHOW_LOGS is enabled, puts the log-level property back to the state captured by setLogger.
@After
fun restoreLogger() {
    if (!SHOW_LOGS) return
    when (val previous = loggerBefore) {
        // The property was not set before the test: remove it again instead of leaving "trace" behind.
        null -> System.clearProperty(logLevel)

        else -> System.setProperty(logLevel, previous)
    }
}
|
||||
|
||||
// Reads a CSV whose second column holds only empty values and checks the resulting shape and types.
@Test
fun readNulls() {
    @Language("CSV")
    val src =
        """
        first,second
        2,,
        3,,
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(src)

    // Shape first, then the per-column expectations.
    parsed.rowsCount() shouldBe 2
    parsed.columnsCount() shouldBe 2
    parsed["first"].type() shouldBe typeOf<Int>()
    parsed["second"].let { second ->
        second.allNulls() shouldBe true
    }
    parsed["second"].let { second ->
        second.type() shouldBe typeOf<String?>()
    }
}
|
||||
|
||||
// Round trip: a frame written with writeCsv and read back with readCsvStr must equal the original.
@Test
fun write() {
    val original = dataFrameOf("col1", "col2")(
        1, null,
        2, null,
    ).convert("col2").toStr()

    val sink = StringWriter()
    original.writeCsv(sink)
    val csvText = sink.buffer.toString()

    DataFrame.readCsvStr(csvText) shouldBe original
}
|
||||
|
||||
// Reads the simple CSV sample through the generic DataFrame.read and checks shape, names and inferred types.
@Test
fun readCsv() {
    val df = DataFrame.read(simpleCsv)

    df.columnsCount() shouldBe 11
    df.rowsCount() shouldBe 5

    // The sixth and seventh column names must be "duplicate1" and "duplicate11".
    df.columnNames()[5] shouldBe "duplicate1"
    df.columnNames()[6] shouldBe "duplicate11"

    listOf(
        "duplicate1" to typeOf<Char?>(),
        "double" to typeOf<Double?>(),
        "number" to typeOf<Double>(),
        "time" to typeOf<LocalDateTime>(),
    ).forEach { (column, expectedType) ->
        df[column].type() shouldBe expectedType
    }

    df.print(columnTypes = true, borders = true, title = true)
}
|
||||
|
||||
// The UTF-16LE sample must equal the plain sample by default and with the matching charset,
// and must differ when read with UTF-16BE or UTF-8.
@Test
fun `readCsv different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)

    DataFrame.readCsv(simpleCsvUtf16le) shouldBe expected
    DataFrame.readCsv(simpleCsvUtf16le, charset = Charsets.UTF_16LE) shouldBe expected

    for (wrongCharset in listOf(Charsets.UTF_16BE, Charsets.UTF_8)) {
        DataFrame.readCsv(simpleCsvUtf16le, charset = wrongCharset) shouldNotBe expected
    }
}
|
||||
|
||||
// Same charset expectations as the uncompressed UTF-16LE test, for the gz-compressed sample.
@Test
fun `readCsv gz compressed different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)

    DataFrame.readCsv(simpleCsvUtf16leGz) shouldBe expected
    DataFrame.readCsv(simpleCsvUtf16leGz, charset = Charsets.UTF_16LE) shouldBe expected

    for (wrongCharset in listOf(Charsets.UTF_16BE, Charsets.UTF_8)) {
        DataFrame.readCsv(simpleCsvUtf16leGz, charset = wrongCharset) shouldNotBe expected
    }
}
|
||||
|
||||
// Same charset expectations as the uncompressed UTF-16LE test, for the zip-compressed sample.
@Test
fun `readCsv zip compressed different charset`() {
    val expected = DataFrame.readCsv(simpleCsv)

    DataFrame.readCsv(simpleCsvUtf16leZip) shouldBe expected
    DataFrame.readCsv(simpleCsvUtf16leZip, charset = Charsets.UTF_16LE) shouldBe expected

    for (wrongCharset in listOf(Charsets.UTF_16BE, Charsets.UTF_8)) {
        DataFrame.readCsv(simpleCsvUtf16leZip, charset = wrongCharset) shouldNotBe expected
    }
}
|
||||
|
||||
// The zipped sample must read to the same frame as the plain one; the `notCsv` fixture must be rejected.
@Test
fun `read ZIP Csv`() {
    val fromZip = DataFrame.readCsv(simpleCsvZip)
    val fromPlain = DataFrame.readCsv(simpleCsv)
    fromZip shouldBe fromPlain

    shouldThrow<IllegalStateException> { DataFrame.readCsv(notCsv) }
}
|
||||
|
||||
@Test
fun `read GZ Csv`() {
    // A gzipped CSV must produce the same frame as its plain counterpart.
    val fromGz = DataFrame.readCsv(simpleCsvGz)
    val fromPlain = DataFrame.readCsv(simpleCsv)
    fromGz shouldBe fromPlain
}
|
||||
|
||||
@Test
fun `read custom compression Csv`() {
    // Supply the decompression explicitly as a custom Compression built from GZIPInputStream.
    val viaCustomCompression = DataFrame.readCsv(
        simpleCsvGz,
        compression = Compression(::GZIPInputStream),
    )
    val fromPlain = DataFrame.readCsv(simpleCsv)

    viaCustomCompression shouldBe fromPlain
}
|
||||
|
||||
@Test
fun `read 2 compressed Csv`() {
    // Reading the `twoCsvsZip` archive is expected to fail with an IllegalArgumentException.
    shouldThrow<IllegalArgumentException> {
        DataFrame.readCsv(twoCsvsZip)
    }
}
|
||||
|
||||
@Test
fun readCsvWithFrenchLocaleAndAlternativeDelimiter() {
    // Semicolon-delimited fixture, parsed with French locale rules.
    val frenchOptions = ParserOptions(locale = Locale.FRENCH)
    val df = DataFrame.readCsv(
        url = csvWithFrenchLocale,
        delimiter = ';',
        parserOptions = frenchOptions,
    )

    // Overall shape.
    df.columnsCount() shouldBe 11
    df.rowsCount() shouldBe 5

    // Names of the sixth and seventh columns.
    val names = df.columnNames()
    names[5] shouldBe "duplicate1"
    names[6] shouldBe "duplicate11"

    // Inferred column types.
    df["duplicate1"].type() shouldBe typeOf<Char?>()
    df["double"].type() shouldBe typeOf<Double?>()
    df["number"].type() shouldBe typeOf<Double>()
    df["time"].type() shouldBe typeOf<LocalDateTime>()

    println(df)
}
|
||||
|
||||
/**
 * Asserts that [schema] contains a column called [columnName] and that the classifier of
 * its type is [kClass].
 */
private fun assertColumnType(columnName: String, kClass: KClass<*>, schema: DataFrameSchema) {
    val columnSchema = schema.columns[columnName]
    // Fails the test when the column is absent.
    columnSchema.shouldNotBeNull()
    columnSchema.type.classifier shouldBe kClass
}
|
||||
|
||||
@Test
fun readCsvWithFloats() {
    val schema = DataFrame.readCsv(wineCsv, delimiter = ';').schema()

    // Expected classifier per column, checked in declaration order.
    val expectedTypes = listOf(
        "citric acid" to Double::class,
        "alcohol" to Double::class,
        "quality" to Int::class,
    )
    for ((column, expectedClass) in expectedTypes) {
        assertColumnType(column, expectedClass, schema)
    }
}
|
||||
|
||||
@Test
fun `read standard CSV with floats when user has alternative locale`() {
    // Remember the JVM default so it can be restored whatever the outcome.
    val previousLocale = Locale.getDefault()
    try {
        Locale.setDefault(Locale.forLanguageTag("ru-RU"))

        val schema = DataFrame.readCsv(wineCsv, delimiter = ';').schema()

        // The column types must be the same as under the original default locale.
        val expectedTypes = listOf(
            "citric acid" to Double::class,
            "alcohol" to Double::class,
            "quality" to Int::class,
        )
        for ((column, expectedClass) in expectedTypes) {
            assertColumnType(column, expectedClass, schema)
        }
    } finally {
        Locale.setDefault(previousLocale)
    }
}
|
||||
|
||||
@Test
fun `read with custom header`() {
    // Eleven replacement names, A..K; the file's own header line is skipped.
    val fullHeader = ('A'..'K').map(Char::toString)
    val renamed = DataFrame.readCsv(simpleCsv, header = fullHeader, skipLines = 1)
    renamed.columnNames() shouldBe fullHeader
    renamed["B"].type() shouldBe typeOf<Int>()

    // With only five names supplied, the result is expected to have exactly those five columns.
    val shortHeader = ('A'..'E').map(Char::toString)
    val truncated = DataFrame.readCsv(simpleCsv, header = shortHeader, skipLines = 1)
    truncated.columnsCount() shouldBe 5
    truncated.columnNames() shouldBe shortHeader
}
|
||||
|
||||
@Test
fun `read first rows`() {
    val expectedNames = listOf(
        "untitled",
        "user_id",
        "name",
        "duplicate",
        "username",
        "duplicate1",
        "duplicate11",
        "double",
        "number",
        "time",
        "empty",
    )

    // readLines = 0: no rows, but the column names are still present.
    val headerOnly = DataFrame.readCsv(simpleCsv, readLines = 0)
    headerOnly.rowsCount() shouldBe 0
    headerOnly.columnNames() shouldBe expectedNames

    // readLines = 3: exactly three rows.
    DataFrame.readCsv(simpleCsv, readLines = 3).rowsCount() shouldBe 3

    // readLines = 10: the result has five rows.
    DataFrame.readCsv(simpleCsv, readLines = 10).rowsCount() shouldBe 5
}
|
||||
|
||||
@Test
fun `if string starts with a number, it should be parsed as a string anyway`() {
    // Both columns mix plain numbers with values that merely start with digits.
    @Language("CSV")
    val csv =
        """
        duration,floatDuration
        12 min,1.0
        15,12.98 sec
        1 Season,0.9 parsec
        """.trimIndent()

    val parsed = DataFrame.readCsvStr(csv)

    for (column in listOf("duration", "floatDuration")) {
        parsed[column].type() shouldBe typeOf<String>()
    }
}
|
||||
|
||||
@Test
fun `if record has fewer columns than header then pad it with nulls`() {
    // The second record has only two of the three values.
    @Language("CSV")
    val shortRecordCsv =
        """
        col1,col2,col3
        568,801,587
        780,588
        """.trimIndent()

    val parsed = shouldNotThrowAny { DataFrame.readCsvStr(shortRecordCsv) }

    // The missing trailing cell shows up as null.
    val expected = dataFrameOf("col1", "col2", "col3")(
        568, 801, 587,
        780, 588, null,
    )
    parsed shouldBe expected
}
|
||||
|
||||
@Test
fun `write and read frame column`() {
    val flat = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
        2, 1, 3,
    )
    // Grouping by "a" into "g" produces the frame column this test is about.
    val withFrameColumn = flat.groupBy("a").into("g")

    // Round trip: serialise without an escape character, read back with '"' as the quote.
    val serialized = withFrameColumn.toCsvStr(escapeChar = null)
    val roundTripped = DataFrame.readCsvStr(serialized, quote = '"')

    roundTripped shouldBe withFrameColumn
}
|
||||
|
||||
@Test
fun `write and read column group`() {
    val flat = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    )
    // Columns "b" and "c" are moved into a column group named "d".
    val withColumnGroup = flat.group("b", "c").into("d")

    // Round trip through CSV text with default settings.
    val roundTripped = DataFrame.readCsvStr(withColumnGroup.toCsvStr())

    roundTripped shouldBe withColumnGroup
}
|
||||
|
||||
@Test
fun `CSV String of saved dataframe starts with column name`() {
    // The first character of the output must be the first character of the column name.
    val csvText = dataFrameOf("a")(1).toCsvStr()
    csvText.first() shouldBe 'a'
}
|
||||
|
||||
@Test
fun `guess tsv`() {
    // Uses the generic `DataFrame.read` entry point rather than a CSV/TSV-specific one.
    val tsvFrame = DataFrame.read(testResource("abc.tsv"))

    tsvFrame.columnsCount() shouldBe 3
    tsvFrame.rowsCount() shouldBe 2
}
|
||||
|
||||
@Test
fun `write csv without header produce correct file`() {
    val df = dataFrameOf("a", "b", "c")(
        1, 2, 3,
        1, 3, 2,
    )
    val outputPath = "src/test/resources/without_header.csv"
    val producedFile = File(outputPath)

    try {
        df.writeCsv(
            path = outputPath,
            includeHeader = false,
            recordSeparator = "\r\n",
        )

        // Only the two data records, each terminated by CRLF; no header line.
        producedFile.exists() shouldBe true
        producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n"
    } finally {
        // Always remove the generated file, so a failing assertion does not leave it behind
        // in the test resources directory.
        producedFile.delete()
    }
}
|
||||
|
||||
@Test
fun `check integrity of example data`() {
    val fileWithLeadingBlankLine = "../data/jetbrains repositories.csv"

    // cannot read file with blank line at the start
    shouldThrow<IllegalStateException> {
        DataFrame.readCsv(fileWithLeadingBlankLine)
    }
    // ignoreEmptyLines only ignores intermediate empty lines
    shouldThrow<IllegalStateException> {
        DataFrame.readCsv(fileWithLeadingBlankLine, ignoreEmptyLines = true)
    }

    // we need to skip the empty lines manually
    val repositories = DataFrame.readCsv(fileWithLeadingBlankLine, skipLines = 1)

    val expectedNames = listOf("full_name", "html_url", "stargazers_count", "topics", "watchers")
    val expectedTypes = listOf(
        typeOf<String>(),
        typeOf<URL>(),
        typeOf<Int>(),
        typeOf<String>(),
        typeOf<Int>(),
    )
    repositories.columnNames() shouldBe expectedNames
    repositories.columnTypes() shouldBe expectedTypes

    // same file without empty line at the beginning
    repositories shouldBe DataFrame.readCsv("../data/jetbrains_repositories.csv")
}
|
||||
|
||||
@Test
fun `readCsvStr delimiter`() {
    // The separators are written as explicit `\t` escapes. Literal tab characters inside a raw
    // string are invisible and easily turned into spaces by editors or formatters, which would
    // make the whole line a single column when read with '\t' as the delimiter.
    @Language("TSV")
    val tsv = "a\tb\tc\n1\t2\t3"

    val df = DataFrame.readCsvStr(tsv, '\t')

    df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
|
||||
|
||||
@Test
fun `file with BOM`() {
    // The column names must come out exactly as "Column1" and "Column2".
    val columnNames = DataFrame.readCsv(withBomCsv, delimiter = ';').columnNames()
    columnNames shouldBe listOf("Column1", "Column2")
}
|
||||
|
||||
@Test
fun `read empty CSV`() {
    // Creates an empty temp file that is removed when the JVM exits. `File.createTempFile`
    // appends the suffix verbatim, so it has to include the dot to yield a real extension.
    fun emptyTempFile(suffix: String): File =
        File.createTempFile("empty", suffix).apply { deleteOnExit() }

    // Empty text, delimiter-based reading.
    val emptyDelimStr = DataFrame.readCsvStr("")
    emptyDelimStr shouldBe DataFrame.empty()

    // Empty text, fixed-width reading.
    val emptyWidthStr = DataFrame.readCsvStr("", hasFixedWidthColumns = true)
    emptyWidthStr shouldBe DataFrame.empty()

    // Empty file on disk.
    val emptyCsvFile = DataFrame.readCsv(emptyTempFile(".csv"))
    emptyCsvFile shouldBe DataFrame.empty()

    // Empty file with a manually supplied header: no rows, three String columns.
    val emptyCsvFileManualHeader = DataFrame.readCsv(
        file = emptyTempFile(".csv"),
        header = listOf("a", "b", "c"),
    )
    emptyCsvFileManualHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    // File that contains only a header line: same expectations as above.
    val emptyCsvFileWithHeader = DataFrame.readCsv(
        file = emptyTempFile(".csv").also { it.writeText("a,b,c") },
    )
    emptyCsvFileWithHeader.apply {
        isEmpty() shouldBe true
        columnNames() shouldBe listOf("a", "b", "c")
        columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
    }

    // Empty TSV file.
    val emptyTsvStr = DataFrame.readTsv(emptyTempFile(".tsv"))
    emptyTsvStr shouldBe DataFrame.empty()
}
|
||||
|
||||
@Test
fun `read Csv with comments`() {
    @Language("CSV")
    val commentedCsv =
        """
        # This is a comment
        a,b,c
        1,2,3
        """.trimIndent()

    // The leading comment line is dropped by skipping one line, not by comment detection.
    val parsed = DataFrame.readCsvStr(commentedCsv, skipLines = 1L)

    parsed shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}
|
||||
|
||||
@Test
fun `csv with empty lines`() {
    // One empty line sits between the two data records.
    @Language("CSV")
    val csvWithGap =
        """
        a,b,c
        1,2,3

        4,5,6
        """.trimIndent()

    // By default the empty line becomes a row of nulls.
    val withNullRow = DataFrame.readCsvStr(csvWithGap)
    withNullRow shouldBe dataFrameOf("a", "b", "c")(
        1, 2, 3,
        null, null, null,
        4, 5, 6,
    )

    // With ignoreEmptyLines the empty line is dropped.
    val withoutNullRow = DataFrame.readCsvStr(csvWithGap, ignoreEmptyLines = true)
    withoutNullRow shouldBe dataFrameOf("a", "b", "c")(
        1, 2, 3,
        4, 5, 6,
    )

    // When missing columns are not allowed, reading the same text throws.
    shouldThrow<IllegalStateException> {
        DataFrame.readCsvStr(csvWithGap, allowMissingColumns = false)
    }
}
|
||||
|
||||
@Test
fun `don't read folder`() {
    // Both an empty path and a path that does not exist must be rejected.
    for (invalidPath in listOf("", "NON EXISTENT FILE")) {
        shouldThrow<IllegalArgumentException> { DataFrame.readCsv(invalidPath) }
    }
}
|
||||
|
||||
@Test
fun `cannot auto-parse specific date string`() {
    @Language("csv")
    val frenchCsv =
        """
        name; price; date;
        a;12,45; 05/06/2021;
        b;-13,35;14/07/2025;
        c;100 123,35;;
        d;-204 235,23;;
        e;NaN;;
        f;null;;
        """.trimIndent()

    // First read: default parser options.
    val dfDeephaven = DataFrame.readCsvStr(text = frenchCsv, delimiter = ';')
    // could not parse, remains String
    dfDeephaven["date"].type() shouldBe typeOf<String?>()

    // Second read: setting any locale skips deephaven's date parsing.
    val rootLocaleOptions = ParserOptions(locale = Locale.ROOT)
    val dfDataFrame = DataFrame.readCsvStr(
        text = frenchCsv,
        delimiter = ';',
        parserOptions = rootLocaleOptions,
    )
    // could not parse, remains String
    dfDataFrame["date"].type() shouldBe typeOf<String?>()
}
|
||||
|
||||
@Test
fun `parse with other locales`() {
    // --- French: comma decimals plus a custom date pattern ---
    @Language("csv")
    val frenchCsv =
        """
        name; price; date;
        a;12,45; 05/06/2021;
        b;-13,35;14/07/2025;
        c;100 123,35;;
        d;-204 235,23;;
        e;NaN;;
        f;null;;
        """.trimIndent()

    val frenchOptions = ParserOptions(
        dateTimePattern = "dd/MM/yyyy",
        locale = Locale.FRENCH,
    )
    val frenchDf = DataFrame.readCsvStr(
        text = frenchCsv,
        delimiter = ';',
        parserOptions = frenchOptions,
    )

    frenchDf["price"].type() shouldBe typeOf<Double?>()
    frenchDf["date"].type() shouldBe typeOf<LocalDate?>()

    // --- Dutch: dot as grouping separator, comma as decimal separator ---
    @Language("csv")
    val dutchCsv =
        """
        name; price;
        a;12,45;
        b;-13,35;
        c;100.123,35;
        d;-204.235,23;
        e;NaN;
        f;null;
        """.trimIndent()

    val dutchOptions = ParserOptions(locale = Locale.forLanguageTag("nl-NL"))
    val dutchDf = DataFrame.readCsvStr(
        text = dutchCsv,
        delimiter = ';',
        parserOptions = dutchOptions,
    )

    dutchDf["price"].type() shouldBe typeOf<Double?>()

    // skipping this test on windows due to lack of support for Arabic locales
    val runningOnWindows = System.getProperty("os.name").startsWith("Windows")
    if (!runningOnWindows) {
        // while negative numbers in RTL languages cannot be parsed thanks to Java, others work
        @Language("csv")
        val arabicCsv =
            """
            الاسم; السعر;
            أ;١٢٫٤٥;
            ب;١٣٫٣٥;
            ج;١٠٠٫١٢٣;
            د;٢٠٤٫٢٣٥;
            هـ;ليس رقم;
            و;null;
            """.trimIndent()

        val arabicOptions = ParserOptions(locale = Locale.forLanguageTag("ar-001"))
        val easternArabicDf = DataFrame.readCsvStr(
            arabicCsv,
            delimiter = ';',
            parserOptions = arabicOptions,
        )

        easternArabicDf["السعر"].type() shouldBe typeOf<Double?>()
        easternArabicDf["الاسم"].type() shouldBe typeOf<String>() // apparently not a char
    }
}
|
||||
|
||||
@Test
fun `handle slightly mixed locales`() {
    @Language("csv")
    val estonianWrongMinus =
        """
        name; price;
        a;12,45;
        b;-13,35;
        c;100 123,35;
        d;-204 235,23;
        e;NaN;
        f;null;
        """.trimIndent()

    // Locale passed explicitly through ParserOptions.
    val estonianDf1 = DataFrame.readCsvStr(
        text = estonianWrongMinus,
        delimiter = ';',
        parserOptions = ParserOptions(
            locale = Locale.forLanguageTag("et-EE"),
        ),
    )

    estonianDf1["price"].type() shouldBe typeOf<Double?>()

    // also test the global setting
    try {
        DataFrame.parser.locale = Locale.forLanguageTag("et-EE")

        val estonianDf2 = DataFrame.readCsvStr(
            text = estonianWrongMinus,
            delimiter = ';',
        )
        estonianDf2 shouldBe estonianDf1
    } finally {
        // Reset the global parser even when an assertion above fails; otherwise the changed
        // locale would leak into every test that runs afterwards.
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
@Test
fun `NA and custom null string in double column`() {
    // The default null strings plus the custom "nothing", supplied per call.
    val df1 = DataFrame.readCsv(
        msleepCsv,
        parserOptions = ParserOptions(
            nullStrings = DEFAULT_DELIM_NULL_STRINGS + "nothing",
        ),
    )

    df1["name"].type() shouldBe typeOf<String>()
    df1["genus"].type() shouldBe typeOf<String>()
    df1["vore"].type() shouldBe typeOf<String?>()
    df1["order"].type() shouldBe typeOf<String>()
    df1["conservation"].type() shouldBe typeOf<String?>()
    df1["sleep_total"].type() shouldBe typeOf<Double>()
    df1["sleep_rem"].type() shouldBe typeOf<Double?>()
    df1["sleep_cycle"].type() shouldBe typeOf<Double?>()
    df1["awake"].type() shouldBe typeOf<Double>()
    df1["brainwt"].type() shouldBe typeOf<Double?>()
    df1["bodywt"].type() shouldBe typeOf<Double?>()

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("nothing")
        DEFAULT_DELIM_NULL_STRINGS.forEach {
            DataFrame.parser.addNullString(it)
        }

        val df2 = DataFrame.readCsv(msleepCsv)
        df2 shouldBe df1
    } finally {
        // Reset the global parser even when an assertion above fails; otherwise the extra
        // null strings would leak into every test that runs afterwards.
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
@Test
fun `multiple spaces as delimiter`() {
    // Widths of every column except the last one; they match the explicit
    // `fixedColumnWidths` passed to the second read below.
    val paddedWidths = listOf(25, 9, 9, 9)

    // Builds one fixed-width record by padding each cell (except the trailing one) with spaces
    // up to its column width. Generating the padding keeps the fixture aligned even if an editor
    // or formatter collapses runs of spaces, which would silently break the column layout.
    fun fixedWidthLine(vararg cells: String): String =
        cells.mapIndexed { index, cell ->
            paddedWidths.getOrNull(index)?.let { width -> cell.padEnd(width) } ?: cell
        }.joinToString(separator = "")

    val csv = listOf(
        fixedWidthLine("NAME", "STATUS", "AGE", "NUMBER", "LABELS"),
        fixedWidthLine(
            "argo-events",
            "Active",
            "2y77d",
            "1234",
            "app.kubernetes.io/instance=argo-events,kubernetes.io/metadata.name=argo-events",
        ),
        fixedWidthLine(
            "argo-workflows",
            "Active",
            "2y77d",
            "1234",
            "app.kubernetes.io/instance=argo-workflows,kubernetes.io/metadata.name=argo-workflows",
        ),
        fixedWidthLine("argocd", "Active", "5y18d", "1234", "kubernetes.io/metadata.name=argocd"),
        fixedWidthLine("beta", "Active", "4y235d", "1234", "kubernetes.io/metadata.name=beta"),
    ).joinToString(separator = "\n")

    // First read: no explicit widths are supplied.
    val df1 = DataFrame.readCsvStr(
        text = csv,
        hasFixedWidthColumns = true,
    )

    df1["NAME"].type() shouldBe typeOf<String>()
    df1["STATUS"].type() shouldBe typeOf<String>()
    df1["AGE"].type() shouldBe typeOf<String>()
    df1["NUMBER"].type() shouldBe typeOf<Int>()
    df1["LABELS"].type() shouldBe typeOf<String>()

    // Second read: explicit widths, the header line skipped and replaced by custom names.
    val df2 = DataFrame.readCsvStr(
        text = csv,
        hasFixedWidthColumns = true,
        fixedColumnWidths = paddedWidths + 100,
        skipLines = 1,
        header = listOf("name", "status", "age", "number", "labels"),
    )

    df2["name"].type() shouldBe typeOf<String>()
    df2["status"].type() shouldBe typeOf<String>()
    df2["age"].type() shouldBe typeOf<String>()
    df2["number"].type() shouldBe typeOf<Int>()
    df2["labels"].type() shouldBe typeOf<String>()
}
|
||||
|
||||
@Test
fun `handle default coltype with other parameters`() {
    // Custom header a..k; "a" and "b" get explicit types, everything else falls back to
    // the ColType.DEFAULT entry.
    val customHeader = listOf("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k")
    val requestedTypes = mapOf(
        "a" to ColType.Int,
        "b" to ColType.Double,
        ColType.DEFAULT to ColType.String,
    )

    val parsed = DataFrame.readCsv(
        simpleCsv,
        header = customHeader,
        skipLines = 2,
        colTypes = requestedTypes,
    )

    val actualTypes = parsed.columnTypes()
    actualTypes.shouldContainInOrder(
        typeOf<Int>(),
        typeOf<Double>(),
        typeOf<String>(),
        typeOf<String?>(),
        typeOf<String>(),
        typeOf<String?>(),
        typeOf<String?>(),
        typeOf<String?>(),
        typeOf<String>(),
        typeOf<String>(),
        typeOf<String?>(),
    )
    parsed.rowsCount() shouldBe 4
}
|
||||
|
||||
@Test
fun `skipping types`() {
    // Double is skipped during type inference, but still requested explicitly for one column.
    val df1 = DataFrame.readCsv(
        irisDataset,
        colTypes = mapOf("sepal.length" to ColType.Double),
        parserOptions = ParserOptions(
            skipTypes = setOf(typeOf<Double>()),
        ),
    )

    df1["sepal.length"].type() shouldBe typeOf<Double>()
    df1["sepal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.length"].type() shouldBe typeOf<BigDecimal>()
    df1["petal.width"].type() shouldBe typeOf<BigDecimal>()
    df1["variety"].type() shouldBe typeOf<String>()

    // Also test the global setting
    try {
        DataFrame.parser.addSkipType(typeOf<Double>())

        val df2 = DataFrame.readCsv(
            irisDataset,
            colTypes = mapOf("sepal.length" to ColType.Double),
        )
        df2 shouldBe df1
    } finally {
        // Reset the global parser even when an assertion above fails; otherwise the skipped
        // type would leak into every test that runs afterwards.
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
// Issue #921
|
||||
@Test
fun `read csv with custom null strings and given type`() {
    @Language("CSV")
    val csv =
        """
        a,b
        noppes,2
        1.2,
        3,45
        ,noppes
        1.3,1
        """.trimIndent()

    // "noppes" and the empty string are both treated as null; column types are given explicitly.
    val df1 = DataFrame.readCsvStr(
        csv,
        parserOptions = ParserOptions(
            nullStrings = setOf("noppes", ""),
        ),
        colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int),
    )
    df1 shouldBe dataFrameOf("a", "b")(
        null, 2,
        1.2, null,
        3.0, 45,
        null, null,
        1.3, 1,
    )

    // Also test the global setting
    try {
        DataFrame.parser.addNullString("noppes")
        DataFrame.parser.addNullString("")

        val df2 = DataFrame.readCsvStr(
            csv,
            colTypes = mapOf("a" to ColType.Double, "b" to ColType.Int),
        )

        df2 shouldBe df1
    } finally {
        // Reset the global parser even when an assertion above fails; otherwise the extra
        // null strings would leak into every test that runs afterwards.
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
// Issue #1047
|
||||
@Test
fun `Only use Deephaven datetime parser with custom csv specs`() {
    @Language("csv")
    val csvContent =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    // use DFs parsers by default for datetime-like columns
    val defaultParsed = DataFrame.readCsvStr(csvContent)

    val defaultWithOffset = defaultParsed["with_timezone_offset"]
    defaultWithOffset.type() shouldBe typeOf<StdlibInstant>()
    defaultWithOffset[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")

    val defaultWithoutOffset = defaultParsed["without_timezone_offset"]
    defaultWithoutOffset.type() shouldBe typeOf<LocalDateTime>()
    defaultWithoutOffset[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")

    // enable fast datetime parser for the first column with adjustCsvSpecs
    val customParsed = DataFrame.readCsv(
        inputStream = csvContent.byteInputStream(),
        adjustCsvSpecs = {
            putParserForName("with_timezone_offset", Parsers.DATETIME)
        },
    )

    // With that parser the offset value comes back as a LocalDateTime at 12:00:00.
    val customWithOffset = customParsed["with_timezone_offset"]
    customWithOffset.type() shouldBe typeOf<LocalDateTime>()
    customWithOffset[0] shouldBe LocalDateTime.parse("2024-12-12T12:00:00")

    val customWithoutOffset = customParsed["without_timezone_offset"]
    customWithoutOffset.type() shouldBe typeOf<LocalDateTime>()
    customWithoutOffset[0] shouldBe LocalDateTime.parse("2024-12-12T13:00:00")
}
|
||||
|
||||
@Test
fun `test parsing kotlin-time-Instant`() {
    @Language("csv")
    val csvContent =
        """
        with_timezone_offset,without_timezone_offset
        2024-12-12T13:00:00+01:00,2024-12-12T13:00:00
        """.trimIndent()

    try {
        DataFrame.parser.parseExperimentalInstant = true

        // use DFs parsers by default for datetime-like columns
        val df1 = DataFrame.readCsvStr(csvContent)
        df1["with_timezone_offset"].let {
            it.type() shouldBe typeOf<StdlibInstant>()
            it[0] shouldBe StdlibInstant.parse("2024-12-12T13:00:00+01:00")
        }
    } finally {
        // Reset the global parser even when an assertion above fails; otherwise the changed
        // flag would leak into every test that runs afterwards.
        DataFrame.parser.resetToDefault()
    }
}
|
||||
|
||||
@Test
fun `json dependency test`() {
    // "firstName" and "lastName" are moved into a column group called "name".
    val people = dataFrameOf("firstName", "lastName")(
        "John", "Doe",
        "Jane", "Doe",
    )
    val withNameGroup = people.group { "firstName" and "lastName" }.into { "name" }

    // Each group row is expected as a single-quoted JSON object, followed by a trailing newline.
    val expectedCsv =
        """
        name
        '{"firstName":"John","lastName":"Doe"}'
        '{"firstName":"Jane","lastName":"Doe"}'

        """.trimIndent()

    withNameGroup.toCsvStr(quote = '\'') shouldBe expectedCsv
}
|
||||
|
||||
companion object {
    // Plain `.csv` fixtures, resolved through `testCsv`, which appends the extension.
    private val irisDataset = testCsv("irisDataset")
    private val simpleCsv = testCsv("testCSV")
    private val simpleCsvUtf16le = testCsv("testCSV-utf-16-le-bom")
    private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale")
    private val wineCsv = testCsv("wine")
    private val withBomCsv = testCsv("with-bom")
    private val msleepCsv = testCsv("msleep")

    // Compressed fixtures and archives, addressed by their full resource name.
    private val simpleCsvGz = testResource("testCSV.csv.gz")
    private val simpleCsvZip = testResource("testCSV.zip")
    private val simpleCsvUtf16leGz = testResource("testCSV-utf16le-bom.csv.gz")
    private val simpleCsvUtf16leZip = testResource("testCSV-utf-16-le-bom.zip")
    private val twoCsvsZip = testResource("two csvs.zip")
    private val notCsv = testResource("not-csv.zip")
}
|
||||
}
|
||||
|
||||
/**
 * Looks up [resourcePath] through the class loader of [DelimCsvTsvTests] and returns its URL.
 *
 * @throws IllegalArgumentException if the class loader cannot find the resource; the message
 * names the missing path so that a misspelled fixture name is easy to diagnose.
 */
fun testResource(resourcePath: String): URL =
    requireNotNull(DelimCsvTsvTests::class.java.classLoader.getResource(resourcePath)) {
        "Test resource '$resourcePath' was not found on the classpath"
    }
|
||||
|
||||
/** Resolves the test resource named [csvName] with the `.csv` extension appended. */
fun testCsv(csvName: String): URL {
    val resourceName = "$csvName.csv"
    return testResource(resourceName)
}
|
||||
@@ -0,0 +1,3 @@
|
||||
A B C
|
||||
1 2 3
|
||||
4 5 6
|
||||
|
@@ -0,0 +1,608 @@
|
||||
,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
|
||||
0,2020,MI,FT,Data Scientist,70000,EUR,79833,DE,0,DE,L
|
||||
1,2020,SE,FT,Machine Learning Scientist,260000,USD,260000,JP,0,JP,S
|
||||
2,2020,SE,FT,Big Data Engineer,85000,GBP,109024,GB,50,GB,M
|
||||
3,2020,MI,FT,Product Data Analyst,20000,USD,20000,HN,0,HN,S
|
||||
4,2020,SE,FT,Machine Learning Engineer,150000,USD,150000,US,50,US,L
|
||||
5,2020,EN,FT,Data Analyst,72000,USD,72000,US,100,US,L
|
||||
6,2020,SE,FT,Lead Data Scientist,190000,USD,190000,US,100,US,S
|
||||
7,2020,MI,FT,Data Scientist,11000000,HUF,35735,HU,50,HU,L
|
||||
8,2020,MI,FT,Business Data Analyst,135000,USD,135000,US,100,US,L
|
||||
9,2020,SE,FT,Lead Data Engineer,125000,USD,125000,NZ,50,NZ,S
|
||||
10,2020,EN,FT,Data Scientist,45000,EUR,51321,FR,0,FR,S
|
||||
11,2020,MI,FT,Data Scientist,3000000,INR,40481,IN,0,IN,L
|
||||
12,2020,EN,FT,Data Scientist,35000,EUR,39916,FR,0,FR,M
|
||||
13,2020,MI,FT,Lead Data Analyst,87000,USD,87000,US,100,US,L
|
||||
14,2020,MI,FT,Data Analyst,85000,USD,85000,US,100,US,L
|
||||
15,2020,MI,FT,Data Analyst,8000,USD,8000,PK,50,PK,L
|
||||
16,2020,EN,FT,Data Engineer,4450000,JPY,41689,JP,100,JP,S
|
||||
17,2020,SE,FT,Big Data Engineer,100000,EUR,114047,PL,100,GB,S
|
||||
18,2020,EN,FT,Data Science Consultant,423000,INR,5707,IN,50,IN,M
|
||||
19,2020,MI,FT,Lead Data Engineer,56000,USD,56000,PT,100,US,M
|
||||
20,2020,MI,FT,Machine Learning Engineer,299000,CNY,43331,CN,0,CN,M
|
||||
21,2020,MI,FT,Product Data Analyst,450000,INR,6072,IN,100,IN,L
|
||||
22,2020,SE,FT,Data Engineer,42000,EUR,47899,GR,50,GR,L
|
||||
23,2020,MI,FT,BI Data Analyst,98000,USD,98000,US,0,US,M
|
||||
24,2020,MI,FT,Lead Data Scientist,115000,USD,115000,AE,0,AE,L
|
||||
25,2020,EX,FT,Director of Data Science,325000,USD,325000,US,100,US,L
|
||||
26,2020,EN,FT,Research Scientist,42000,USD,42000,NL,50,NL,L
|
||||
27,2020,SE,FT,Data Engineer,720000,MXN,33511,MX,0,MX,S
|
||||
28,2020,EN,CT,Business Data Analyst,100000,USD,100000,US,100,US,L
|
||||
29,2020,SE,FT,Machine Learning Manager,157000,CAD,117104,CA,50,CA,L
|
||||
30,2020,MI,FT,Data Engineering Manager,51999,EUR,59303,DE,100,DE,S
|
||||
31,2020,EN,FT,Big Data Engineer,70000,USD,70000,US,100,US,L
|
||||
32,2020,SE,FT,Data Scientist,60000,EUR,68428,GR,100,US,L
|
||||
33,2020,MI,FT,Research Scientist,450000,USD,450000,US,0,US,M
|
||||
34,2020,MI,FT,Data Analyst,41000,EUR,46759,FR,50,FR,L
|
||||
35,2020,MI,FT,Data Engineer,65000,EUR,74130,AT,50,AT,L
|
||||
36,2020,MI,FT,Data Science Consultant,103000,USD,103000,US,100,US,L
|
||||
37,2020,EN,FT,Machine Learning Engineer,250000,USD,250000,US,50,US,L
|
||||
38,2020,EN,FT,Data Analyst,10000,USD,10000,NG,100,NG,S
|
||||
39,2020,EN,FT,Machine Learning Engineer,138000,USD,138000,US,100,US,S
|
||||
40,2020,MI,FT,Data Scientist,45760,USD,45760,PH,100,US,S
|
||||
41,2020,EX,FT,Data Engineering Manager,70000,EUR,79833,ES,50,ES,L
|
||||
42,2020,MI,FT,Machine Learning Infrastructure Engineer,44000,EUR,50180,PT,0,PT,M
|
||||
43,2020,MI,FT,Data Engineer,106000,USD,106000,US,100,US,L
|
||||
44,2020,MI,FT,Data Engineer,88000,GBP,112872,GB,50,GB,L
|
||||
45,2020,EN,PT,ML Engineer,14000,EUR,15966,DE,100,DE,S
|
||||
46,2020,MI,FT,Data Scientist,60000,GBP,76958,GB,100,GB,S
|
||||
47,2020,SE,FT,Data Engineer,188000,USD,188000,US,100,US,L
|
||||
48,2020,MI,FT,Data Scientist,105000,USD,105000,US,100,US,L
|
||||
49,2020,MI,FT,Data Engineer,61500,EUR,70139,FR,50,FR,L
|
||||
50,2020,EN,FT,Data Analyst,450000,INR,6072,IN,0,IN,S
|
||||
51,2020,EN,FT,Data Analyst,91000,USD,91000,US,100,US,L
|
||||
52,2020,EN,FT,AI Scientist,300000,DKK,45896,DK,50,DK,S
|
||||
53,2020,EN,FT,Data Engineer,48000,EUR,54742,PK,100,DE,L
|
||||
54,2020,SE,FL,Computer Vision Engineer,60000,USD,60000,RU,100,US,S
|
||||
55,2020,SE,FT,Principal Data Scientist,130000,EUR,148261,DE,100,DE,M
|
||||
56,2020,MI,FT,Data Scientist,34000,EUR,38776,ES,100,ES,M
|
||||
57,2020,MI,FT,Data Scientist,118000,USD,118000,US,100,US,M
|
||||
58,2020,SE,FT,Data Scientist,120000,USD,120000,US,50,US,L
|
||||
59,2020,MI,FT,Data Scientist,138350,USD,138350,US,100,US,M
|
||||
60,2020,MI,FT,Data Engineer,110000,USD,110000,US,100,US,L
|
||||
61,2020,MI,FT,Data Engineer,130800,USD,130800,ES,100,US,M
|
||||
62,2020,EN,PT,Data Scientist,19000,EUR,21669,IT,50,IT,S
|
||||
63,2020,SE,FT,Data Scientist,412000,USD,412000,US,100,US,L
|
||||
64,2020,SE,FT,Machine Learning Engineer,40000,EUR,45618,HR,100,HR,S
|
||||
65,2020,EN,FT,Data Scientist,55000,EUR,62726,DE,50,DE,S
|
||||
66,2020,EN,FT,Data Scientist,43200,EUR,49268,DE,0,DE,S
|
||||
67,2020,SE,FT,Data Science Manager,190200,USD,190200,US,100,US,M
|
||||
68,2020,EN,FT,Data Scientist,105000,USD,105000,US,100,US,S
|
||||
69,2020,SE,FT,Data Scientist,80000,EUR,91237,AT,0,AT,S
|
||||
70,2020,MI,FT,Data Scientist,55000,EUR,62726,FR,50,LU,S
|
||||
71,2020,MI,FT,Data Scientist,37000,EUR,42197,FR,50,FR,S
|
||||
72,2021,EN,FT,Research Scientist,60000,GBP,82528,GB,50,GB,L
|
||||
73,2021,EX,FT,BI Data Analyst,150000,USD,150000,IN,100,US,L
|
||||
74,2021,EX,FT,Head of Data,235000,USD,235000,US,100,US,L
|
||||
75,2021,SE,FT,Data Scientist,45000,EUR,53192,FR,50,FR,L
|
||||
76,2021,MI,FT,BI Data Analyst,100000,USD,100000,US,100,US,M
|
||||
77,2021,MI,PT,3D Computer Vision Researcher,400000,INR,5409,IN,50,IN,M
|
||||
78,2021,MI,CT,ML Engineer,270000,USD,270000,US,100,US,L
|
||||
79,2021,EN,FT,Data Analyst,80000,USD,80000,US,100,US,M
|
||||
80,2021,SE,FT,Data Analytics Engineer,67000,EUR,79197,DE,100,DE,L
|
||||
81,2021,MI,FT,Data Engineer,140000,USD,140000,US,100,US,L
|
||||
82,2021,MI,FT,Applied Data Scientist,68000,CAD,54238,GB,50,CA,L
|
||||
83,2021,MI,FT,Machine Learning Engineer,40000,EUR,47282,ES,100,ES,S
|
||||
84,2021,EX,FT,Director of Data Science,130000,EUR,153667,IT,100,PL,L
|
||||
85,2021,MI,FT,Data Engineer,110000,PLN,28476,PL,100,PL,L
|
||||
86,2021,EN,FT,Data Analyst,50000,EUR,59102,FR,50,FR,M
|
||||
87,2021,MI,FT,Data Analytics Engineer,110000,USD,110000,US,100,US,L
|
||||
88,2021,SE,FT,Lead Data Analyst,170000,USD,170000,US,100,US,L
|
||||
89,2021,SE,FT,Data Analyst,80000,USD,80000,BG,100,US,S
|
||||
90,2021,SE,FT,Marketing Data Analyst,75000,EUR,88654,GR,100,DK,L
|
||||
91,2021,EN,FT,Data Science Consultant,65000,EUR,76833,DE,100,DE,S
|
||||
92,2021,MI,FT,Lead Data Analyst,1450000,INR,19609,IN,100,IN,L
|
||||
93,2021,SE,FT,Lead Data Engineer,276000,USD,276000,US,0,US,L
|
||||
94,2021,EN,FT,Data Scientist,2200000,INR,29751,IN,50,IN,L
|
||||
95,2021,MI,FT,Cloud Data Engineer,120000,SGD,89294,SG,50,SG,L
|
||||
96,2021,EN,PT,AI Scientist,12000,USD,12000,BR,100,US,S
|
||||
97,2021,MI,FT,Financial Data Analyst,450000,USD,450000,US,100,US,L
|
||||
98,2021,EN,FT,Computer Vision Software Engineer,70000,USD,70000,US,100,US,M
|
||||
99,2021,MI,FT,Computer Vision Software Engineer,81000,EUR,95746,DE,100,US,S
|
||||
100,2021,MI,FT,Data Analyst,75000,USD,75000,US,0,US,L
|
||||
101,2021,SE,FT,Data Engineer,150000,USD,150000,US,100,US,L
|
||||
102,2021,MI,FT,BI Data Analyst,11000000,HUF,36259,HU,50,US,L
|
||||
103,2021,MI,FT,Data Analyst,62000,USD,62000,US,0,US,L
|
||||
104,2021,MI,FT,Data Scientist,73000,USD,73000,US,0,US,L
|
||||
105,2021,MI,FT,Data Analyst,37456,GBP,51519,GB,50,GB,L
|
||||
106,2021,MI,FT,Research Scientist,235000,CAD,187442,CA,100,CA,L
|
||||
107,2021,SE,FT,Data Engineer,115000,USD,115000,US,100,US,S
|
||||
108,2021,SE,FT,Data Engineer,150000,USD,150000,US,100,US,M
|
||||
109,2021,EN,FT,Data Engineer,2250000,INR,30428,IN,100,IN,L
|
||||
110,2021,SE,FT,Machine Learning Engineer,80000,EUR,94564,DE,50,DE,L
|
||||
111,2021,SE,FT,Director of Data Engineering,82500,GBP,113476,GB,100,GB,M
|
||||
112,2021,SE,FT,Lead Data Engineer,75000,GBP,103160,GB,100,GB,S
|
||||
113,2021,EN,PT,AI Scientist,12000,USD,12000,PK,100,US,M
|
||||
114,2021,MI,FT,Data Engineer,38400,EUR,45391,NL,100,NL,L
|
||||
115,2021,EN,FT,Machine Learning Scientist,225000,USD,225000,US,100,US,L
|
||||
116,2021,MI,FT,Data Scientist,50000,USD,50000,NG,100,NG,L
|
||||
117,2021,MI,FT,Data Science Engineer,34000,EUR,40189,GR,100,GR,M
|
||||
118,2021,EN,FT,Data Analyst,90000,USD,90000,US,100,US,S
|
||||
119,2021,MI,FT,Data Engineer,200000,USD,200000,US,100,US,L
|
||||
120,2021,MI,FT,Big Data Engineer,60000,USD,60000,ES,50,RO,M
|
||||
121,2021,SE,FT,Principal Data Engineer,200000,USD,200000,US,100,US,M
|
||||
122,2021,EN,FT,Data Analyst,50000,USD,50000,US,100,US,M
|
||||
123,2021,EN,FT,Applied Data Scientist,80000,GBP,110037,GB,0,GB,L
|
||||
124,2021,EN,PT,Data Analyst,8760,EUR,10354,ES,50,ES,M
|
||||
125,2021,MI,FT,Principal Data Scientist,151000,USD,151000,US,100,US,L
|
||||
126,2021,SE,FT,Machine Learning Scientist,120000,USD,120000,US,50,US,S
|
||||
127,2021,MI,FT,Data Scientist,700000,INR,9466,IN,0,IN,S
|
||||
128,2021,EN,FT,Machine Learning Engineer,20000,USD,20000,IN,100,IN,S
|
||||
129,2021,SE,FT,Lead Data Scientist,3000000,INR,40570,IN,50,IN,L
|
||||
130,2021,EN,FT,Machine Learning Developer,100000,USD,100000,IQ,50,IQ,S
|
||||
131,2021,EN,FT,Data Scientist,42000,EUR,49646,FR,50,FR,M
|
||||
132,2021,MI,FT,Applied Machine Learning Scientist,38400,USD,38400,VN,100,US,M
|
||||
133,2021,SE,FT,Computer Vision Engineer,24000,USD,24000,BR,100,BR,M
|
||||
134,2021,EN,FT,Data Scientist,100000,USD,100000,US,0,US,S
|
||||
135,2021,MI,FT,Data Analyst,90000,USD,90000,US,100,US,M
|
||||
136,2021,MI,FT,ML Engineer,7000000,JPY,63711,JP,50,JP,S
|
||||
137,2021,MI,FT,ML Engineer,8500000,JPY,77364,JP,50,JP,S
|
||||
138,2021,SE,FT,Principal Data Scientist,220000,USD,220000,US,0,US,L
|
||||
139,2021,EN,FT,Data Scientist,80000,USD,80000,US,100,US,M
|
||||
140,2021,MI,FT,Data Analyst,135000,USD,135000,US,100,US,L
|
||||
141,2021,SE,FT,Data Science Manager,240000,USD,240000,US,0,US,L
|
||||
142,2021,SE,FT,Data Engineering Manager,150000,USD,150000,US,0,US,L
|
||||
143,2021,MI,FT,Data Scientist,82500,USD,82500,US,100,US,S
|
||||
144,2021,MI,FT,Data Engineer,100000,USD,100000,US,100,US,L
|
||||
145,2021,SE,FT,Machine Learning Engineer,70000,EUR,82744,BE,50,BE,M
|
||||
146,2021,MI,FT,Research Scientist,53000,EUR,62649,FR,50,FR,M
|
||||
147,2021,MI,FT,Data Engineer,90000,USD,90000,US,100,US,L
|
||||
148,2021,SE,FT,Data Engineering Manager,153000,USD,153000,US,100,US,L
|
||||
149,2021,SE,FT,Cloud Data Engineer,160000,USD,160000,BR,100,US,S
|
||||
150,2021,SE,FT,Director of Data Science,168000,USD,168000,JP,0,JP,S
|
||||
151,2021,MI,FT,Data Scientist,150000,USD,150000,US,100,US,M
|
||||
152,2021,MI,FT,Data Scientist,95000,CAD,75774,CA,100,CA,L
|
||||
153,2021,EN,FT,Data Scientist,13400,USD,13400,UA,100,UA,L
|
||||
154,2021,SE,FT,Data Science Manager,144000,USD,144000,US,100,US,L
|
||||
155,2021,SE,FT,Data Science Engineer,159500,CAD,127221,CA,50,CA,L
|
||||
156,2021,MI,FT,Data Scientist,160000,SGD,119059,SG,100,IL,M
|
||||
157,2021,MI,FT,Applied Machine Learning Scientist,423000,USD,423000,US,50,US,L
|
||||
158,2021,SE,FT,Data Analytics Manager,120000,USD,120000,US,100,US,M
|
||||
159,2021,EN,FT,Machine Learning Engineer,125000,USD,125000,US,100,US,S
|
||||
160,2021,EX,FT,Head of Data,230000,USD,230000,RU,50,RU,L
|
||||
161,2021,EX,FT,Head of Data Science,85000,USD,85000,RU,0,RU,M
|
||||
162,2021,MI,FT,Data Engineer,24000,EUR,28369,MT,50,MT,L
|
||||
163,2021,EN,FT,Data Science Consultant,54000,EUR,63831,DE,50,DE,L
|
||||
164,2021,EX,FT,Director of Data Science,110000,EUR,130026,DE,50,DE,M
|
||||
165,2021,SE,FT,Data Specialist,165000,USD,165000,US,100,US,L
|
||||
166,2021,EN,FT,Data Engineer,80000,USD,80000,US,100,US,L
|
||||
167,2021,EX,FT,Director of Data Science,250000,USD,250000,US,0,US,L
|
||||
168,2021,EN,FT,BI Data Analyst,55000,USD,55000,US,50,US,S
|
||||
169,2021,MI,FT,Data Architect,150000,USD,150000,US,100,US,L
|
||||
170,2021,MI,FT,Data Architect,170000,USD,170000,US,100,US,L
|
||||
171,2021,MI,FT,Data Engineer,60000,GBP,82528,GB,100,GB,L
|
||||
172,2021,EN,FT,Data Analyst,60000,USD,60000,US,100,US,S
|
||||
173,2021,SE,FT,Principal Data Scientist,235000,USD,235000,US,100,US,L
|
||||
174,2021,SE,FT,Research Scientist,51400,EUR,60757,PT,50,PT,L
|
||||
175,2021,SE,FT,Data Engineering Manager,174000,USD,174000,US,100,US,L
|
||||
176,2021,MI,FT,Data Scientist,58000,MXN,2859,MX,0,MX,S
|
||||
177,2021,MI,FT,Data Scientist,30400000,CLP,40038,CL,100,CL,L
|
||||
178,2021,EN,FT,Machine Learning Engineer,81000,USD,81000,US,50,US,S
|
||||
179,2021,MI,FT,Data Scientist,420000,INR,5679,IN,100,US,S
|
||||
180,2021,MI,FT,Big Data Engineer,1672000,INR,22611,IN,0,IN,L
|
||||
181,2021,MI,FT,Data Scientist,76760,EUR,90734,DE,50,DE,L
|
||||
182,2021,MI,FT,Data Engineer,22000,EUR,26005,RO,0,US,L
|
||||
183,2021,SE,FT,Finance Data Analyst,45000,GBP,61896,GB,50,GB,L
|
||||
184,2021,MI,FL,Machine Learning Scientist,12000,USD,12000,PK,50,PK,M
|
||||
185,2021,MI,FT,Data Engineer,4000,USD,4000,IR,100,IR,M
|
||||
186,2021,SE,FT,Data Analytics Engineer,50000,USD,50000,VN,100,GB,M
|
||||
187,2021,EX,FT,Data Science Consultant,59000,EUR,69741,FR,100,ES,S
|
||||
188,2021,SE,FT,Data Engineer,65000,EUR,76833,RO,50,GB,S
|
||||
189,2021,MI,FT,Machine Learning Engineer,74000,USD,74000,JP,50,JP,S
|
||||
190,2021,SE,FT,Data Science Manager,152000,USD,152000,US,100,FR,L
|
||||
191,2021,EN,FT,Machine Learning Engineer,21844,USD,21844,CO,50,CO,M
|
||||
192,2021,MI,FT,Big Data Engineer,18000,USD,18000,MD,0,MD,S
|
||||
193,2021,SE,FT,Data Science Manager,174000,USD,174000,US,100,US,L
|
||||
194,2021,SE,FT,Research Scientist,120500,CAD,96113,CA,50,CA,L
|
||||
195,2021,MI,FT,Data Scientist,147000,USD,147000,US,50,US,L
|
||||
196,2021,EN,FT,BI Data Analyst,9272,USD,9272,KE,100,KE,S
|
||||
197,2021,SE,FT,Machine Learning Engineer,1799997,INR,24342,IN,100,IN,L
|
||||
198,2021,SE,FT,Data Science Manager,4000000,INR,54094,IN,50,US,L
|
||||
199,2021,EN,FT,Data Science Consultant,90000,USD,90000,US,100,US,S
|
||||
200,2021,MI,FT,Data Scientist,52000,EUR,61467,DE,50,AT,M
|
||||
201,2021,SE,FT,Machine Learning Infrastructure Engineer,195000,USD,195000,US,100,US,M
|
||||
202,2021,MI,FT,Data Scientist,32000,EUR,37825,ES,100,ES,L
|
||||
203,2021,SE,FT,Research Scientist,50000,USD,50000,FR,100,US,S
|
||||
204,2021,MI,FT,Data Scientist,160000,USD,160000,US,100,US,L
|
||||
205,2021,MI,FT,Data Scientist,69600,BRL,12901,BR,0,BR,S
|
||||
206,2021,SE,FT,Machine Learning Engineer,200000,USD,200000,US,100,US,L
|
||||
207,2021,SE,FT,Data Engineer,165000,USD,165000,US,0,US,M
|
||||
208,2021,MI,FL,Data Engineer,20000,USD,20000,IT,0,US,L
|
||||
209,2021,SE,FT,Data Analytics Manager,120000,USD,120000,US,0,US,L
|
||||
210,2021,MI,FT,Machine Learning Engineer,21000,EUR,24823,SI,50,SI,L
|
||||
211,2021,MI,FT,Research Scientist,48000,EUR,56738,FR,50,FR,S
|
||||
212,2021,MI,FT,Data Engineer,48000,GBP,66022,HK,50,GB,S
|
||||
213,2021,EN,FT,Big Data Engineer,435000,INR,5882,IN,0,CH,L
|
||||
214,2021,EN,FT,Machine Learning Engineer,21000,EUR,24823,DE,50,DE,M
|
||||
215,2021,SE,FT,Principal Data Engineer,185000,USD,185000,US,100,US,L
|
||||
216,2021,EN,PT,Computer Vision Engineer,180000,DKK,28609,DK,50,DK,S
|
||||
217,2021,MI,FT,Data Scientist,76760,EUR,90734,DE,50,DE,L
|
||||
218,2021,MI,FT,Machine Learning Engineer,75000,EUR,88654,BE,100,BE,M
|
||||
219,2021,SE,FT,Data Analytics Manager,140000,USD,140000,US,100,US,L
|
||||
220,2021,MI,FT,Machine Learning Engineer,180000,PLN,46597,PL,100,PL,L
|
||||
221,2021,MI,FT,Data Scientist,85000,GBP,116914,GB,50,GB,L
|
||||
222,2021,MI,FT,Data Scientist,2500000,INR,33808,IN,0,IN,M
|
||||
223,2021,MI,FT,Data Scientist,40900,GBP,56256,GB,50,GB,L
|
||||
224,2021,SE,FT,Machine Learning Scientist,225000,USD,225000,US,100,CA,L
|
||||
225,2021,EX,CT,Principal Data Scientist,416000,USD,416000,US,100,US,S
|
||||
226,2021,SE,FT,Data Scientist,110000,CAD,87738,CA,100,CA,S
|
||||
227,2021,MI,FT,Data Scientist,75000,EUR,88654,DE,50,DE,L
|
||||
228,2021,SE,FT,Data Scientist,135000,USD,135000,US,0,US,L
|
||||
229,2021,SE,FT,Data Analyst,90000,CAD,71786,CA,100,CA,M
|
||||
230,2021,EN,FT,Big Data Engineer,1200000,INR,16228,IN,100,IN,L
|
||||
231,2021,SE,FT,ML Engineer,256000,USD,256000,US,100,US,S
|
||||
232,2021,SE,FT,Director of Data Engineering,200000,USD,200000,US,100,US,L
|
||||
233,2021,SE,FT,Data Analyst,200000,USD,200000,US,100,US,L
|
||||
234,2021,MI,FT,Data Architect,180000,USD,180000,US,100,US,L
|
||||
235,2021,MI,FT,Head of Data Science,110000,USD,110000,US,0,US,S
|
||||
236,2021,MI,FT,Research Scientist,80000,CAD,63810,CA,100,CA,M
|
||||
237,2021,MI,FT,Data Scientist,39600,EUR,46809,ES,100,ES,M
|
||||
238,2021,EN,FT,Data Scientist,4000,USD,4000,VN,0,VN,M
|
||||
239,2021,EN,FT,Data Engineer,1600000,INR,21637,IN,50,IN,M
|
||||
240,2021,SE,FT,Data Scientist,130000,CAD,103691,CA,100,CA,L
|
||||
241,2021,MI,FT,Data Analyst,80000,USD,80000,US,100,US,L
|
||||
242,2021,MI,FT,Data Engineer,110000,USD,110000,US,100,US,L
|
||||
243,2021,SE,FT,Data Scientist,165000,USD,165000,US,100,US,L
|
||||
244,2021,EN,FT,AI Scientist,1335000,INR,18053,IN,100,AS,S
|
||||
245,2021,MI,FT,Data Engineer,52500,GBP,72212,GB,50,GB,L
|
||||
246,2021,EN,FT,Data Scientist,31000,EUR,36643,FR,50,FR,L
|
||||
247,2021,MI,FT,Data Engineer,108000,TRY,12103,TR,0,TR,M
|
||||
248,2021,SE,FT,Data Engineer,70000,GBP,96282,GB,50,GB,L
|
||||
249,2021,SE,FT,Principal Data Analyst,170000,USD,170000,US,100,US,M
|
||||
250,2021,MI,FT,Data Scientist,115000,USD,115000,US,50,US,L
|
||||
251,2021,EN,FT,Data Scientist,90000,USD,90000,US,100,US,S
|
||||
252,2021,EX,FT,Principal Data Engineer,600000,USD,600000,US,100,US,L
|
||||
253,2021,EN,FT,Data Scientist,2100000,INR,28399,IN,100,IN,M
|
||||
254,2021,MI,FT,Data Analyst,93000,USD,93000,US,100,US,L
|
||||
255,2021,SE,FT,Big Data Architect,125000,CAD,99703,CA,50,CA,M
|
||||
256,2021,MI,FT,Data Engineer,200000,USD,200000,US,100,US,L
|
||||
257,2021,SE,FT,Principal Data Scientist,147000,EUR,173762,DE,100,DE,M
|
||||
258,2021,SE,FT,Machine Learning Engineer,185000,USD,185000,US,50,US,L
|
||||
259,2021,EX,FT,Director of Data Science,120000,EUR,141846,DE,0,DE,L
|
||||
260,2021,MI,FT,Data Scientist,130000,USD,130000,US,50,US,L
|
||||
261,2021,SE,FT,Data Analyst,54000,EUR,63831,DE,50,DE,L
|
||||
262,2021,MI,FT,Data Scientist,1250000,INR,16904,IN,100,IN,S
|
||||
263,2021,SE,FT,Machine Learning Engineer,4900000,INR,66265,IN,0,IN,L
|
||||
264,2021,MI,FT,Data Scientist,21600,EUR,25532,RS,100,DE,S
|
||||
265,2021,SE,FT,Lead Data Engineer,160000,USD,160000,PR,50,US,S
|
||||
266,2021,MI,FT,Data Engineer,93150,USD,93150,US,0,US,M
|
||||
267,2021,MI,FT,Data Engineer,111775,USD,111775,US,0,US,M
|
||||
268,2021,MI,FT,Data Engineer,250000,TRY,28016,TR,100,TR,M
|
||||
269,2021,EN,FT,Data Engineer,55000,EUR,65013,DE,50,DE,M
|
||||
270,2021,EN,FT,Data Engineer,72500,USD,72500,US,100,US,L
|
||||
271,2021,SE,FT,Computer Vision Engineer,102000,BRL,18907,BR,0,BR,M
|
||||
272,2021,EN,FT,Data Science Consultant,65000,EUR,76833,DE,0,DE,L
|
||||
273,2021,EN,FT,Machine Learning Engineer,85000,USD,85000,NL,100,DE,S
|
||||
274,2021,SE,FT,Data Scientist,65720,EUR,77684,FR,50,FR,M
|
||||
275,2021,EN,FT,Data Scientist,100000,USD,100000,US,100,US,M
|
||||
276,2021,EN,FT,Data Scientist,58000,USD,58000,US,50,US,L
|
||||
277,2021,SE,FT,AI Scientist,55000,USD,55000,ES,100,ES,L
|
||||
278,2021,SE,FT,Data Scientist,180000,TRY,20171,TR,50,TR,L
|
||||
279,2021,EN,FT,Business Data Analyst,50000,EUR,59102,LU,100,LU,L
|
||||
280,2021,MI,FT,Data Engineer,112000,USD,112000,US,100,US,L
|
||||
281,2021,EN,FT,Research Scientist,100000,USD,100000,JE,0,CN,L
|
||||
282,2021,MI,PT,Data Engineer,59000,EUR,69741,NL,100,NL,L
|
||||
283,2021,SE,CT,Staff Data Scientist,105000,USD,105000,US,100,US,M
|
||||
284,2021,MI,FT,Research Scientist,69999,USD,69999,CZ,50,CZ,L
|
||||
285,2021,SE,FT,Data Science Manager,7000000,INR,94665,IN,50,IN,L
|
||||
286,2021,SE,FT,Head of Data,87000,EUR,102839,SI,100,SI,L
|
||||
287,2021,MI,FT,Data Scientist,109000,USD,109000,US,50,US,L
|
||||
288,2021,MI,FT,Machine Learning Engineer,43200,EUR,51064,IT,50,IT,L
|
||||
289,2022,SE,FT,Data Engineer,135000,USD,135000,US,100,US,M
|
||||
290,2022,SE,FT,Data Analyst,155000,USD,155000,US,100,US,M
|
||||
291,2022,SE,FT,Data Analyst,120600,USD,120600,US,100,US,M
|
||||
292,2022,MI,FT,Data Scientist,130000,USD,130000,US,0,US,M
|
||||
293,2022,MI,FT,Data Scientist,90000,USD,90000,US,0,US,M
|
||||
294,2022,MI,FT,Data Engineer,170000,USD,170000,US,100,US,M
|
||||
295,2022,MI,FT,Data Engineer,150000,USD,150000,US,100,US,M
|
||||
296,2022,SE,FT,Data Analyst,102100,USD,102100,US,100,US,M
|
||||
297,2022,SE,FT,Data Analyst,84900,USD,84900,US,100,US,M
|
||||
298,2022,SE,FT,Data Scientist,136620,USD,136620,US,100,US,M
|
||||
299,2022,SE,FT,Data Scientist,99360,USD,99360,US,100,US,M
|
||||
300,2022,SE,FT,Data Scientist,90000,GBP,117789,GB,0,GB,M
|
||||
301,2022,SE,FT,Data Scientist,80000,GBP,104702,GB,0,GB,M
|
||||
302,2022,SE,FT,Data Scientist,146000,USD,146000,US,100,US,M
|
||||
303,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
|
||||
304,2022,EN,FT,Data Engineer,40000,GBP,52351,GB,100,GB,M
|
||||
305,2022,SE,FT,Data Analyst,99000,USD,99000,US,0,US,M
|
||||
306,2022,SE,FT,Data Analyst,116000,USD,116000,US,0,US,M
|
||||
307,2022,MI,FT,Data Analyst,106260,USD,106260,US,0,US,M
|
||||
308,2022,MI,FT,Data Analyst,126500,USD,126500,US,0,US,M
|
||||
309,2022,EX,FT,Data Engineer,242000,USD,242000,US,100,US,M
|
||||
310,2022,EX,FT,Data Engineer,200000,USD,200000,US,100,US,M
|
||||
311,2022,MI,FT,Data Scientist,50000,GBP,65438,GB,0,GB,M
|
||||
312,2022,MI,FT,Data Scientist,30000,GBP,39263,GB,0,GB,M
|
||||
313,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,0,GB,M
|
||||
314,2022,MI,FT,Data Engineer,40000,GBP,52351,GB,0,GB,M
|
||||
315,2022,SE,FT,Data Scientist,165220,USD,165220,US,100,US,M
|
||||
316,2022,EN,FT,Data Engineer,35000,GBP,45807,GB,100,GB,M
|
||||
317,2022,SE,FT,Data Scientist,120160,USD,120160,US,100,US,M
|
||||
318,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
|
||||
319,2022,SE,FT,Data Engineer,181940,USD,181940,US,0,US,M
|
||||
320,2022,SE,FT,Data Engineer,132320,USD,132320,US,0,US,M
|
||||
321,2022,SE,FT,Data Engineer,220110,USD,220110,US,0,US,M
|
||||
322,2022,SE,FT,Data Engineer,160080,USD,160080,US,0,US,M
|
||||
323,2022,SE,FT,Data Scientist,180000,USD,180000,US,0,US,L
|
||||
324,2022,SE,FT,Data Scientist,120000,USD,120000,US,0,US,L
|
||||
325,2022,SE,FT,Data Analyst,124190,USD,124190,US,100,US,M
|
||||
326,2022,EX,FT,Data Analyst,130000,USD,130000,US,100,US,M
|
||||
327,2022,EX,FT,Data Analyst,110000,USD,110000,US,100,US,M
|
||||
328,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
|
||||
329,2022,MI,FT,Data Analyst,115500,USD,115500,US,100,US,M
|
||||
330,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
|
||||
331,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
|
||||
332,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
|
||||
333,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
|
||||
334,2022,SE,FT,Data Engineer,165400,USD,165400,US,100,US,M
|
||||
335,2022,SE,FT,Data Engineer,132320,USD,132320,US,100,US,M
|
||||
336,2022,MI,FT,Data Analyst,167000,USD,167000,US,100,US,M
|
||||
337,2022,SE,FT,Data Engineer,243900,USD,243900,US,100,US,M
|
||||
338,2022,SE,FT,Data Analyst,136600,USD,136600,US,100,US,M
|
||||
339,2022,SE,FT,Data Analyst,109280,USD,109280,US,100,US,M
|
||||
340,2022,SE,FT,Data Engineer,128875,USD,128875,US,100,US,M
|
||||
341,2022,SE,FT,Data Engineer,93700,USD,93700,US,100,US,M
|
||||
342,2022,EX,FT,Head of Data Science,224000,USD,224000,US,100,US,M
|
||||
343,2022,EX,FT,Head of Data Science,167875,USD,167875,US,100,US,M
|
||||
344,2022,EX,FT,Analytics Engineer,175000,USD,175000,US,100,US,M
|
||||
345,2022,SE,FT,Data Engineer,156600,USD,156600,US,100,US,M
|
||||
346,2022,SE,FT,Data Engineer,108800,USD,108800,US,0,US,M
|
||||
347,2022,SE,FT,Data Scientist,95550,USD,95550,US,0,US,M
|
||||
348,2022,SE,FT,Data Engineer,113000,USD,113000,US,0,US,L
|
||||
349,2022,SE,FT,Data Analyst,135000,USD,135000,US,100,US,M
|
||||
350,2022,SE,FT,Data Science Manager,161342,USD,161342,US,100,US,M
|
||||
351,2022,SE,FT,Data Science Manager,137141,USD,137141,US,100,US,M
|
||||
352,2022,SE,FT,Data Scientist,167000,USD,167000,US,100,US,M
|
||||
353,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
|
||||
354,2022,SE,FT,Data Engineer,60000,GBP,78526,GB,0,GB,M
|
||||
355,2022,SE,FT,Data Engineer,50000,GBP,65438,GB,0,GB,M
|
||||
356,2022,SE,FT,Data Scientist,150000,USD,150000,US,0,US,M
|
||||
357,2022,SE,FT,Data Scientist,211500,USD,211500,US,100,US,M
|
||||
358,2022,SE,FT,Data Architect,192400,USD,192400,CA,100,CA,M
|
||||
359,2022,SE,FT,Data Architect,90700,USD,90700,CA,100,CA,M
|
||||
360,2022,SE,FT,Data Analyst,130000,USD,130000,CA,100,CA,M
|
||||
361,2022,SE,FT,Data Analyst,61300,USD,61300,CA,100,CA,M
|
||||
362,2022,SE,FT,Data Analyst,130000,USD,130000,CA,100,CA,M
|
||||
363,2022,SE,FT,Data Analyst,61300,USD,61300,CA,100,CA,M
|
||||
364,2022,SE,FT,Data Engineer,160000,USD,160000,US,0,US,L
|
||||
365,2022,SE,FT,Data Scientist,138600,USD,138600,US,100,US,M
|
||||
366,2022,SE,FT,Data Engineer,136000,USD,136000,US,0,US,M
|
||||
367,2022,MI,FT,Data Analyst,58000,USD,58000,US,0,US,S
|
||||
368,2022,EX,FT,Analytics Engineer,135000,USD,135000,US,100,US,M
|
||||
369,2022,SE,FT,Data Scientist,170000,USD,170000,US,100,US,M
|
||||
370,2022,SE,FT,Data Scientist,123000,USD,123000,US,100,US,M
|
||||
371,2022,SE,FT,Machine Learning Engineer,189650,USD,189650,US,0,US,M
|
||||
372,2022,SE,FT,Machine Learning Engineer,164996,USD,164996,US,0,US,M
|
||||
373,2022,MI,FT,ETL Developer,50000,EUR,54957,GR,0,GR,M
|
||||
374,2022,MI,FT,ETL Developer,50000,EUR,54957,GR,0,GR,M
|
||||
375,2022,EX,FT,Lead Data Engineer,150000,CAD,118187,CA,100,CA,S
|
||||
376,2022,SE,FT,Data Analyst,132000,USD,132000,US,0,US,M
|
||||
377,2022,SE,FT,Data Engineer,165400,USD,165400,US,100,US,M
|
||||
378,2022,SE,FT,Data Architect,208775,USD,208775,US,100,US,M
|
||||
379,2022,SE,FT,Data Architect,147800,USD,147800,US,100,US,M
|
||||
380,2022,SE,FT,Data Engineer,136994,USD,136994,US,100,US,M
|
||||
381,2022,SE,FT,Data Engineer,101570,USD,101570,US,100,US,M
|
||||
382,2022,SE,FT,Data Analyst,128875,USD,128875,US,100,US,M
|
||||
383,2022,SE,FT,Data Analyst,93700,USD,93700,US,100,US,M
|
||||
384,2022,EX,FT,Head of Machine Learning,6000000,INR,79039,IN,50,IN,L
|
||||
385,2022,SE,FT,Data Engineer,132320,USD,132320,US,100,US,M
|
||||
386,2022,EN,FT,Machine Learning Engineer,28500,GBP,37300,GB,100,GB,L
|
||||
387,2022,SE,FT,Data Analyst,164000,USD,164000,US,0,US,M
|
||||
388,2022,SE,FT,Data Engineer,155000,USD,155000,US,100,US,M
|
||||
389,2022,MI,FT,Machine Learning Engineer,95000,GBP,124333,GB,0,GB,M
|
||||
390,2022,MI,FT,Machine Learning Engineer,75000,GBP,98158,GB,0,GB,M
|
||||
391,2022,MI,FT,AI Scientist,120000,USD,120000,US,0,US,M
|
||||
392,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
|
||||
393,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
|
||||
394,2022,SE,FT,Data Analytics Manager,145000,USD,145000,US,100,US,M
|
||||
395,2022,SE,FT,Data Analytics Manager,105400,USD,105400,US,100,US,M
|
||||
396,2022,MI,FT,Machine Learning Engineer,80000,EUR,87932,FR,100,DE,M
|
||||
397,2022,MI,FT,Data Engineer,90000,GBP,117789,GB,0,GB,M
|
||||
398,2022,SE,FT,Data Scientist,215300,USD,215300,US,100,US,L
|
||||
399,2022,SE,FT,Data Scientist,158200,USD,158200,US,100,US,L
|
||||
400,2022,SE,FT,Data Engineer,209100,USD,209100,US,100,US,L
|
||||
401,2022,SE,FT,Data Engineer,154600,USD,154600,US,100,US,L
|
||||
402,2022,SE,FT,Data Analyst,115934,USD,115934,US,0,US,M
|
||||
403,2022,SE,FT,Data Analyst,81666,USD,81666,US,0,US,M
|
||||
404,2022,SE,FT,Data Engineer,175000,USD,175000,US,100,US,M
|
||||
405,2022,MI,FT,Data Engineer,75000,GBP,98158,GB,0,GB,M
|
||||
406,2022,MI,FT,Data Analyst,58000,USD,58000,US,0,US,S
|
||||
407,2022,SE,FT,Data Engineer,183600,USD,183600,US,100,US,L
|
||||
408,2022,MI,FT,Data Analyst,40000,GBP,52351,GB,100,GB,M
|
||||
409,2022,SE,FT,Data Scientist,180000,USD,180000,US,100,US,M
|
||||
410,2022,MI,FT,Data Scientist,55000,GBP,71982,GB,0,GB,M
|
||||
411,2022,MI,FT,Data Scientist,35000,GBP,45807,GB,0,GB,M
|
||||
412,2022,MI,FT,Data Engineer,60000,EUR,65949,GR,100,GR,M
|
||||
413,2022,MI,FT,Data Engineer,45000,EUR,49461,GR,100,GR,M
|
||||
414,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,100,GB,M
|
||||
415,2022,MI,FT,Data Engineer,45000,GBP,58894,GB,100,GB,M
|
||||
416,2022,SE,FT,Data Scientist,260000,USD,260000,US,100,US,M
|
||||
417,2022,SE,FT,Data Science Engineer,60000,USD,60000,AR,100,MX,L
|
||||
418,2022,MI,FT,Data Engineer,63900,USD,63900,US,0,US,M
|
||||
419,2022,MI,FT,Machine Learning Scientist,160000,USD,160000,US,100,US,L
|
||||
420,2022,MI,FT,Machine Learning Scientist,112300,USD,112300,US,100,US,L
|
||||
421,2022,MI,FT,Data Science Manager,241000,USD,241000,US,100,US,M
|
||||
422,2022,MI,FT,Data Science Manager,159000,USD,159000,US,100,US,M
|
||||
423,2022,SE,FT,Data Scientist,180000,USD,180000,US,0,US,M
|
||||
424,2022,SE,FT,Data Scientist,80000,USD,80000,US,0,US,M
|
||||
425,2022,MI,FT,Data Engineer,82900,USD,82900,US,0,US,M
|
||||
426,2022,SE,FT,Data Engineer,100800,USD,100800,US,100,US,L
|
||||
427,2022,MI,FT,Data Engineer,45000,EUR,49461,ES,100,ES,M
|
||||
428,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,L
|
||||
429,2022,MI,FT,Data Analyst,30000,GBP,39263,GB,100,GB,M
|
||||
430,2022,MI,FT,Data Analyst,40000,EUR,43966,ES,100,ES,M
|
||||
431,2022,MI,FT,Data Analyst,30000,EUR,32974,ES,100,ES,M
|
||||
432,2022,MI,FT,Data Engineer,80000,EUR,87932,ES,100,ES,M
|
||||
433,2022,MI,FT,Data Engineer,70000,EUR,76940,ES,100,ES,M
|
||||
434,2022,MI,FT,Data Engineer,80000,GBP,104702,GB,100,GB,M
|
||||
435,2022,MI,FT,Data Engineer,70000,GBP,91614,GB,100,GB,M
|
||||
436,2022,MI,FT,Data Engineer,60000,EUR,65949,ES,100,ES,M
|
||||
437,2022,MI,FT,Data Engineer,80000,EUR,87932,GR,100,GR,M
|
||||
438,2022,SE,FT,Machine Learning Engineer,189650,USD,189650,US,0,US,M
|
||||
439,2022,SE,FT,Machine Learning Engineer,164996,USD,164996,US,0,US,M
|
||||
440,2022,MI,FT,Data Analyst,40000,EUR,43966,GR,100,GR,M
|
||||
441,2022,MI,FT,Data Analyst,30000,EUR,32974,GR,100,GR,M
|
||||
442,2022,MI,FT,Data Engineer,75000,GBP,98158,GB,100,GB,M
|
||||
443,2022,MI,FT,Data Engineer,60000,GBP,78526,GB,100,GB,M
|
||||
444,2022,SE,FT,Data Scientist,215300,USD,215300,US,0,US,L
|
||||
445,2022,MI,FT,Data Engineer,70000,EUR,76940,GR,100,GR,M
|
||||
446,2022,SE,FT,Data Engineer,209100,USD,209100,US,100,US,L
|
||||
447,2022,SE,FT,Data Engineer,154600,USD,154600,US,100,US,L
|
||||
448,2022,SE,FT,Data Engineer,180000,USD,180000,US,100,US,M
|
||||
449,2022,EN,FT,ML Engineer,20000,EUR,21983,PT,100,PT,L
|
||||
450,2022,SE,FT,Data Engineer,80000,USD,80000,US,100,US,M
|
||||
451,2022,MI,FT,Machine Learning Developer,100000,CAD,78791,CA,100,CA,M
|
||||
452,2022,EX,FT,Director of Data Science,250000,CAD,196979,CA,50,CA,L
|
||||
453,2022,MI,FT,Machine Learning Engineer,120000,USD,120000,US,100,US,S
|
||||
454,2022,EN,FT,Computer Vision Engineer,125000,USD,125000,US,0,US,M
|
||||
455,2022,MI,FT,NLP Engineer,240000,CNY,37236,US,50,US,L
|
||||
456,2022,SE,FT,Data Engineer,105000,USD,105000,US,100,US,M
|
||||
457,2022,SE,FT,Lead Machine Learning Engineer,80000,EUR,87932,DE,0,DE,M
|
||||
458,2022,MI,FT,Business Data Analyst,1400000,INR,18442,IN,100,IN,M
|
||||
459,2022,MI,FT,Data Scientist,2400000,INR,31615,IN,100,IN,L
|
||||
460,2022,MI,FT,Machine Learning Infrastructure Engineer,53000,EUR,58255,PT,50,PT,L
|
||||
461,2022,EN,FT,Financial Data Analyst,100000,USD,100000,US,50,US,L
|
||||
462,2022,MI,PT,Data Engineer,50000,EUR,54957,DE,50,DE,L
|
||||
463,2022,EN,FT,Data Scientist,1400000,INR,18442,IN,100,IN,M
|
||||
464,2022,SE,FT,Principal Data Scientist,148000,EUR,162674,DE,100,DE,M
|
||||
465,2022,EN,FT,Data Engineer,120000,USD,120000,US,100,US,M
|
||||
466,2022,SE,FT,Research Scientist,144000,USD,144000,US,50,US,L
|
||||
467,2022,SE,FT,Data Scientist,104890,USD,104890,US,100,US,M
|
||||
468,2022,SE,FT,Data Engineer,100000,USD,100000,US,100,US,M
|
||||
469,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
|
||||
470,2022,MI,FT,Data Analyst,135000,USD,135000,US,100,US,M
|
||||
471,2022,MI,FT,Data Analyst,50000,USD,50000,US,100,US,M
|
||||
472,2022,SE,FT,Data Scientist,220000,USD,220000,US,100,US,M
|
||||
473,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
|
||||
474,2022,MI,FT,Data Scientist,140000,GBP,183228,GB,0,GB,M
|
||||
475,2022,MI,FT,Data Scientist,70000,GBP,91614,GB,0,GB,M
|
||||
476,2022,SE,FT,Data Scientist,185100,USD,185100,US,100,US,M
|
||||
477,2022,SE,FT,Machine Learning Engineer,220000,USD,220000,US,100,US,M
|
||||
478,2022,MI,FT,Data Scientist,200000,USD,200000,US,100,US,M
|
||||
479,2022,MI,FT,Data Scientist,120000,USD,120000,US,100,US,M
|
||||
480,2022,SE,FT,Machine Learning Engineer,120000,USD,120000,AE,100,AE,S
|
||||
481,2022,SE,FT,Machine Learning Engineer,65000,USD,65000,AE,100,AE,S
|
||||
482,2022,EX,FT,Data Engineer,324000,USD,324000,US,100,US,M
|
||||
483,2022,EX,FT,Data Engineer,216000,USD,216000,US,100,US,M
|
||||
484,2022,SE,FT,Data Engineer,210000,USD,210000,US,100,US,M
|
||||
485,2022,SE,FT,Machine Learning Engineer,120000,USD,120000,US,100,US,M
|
||||
486,2022,SE,FT,Data Scientist,230000,USD,230000,US,100,US,M
|
||||
487,2022,EN,PT,Data Scientist,100000,USD,100000,DZ,50,DZ,M
|
||||
488,2022,MI,FL,Data Scientist,100000,USD,100000,CA,100,US,M
|
||||
489,2022,EN,CT,Applied Machine Learning Scientist,29000,EUR,31875,TN,100,CZ,M
|
||||
490,2022,SE,FT,Head of Data,200000,USD,200000,MY,100,US,M
|
||||
491,2022,MI,FT,Principal Data Analyst,75000,USD,75000,CA,100,CA,S
|
||||
492,2022,MI,FT,Data Scientist,150000,PLN,35590,PL,100,PL,L
|
||||
493,2022,SE,FT,Machine Learning Developer,100000,CAD,78791,CA,100,CA,M
|
||||
494,2022,SE,FT,Data Scientist,100000,USD,100000,BR,100,US,M
|
||||
495,2022,MI,FT,Machine Learning Scientist,153000,USD,153000,US,50,US,M
|
||||
496,2022,EN,FT,Data Engineer,52800,EUR,58035,PK,100,DE,M
|
||||
497,2022,SE,FT,Data Scientist,165000,USD,165000,US,100,US,M
|
||||
498,2022,SE,FT,Research Scientist,85000,EUR,93427,FR,50,FR,L
|
||||
499,2022,EN,FT,Data Scientist,66500,CAD,52396,CA,100,CA,L
|
||||
500,2022,SE,FT,Machine Learning Engineer,57000,EUR,62651,NL,100,NL,L
|
||||
501,2022,MI,FT,Head of Data,30000,EUR,32974,EE,100,EE,S
|
||||
502,2022,EN,FT,Data Scientist,40000,USD,40000,JP,100,MY,L
|
||||
503,2022,MI,FT,Machine Learning Engineer,121000,AUD,87425,AU,100,AU,L
|
||||
504,2022,SE,FT,Data Engineer,115000,USD,115000,US,100,US,M
|
||||
505,2022,EN,FT,Data Scientist,120000,AUD,86703,AU,50,AU,M
|
||||
506,2022,MI,FT,Applied Machine Learning Scientist,75000,USD,75000,BO,100,US,L
|
||||
507,2022,MI,FT,Research Scientist,59000,EUR,64849,AT,0,AT,L
|
||||
508,2022,EN,FT,Research Scientist,120000,USD,120000,US,100,US,L
|
||||
509,2022,MI,FT,Applied Data Scientist,157000,USD,157000,US,100,US,L
|
||||
510,2022,EN,FT,Computer Vision Software Engineer,150000,USD,150000,AU,100,AU,S
|
||||
511,2022,MI,FT,Business Data Analyst,90000,CAD,70912,CA,50,CA,L
|
||||
512,2022,EN,FT,Data Engineer,65000,USD,65000,US,100,US,S
|
||||
513,2022,SE,FT,Machine Learning Engineer,65000,EUR,71444,IE,100,IE,S
|
||||
514,2022,EN,FT,Data Analytics Engineer,20000,USD,20000,PK,0,PK,M
|
||||
515,2022,MI,FT,Data Scientist,48000,USD,48000,RU,100,US,S
|
||||
516,2022,SE,FT,Data Science Manager,152500,USD,152500,US,100,US,M
|
||||
517,2022,MI,FT,Data Engineer,62000,EUR,68147,FR,100,FR,M
|
||||
518,2022,MI,FT,Data Scientist,115000,CHF,122346,CH,0,CH,L
|
||||
519,2022,SE,FT,Applied Data Scientist,380000,USD,380000,US,100,US,L
|
||||
520,2022,MI,FT,Data Scientist,88000,CAD,69336,CA,100,CA,M
|
||||
521,2022,EN,FT,Computer Vision Engineer,10000,USD,10000,PT,100,LU,M
|
||||
522,2022,MI,FT,Data Analyst,20000,USD,20000,GR,100,GR,S
|
||||
523,2022,SE,FT,Data Analytics Lead,405000,USD,405000,US,100,US,L
|
||||
524,2022,MI,FT,Data Scientist,135000,USD,135000,US,100,US,L
|
||||
525,2022,SE,FT,Applied Data Scientist,177000,USD,177000,US,100,US,L
|
||||
526,2022,MI,FT,Data Scientist,78000,USD,78000,US,100,US,M
|
||||
527,2022,SE,FT,Data Analyst,135000,USD,135000,US,100,US,M
|
||||
528,2022,SE,FT,Data Analyst,100000,USD,100000,US,100,US,M
|
||||
529,2022,SE,FT,Data Analyst,90320,USD,90320,US,100,US,M
|
||||
530,2022,MI,FT,Data Analyst,85000,USD,85000,CA,0,CA,M
|
||||
531,2022,MI,FT,Data Analyst,75000,USD,75000,CA,0,CA,M
|
||||
532,2022,SE,FT,Machine Learning Engineer,214000,USD,214000,US,100,US,M
|
||||
533,2022,SE,FT,Machine Learning Engineer,192600,USD,192600,US,100,US,M
|
||||
534,2022,SE,FT,Data Architect,266400,USD,266400,US,100,US,M
|
||||
535,2022,SE,FT,Data Architect,213120,USD,213120,US,100,US,M
|
||||
536,2022,SE,FT,Data Analyst,112900,USD,112900,US,100,US,M
|
||||
537,2022,SE,FT,Data Engineer,155000,USD,155000,US,100,US,M
|
||||
538,2022,MI,FT,Data Scientist,141300,USD,141300,US,0,US,M
|
||||
539,2022,MI,FT,Data Scientist,102100,USD,102100,US,0,US,M
|
||||
540,2022,SE,FT,Data Analyst,115934,USD,115934,US,100,US,M
|
||||
541,2022,SE,FT,Data Analyst,81666,USD,81666,US,100,US,M
|
||||
542,2022,MI,FT,Data Engineer,206699,USD,206699,US,0,US,M
|
||||
543,2022,MI,FT,Data Engineer,99100,USD,99100,US,0,US,M
|
||||
544,2022,SE,FT,Data Engineer,130000,USD,130000,US,100,US,M
|
||||
545,2022,SE,FT,Data Engineer,115000,USD,115000,US,100,US,M
|
||||
546,2022,SE,FT,Data Engineer,110500,USD,110500,US,100,US,M
|
||||
547,2022,SE,FT,Data Engineer,130000,USD,130000,US,100,US,M
|
||||
548,2022,SE,FT,Data Analyst,99050,USD,99050,US,100,US,M
|
||||
549,2022,SE,FT,Data Engineer,160000,USD,160000,US,100,US,M
|
||||
550,2022,SE,FT,Data Scientist,205300,USD,205300,US,0,US,L
|
||||
551,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,L
|
||||
552,2022,SE,FT,Data Scientist,176000,USD,176000,US,100,US,M
|
||||
553,2022,SE,FT,Data Scientist,144000,USD,144000,US,100,US,M
|
||||
554,2022,SE,FT,Data Engineer,200100,USD,200100,US,100,US,M
|
||||
555,2022,SE,FT,Data Engineer,160000,USD,160000,US,100,US,M
|
||||
556,2022,SE,FT,Data Engineer,145000,USD,145000,US,100,US,M
|
||||
557,2022,SE,FT,Data Engineer,70500,USD,70500,US,0,US,M
|
||||
558,2022,SE,FT,Data Scientist,205300,USD,205300,US,0,US,M
|
||||
559,2022,SE,FT,Data Scientist,140400,USD,140400,US,0,US,M
|
||||
560,2022,SE,FT,Analytics Engineer,205300,USD,205300,US,0,US,M
|
||||
561,2022,SE,FT,Analytics Engineer,184700,USD,184700,US,0,US,M
|
||||
562,2022,SE,FT,Data Engineer,175100,USD,175100,US,100,US,M
|
||||
563,2022,SE,FT,Data Engineer,140250,USD,140250,US,100,US,M
|
||||
564,2022,SE,FT,Data Analyst,116150,USD,116150,US,100,US,M
|
||||
565,2022,SE,FT,Data Engineer,54000,USD,54000,US,0,US,M
|
||||
566,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
|
||||
567,2022,MI,FT,Data Analyst,50000,GBP,65438,GB,0,GB,M
|
||||
568,2022,SE,FT,Data Analyst,80000,USD,80000,US,100,US,M
|
||||
569,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
|
||||
570,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
|
||||
571,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
|
||||
572,2022,SE,FT,Data Analyst,100000,USD,100000,US,100,US,M
|
||||
573,2022,SE,FT,Data Analyst,69000,USD,69000,US,100,US,M
|
||||
574,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
|
||||
575,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
|
||||
576,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
|
||||
577,2022,SE,FT,Data Analyst,150075,USD,150075,US,100,US,M
|
||||
578,2022,SE,FT,Data Engineer,100000,USD,100000,US,100,US,M
|
||||
579,2022,SE,FT,Data Engineer,25000,USD,25000,US,100,US,M
|
||||
580,2022,SE,FT,Data Analyst,126500,USD,126500,US,100,US,M
|
||||
581,2022,SE,FT,Data Analyst,106260,USD,106260,US,100,US,M
|
||||
582,2022,SE,FT,Data Engineer,220110,USD,220110,US,100,US,M
|
||||
583,2022,SE,FT,Data Engineer,160080,USD,160080,US,100,US,M
|
||||
584,2022,SE,FT,Data Analyst,105000,USD,105000,US,100,US,M
|
||||
585,2022,SE,FT,Data Analyst,110925,USD,110925,US,100,US,M
|
||||
586,2022,MI,FT,Data Analyst,35000,GBP,45807,GB,0,GB,M
|
||||
587,2022,SE,FT,Data Scientist,140000,USD,140000,US,100,US,M
|
||||
588,2022,SE,FT,Data Analyst,99000,USD,99000,US,0,US,M
|
||||
589,2022,SE,FT,Data Analyst,60000,USD,60000,US,100,US,M
|
||||
590,2022,SE,FT,Data Architect,192564,USD,192564,US,100,US,M
|
||||
591,2022,SE,FT,Data Architect,144854,USD,144854,US,100,US,M
|
||||
592,2022,SE,FT,Data Scientist,230000,USD,230000,US,100,US,M
|
||||
593,2022,SE,FT,Data Scientist,150000,USD,150000,US,100,US,M
|
||||
594,2022,SE,FT,Data Analytics Manager,150260,USD,150260,US,100,US,M
|
||||
595,2022,SE,FT,Data Analytics Manager,109280,USD,109280,US,100,US,M
|
||||
596,2022,SE,FT,Data Scientist,210000,USD,210000,US,100,US,M
|
||||
597,2022,SE,FT,Data Analyst,170000,USD,170000,US,100,US,M
|
||||
598,2022,MI,FT,Data Scientist,160000,USD,160000,US,100,US,M
|
||||
599,2022,MI,FT,Data Scientist,130000,USD,130000,US,100,US,M
|
||||
600,2022,EN,FT,Data Analyst,67000,USD,67000,CA,0,CA,M
|
||||
601,2022,EN,FT,Data Analyst,52000,USD,52000,CA,0,CA,M
|
||||
602,2022,SE,FT,Data Engineer,154000,USD,154000,US,100,US,M
|
||||
603,2022,SE,FT,Data Engineer,126000,USD,126000,US,100,US,M
|
||||
604,2022,SE,FT,Data Analyst,129000,USD,129000,US,0,US,M
|
||||
605,2022,SE,FT,Data Analyst,150000,USD,150000,US,100,US,M
|
||||
606,2022,MI,FT,AI Scientist,200000,USD,200000,IN,100,US,L
|
||||
|
@@ -0,0 +1,4 @@
|
||||
duration,floatDuration
|
||||
12 min,1.0
|
||||
15,12.98 sec
|
||||
1 Season,0.9 parsec
|
||||
|
+93282
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,151 @@
|
||||
"sepal.length","sepal.width","petal.length","petal.width","variety"
|
||||
5.1,3.5,1.4,.2,"Setosa"
|
||||
4.9,3,1.4,.2,"Setosa"
|
||||
4.7,3.2,1.3,.2,"Setosa"
|
||||
4.6,3.1,1.5,.2,"Setosa"
|
||||
5,3.6,1.4,.2,"Setosa"
|
||||
5.4,3.9,1.7,.4,"Setosa"
|
||||
4.6,3.4,1.4,.3,"Setosa"
|
||||
5,3.4,1.5,.2,"Setosa"
|
||||
4.4,2.9,1.4,.2,"Setosa"
|
||||
4.9,3.1,1.5,.1,"Setosa"
|
||||
5.4,3.7,1.5,.2,"Setosa"
|
||||
4.8,3.4,1.6,.2,"Setosa"
|
||||
4.8,3,1.4,.1,"Setosa"
|
||||
4.3,3,1.1,.1,"Setosa"
|
||||
5.8,4,1.2,.2,"Setosa"
|
||||
5.7,4.4,1.5,.4,"Setosa"
|
||||
5.4,3.9,1.3,.4,"Setosa"
|
||||
5.1,3.5,1.4,.3,"Setosa"
|
||||
5.7,3.8,1.7,.3,"Setosa"
|
||||
5.1,3.8,1.5,.3,"Setosa"
|
||||
5.4,3.4,1.7,.2,"Setosa"
|
||||
5.1,3.7,1.5,.4,"Setosa"
|
||||
4.6,3.6,1,.2,"Setosa"
|
||||
5.1,3.3,1.7,.5,"Setosa"
|
||||
4.8,3.4,1.9,.2,"Setosa"
|
||||
5,3,1.6,.2,"Setosa"
|
||||
5,3.4,1.6,.4,"Setosa"
|
||||
5.2,3.5,1.5,.2,"Setosa"
|
||||
5.2,3.4,1.4,.2,"Setosa"
|
||||
4.7,3.2,1.6,.2,"Setosa"
|
||||
4.8,3.1,1.6,.2,"Setosa"
|
||||
5.4,3.4,1.5,.4,"Setosa"
|
||||
5.2,4.1,1.5,.1,"Setosa"
|
||||
5.5,4.2,1.4,.2,"Setosa"
|
||||
4.9,3.1,1.5,.2,"Setosa"
|
||||
5,3.2,1.2,.2,"Setosa"
|
||||
5.5,3.5,1.3,.2,"Setosa"
|
||||
4.9,3.6,1.4,.1,"Setosa"
|
||||
4.4,3,1.3,.2,"Setosa"
|
||||
5.1,3.4,1.5,.2,"Setosa"
|
||||
5,3.5,1.3,.3,"Setosa"
|
||||
4.5,2.3,1.3,.3,"Setosa"
|
||||
4.4,3.2,1.3,.2,"Setosa"
|
||||
5,3.5,1.6,.6,"Setosa"
|
||||
5.1,3.8,1.9,.4,"Setosa"
|
||||
4.8,3,1.4,.3,"Setosa"
|
||||
5.1,3.8,1.6,.2,"Setosa"
|
||||
4.6,3.2,1.4,.2,"Setosa"
|
||||
5.3,3.7,1.5,.2,"Setosa"
|
||||
5,3.3,1.4,.2,"Setosa"
|
||||
7,3.2,4.7,1.4,"Versicolor"
|
||||
6.4,3.2,4.5,1.5,"Versicolor"
|
||||
6.9,3.1,4.9,1.5,"Versicolor"
|
||||
5.5,2.3,4,1.3,"Versicolor"
|
||||
6.5,2.8,4.6,1.5,"Versicolor"
|
||||
5.7,2.8,4.5,1.3,"Versicolor"
|
||||
6.3,3.3,4.7,1.6,"Versicolor"
|
||||
4.9,2.4,3.3,1,"Versicolor"
|
||||
6.6,2.9,4.6,1.3,"Versicolor"
|
||||
5.2,2.7,3.9,1.4,"Versicolor"
|
||||
5,2,3.5,1,"Versicolor"
|
||||
5.9,3,4.2,1.5,"Versicolor"
|
||||
6,2.2,4,1,"Versicolor"
|
||||
6.1,2.9,4.7,1.4,"Versicolor"
|
||||
5.6,2.9,3.6,1.3,"Versicolor"
|
||||
6.7,3.1,4.4,1.4,"Versicolor"
|
||||
5.6,3,4.5,1.5,"Versicolor"
|
||||
5.8,2.7,4.1,1,"Versicolor"
|
||||
6.2,2.2,4.5,1.5,"Versicolor"
|
||||
5.6,2.5,3.9,1.1,"Versicolor"
|
||||
5.9,3.2,4.8,1.8,"Versicolor"
|
||||
6.1,2.8,4,1.3,"Versicolor"
|
||||
6.3,2.5,4.9,1.5,"Versicolor"
|
||||
6.1,2.8,4.7,1.2,"Versicolor"
|
||||
6.4,2.9,4.3,1.3,"Versicolor"
|
||||
6.6,3,4.4,1.4,"Versicolor"
|
||||
6.8,2.8,4.8,1.4,"Versicolor"
|
||||
6.7,3,5,1.7,"Versicolor"
|
||||
6,2.9,4.5,1.5,"Versicolor"
|
||||
5.7,2.6,3.5,1,"Versicolor"
|
||||
5.5,2.4,3.8,1.1,"Versicolor"
|
||||
5.5,2.4,3.7,1,"Versicolor"
|
||||
5.8,2.7,3.9,1.2,"Versicolor"
|
||||
6,2.7,5.1,1.6,"Versicolor"
|
||||
5.4,3,4.5,1.5,"Versicolor"
|
||||
6,3.4,4.5,1.6,"Versicolor"
|
||||
6.7,3.1,4.7,1.5,"Versicolor"
|
||||
6.3,2.3,4.4,1.3,"Versicolor"
|
||||
5.6,3,4.1,1.3,"Versicolor"
|
||||
5.5,2.5,4,1.3,"Versicolor"
|
||||
5.5,2.6,4.4,1.2,"Versicolor"
|
||||
6.1,3,4.6,1.4,"Versicolor"
|
||||
5.8,2.6,4,1.2,"Versicolor"
|
||||
5,2.3,3.3,1,"Versicolor"
|
||||
5.6,2.7,4.2,1.3,"Versicolor"
|
||||
5.7,3,4.2,1.2,"Versicolor"
|
||||
5.7,2.9,4.2,1.3,"Versicolor"
|
||||
6.2,2.9,4.3,1.3,"Versicolor"
|
||||
5.1,2.5,3,1.1,"Versicolor"
|
||||
5.7,2.8,4.1,1.3,"Versicolor"
|
||||
6.3,3.3,6,2.5,"Virginica"
|
||||
5.8,2.7,5.1,1.9,"Virginica"
|
||||
7.1,3,5.9,2.1,"Virginica"
|
||||
6.3,2.9,5.6,1.8,"Virginica"
|
||||
6.5,3,5.8,2.2,"Virginica"
|
||||
7.6,3,6.6,2.1,"Virginica"
|
||||
4.9,2.5,4.5,1.7,"Virginica"
|
||||
7.3,2.9,6.3,1.8,"Virginica"
|
||||
6.7,2.5,5.8,1.8,"Virginica"
|
||||
7.2,3.6,6.1,2.5,"Virginica"
|
||||
6.5,3.2,5.1,2,"Virginica"
|
||||
6.4,2.7,5.3,1.9,"Virginica"
|
||||
6.8,3,5.5,2.1,"Virginica"
|
||||
5.7,2.5,5,2,"Virginica"
|
||||
5.8,2.8,5.1,2.4,"Virginica"
|
||||
6.4,3.2,5.3,2.3,"Virginica"
|
||||
6.5,3,5.5,1.8,"Virginica"
|
||||
7.7,3.8,6.7,2.2,"Virginica"
|
||||
7.7,2.6,6.9,2.3,"Virginica"
|
||||
6,2.2,5,1.5,"Virginica"
|
||||
6.9,3.2,5.7,2.3,"Virginica"
|
||||
5.6,2.8,4.9,2,"Virginica"
|
||||
7.7,2.8,6.7,2,"Virginica"
|
||||
6.3,2.7,4.9,1.8,"Virginica"
|
||||
6.7,3.3,5.7,2.1,"Virginica"
|
||||
7.2,3.2,6,1.8,"Virginica"
|
||||
6.2,2.8,4.8,1.8,"Virginica"
|
||||
6.1,3,4.9,1.8,"Virginica"
|
||||
6.4,2.8,5.6,2.1,"Virginica"
|
||||
7.2,3,5.8,1.6,"Virginica"
|
||||
7.4,2.8,6.1,1.9,"Virginica"
|
||||
7.9,3.8,6.4,2,"Virginica"
|
||||
6.4,2.8,5.6,2.2,"Virginica"
|
||||
6.3,2.8,5.1,1.5,"Virginica"
|
||||
6.1,2.6,5.6,1.4,"Virginica"
|
||||
7.7,3,6.1,2.3,"Virginica"
|
||||
6.3,3.4,5.6,2.4,"Virginica"
|
||||
6.4,3.1,5.5,1.8,"Virginica"
|
||||
6,3,4.8,1.8,"Virginica"
|
||||
6.9,3.1,5.4,2.1,"Virginica"
|
||||
6.7,3.1,5.6,2.4,"Virginica"
|
||||
6.9,3.1,5.1,2.3,"Virginica"
|
||||
5.8,2.7,5.1,1.9,"Virginica"
|
||||
6.8,3.2,5.9,2.3,"Virginica"
|
||||
6.7,3.3,5.7,2.5,"Virginica"
|
||||
6.7,3,5.2,2.3,"Virginica"
|
||||
6.3,2.5,5,1.9,"Virginica"
|
||||
6.5,3,5.2,2,"Virginica"
|
||||
6.2,3.4,5.4,2.3,"Virginica"
|
||||
5.9,3,5.1,1.8,"Virginica"
|
||||
|
Binary file not shown.
@@ -0,0 +1,84 @@
|
||||
name,genus,vore,order,conservation,sleep_total,sleep_rem,sleep_cycle,awake,brainwt,bodywt
|
||||
Cheetah,Acinonyx,carni,Carnivora,lc,12.1,nothing,NA,11.9,NA,50
|
||||
Owl monkey,Aotus,omni,Primates,NA,17,1.8,NA,7,0.0155,0.48
|
||||
Mountain beaver,Aplodontia,herbi,Rodentia,nt,14.4,2.4,NA,9.6,NA,1.35
|
||||
Greater short-tailed shrew,Blarina,omni,Soricomorpha,lc,14.9,2.3,0.133333333,9.1,0.00029,0.019
|
||||
Cow,Bos,herbi,Artiodactyla,domesticated,4,0.7,0.666666667,20,0.423,600
|
||||
Three-toed sloth,Bradypus,herbi,Pilosa,NA,14.4,2.2,0.766666667,9.6,NA,3.85
|
||||
Northern fur seal,Callorhinus,carni,Carnivora,vu,8.7,1.4,0.383333333,15.3,NA,20.49
|
||||
Vesper mouse,Calomys,NA,Rodentia,NA,7,NA,NA,17,NA,0.045
|
||||
Dog,Canis,carni,Carnivora,domesticated,10.1,2.9,0.333333333,13.9,0.07,14
|
||||
Roe deer,Capreolus,herbi,Artiodactyla,lc,3,NA,NA,21,0.0982,14.8
|
||||
Goat,Capri,herbi,Artiodactyla,lc,5.3,0.6,NA,18.7,0.115,33.5
|
||||
Guinea pig,Cavis,herbi,Rodentia,domesticated,9.4,0.8,0.216666667,14.6,0.0055,0.728
|
||||
Grivet,Cercopithecus,omni,Primates,lc,10,0.7,NA,14,NA,4.75
|
||||
Chinchilla,Chinchilla,herbi,Rodentia,domesticated,12.5,1.5,0.116666667,11.5,0.0064,0.42
|
||||
Star-nosed mole,Condylura,omni,Soricomorpha,lc,10.3,2.2,NA,13.7,0.001,0.06
|
||||
African giant pouched rat,Cricetomys,omni,Rodentia,NA,8.3,2,NA,15.7,0.0066,1
|
||||
Lesser short-tailed shrew,Cryptotis,omni,Soricomorpha,lc,9.1,1.4,0.15,14.9,0.00014,0.005
|
||||
Long-nosed armadillo,Dasypus,carni,Cingulata,lc,17.4,3.1,0.383333333,6.6,0.0108,3.5
|
||||
Tree hyrax,Dendrohyrax,herbi,Hyracoidea,lc,5.3,0.5,NA,18.7,0.0123,2.95
|
||||
North American Opossum,Didelphis,omni,Didelphimorphia,lc,18,4.9,0.333333333,6,0.0063,1.7
|
||||
Asian elephant,Elephas,herbi,Proboscidea,en,3.9,NA,NA,20.1,4.603,2547
|
||||
Big brown bat,Eptesicus,insecti,Chiroptera,lc,19.7,3.9,0.116666667,4.3,3e-04,0.023
|
||||
Horse,Equus,herbi,Perissodactyla,domesticated,2.9,0.6,1,21.1,0.655,521
|
||||
Donkey,Equus,herbi,Perissodactyla,domesticated,3.1,0.4,NA,20.9,0.419,187
|
||||
European hedgehog,Erinaceus,omni,Erinaceomorpha,lc,10.1,3.5,0.283333333,13.9,0.0035,0.77
|
||||
Patas monkey,Erythrocebus,omni,Primates,lc,10.9,1.1,NA,13.1,0.115,10
|
||||
Western american chipmunk,Eutamias,herbi,Rodentia,NA,14.9,NA,NA,9.1,NA,0.071
|
||||
Domestic cat,Felis,carni,Carnivora,domesticated,12.5,3.2,0.416666667,11.5,0.0256,3.3
|
||||
Galago,Galago,omni,Primates,NA,9.8,1.1,0.55,14.2,0.005,0.2
|
||||
Giraffe,Giraffa,herbi,Artiodactyla,cd,1.9,0.4,NA,22.1,NA,899.995
|
||||
Pilot whale,Globicephalus,carni,Cetacea,cd,2.7,0.1,NA,21.35,NA,800
|
||||
Gray seal,Haliochoerus,carni,Carnivora,lc,6.2,1.5,NA,17.8,0.325,85
|
||||
Gray hyrax,Heterohyrax,herbi,Hyracoidea,lc,6.3,0.6,NA,17.7,0.01227,2.625
|
||||
Human,Homo,omni,Primates,NA,8,1.9,1.5,16,1.32,62
|
||||
Mongoose lemur,Lemur,herbi,Primates,vu,9.5,0.9,NA,14.5,NA,1.67
|
||||
African elephant,Loxodonta,herbi,Proboscidea,vu,3.3,NA,NA,20.7,5.712,6654
|
||||
Thick-tailed opposum,Lutreolina,carni,Didelphimorphia,lc,19.4,6.6,NA,4.6,NA,0.37
|
||||
Macaque,Macaca,omni,Primates,NA,10.1,1.2,0.75,13.9,0.179,6.8
|
||||
Mongolian gerbil,Meriones,herbi,Rodentia,lc,14.2,1.9,NA,9.8,NA,0.053
|
||||
Golden hamster,Mesocricetus,herbi,Rodentia,en,14.3,3.1,0.2,9.7,0.001,0.12
|
||||
Vole ,Microtus,herbi,Rodentia,NA,12.8,NA,NA,11.2,NA,0.035
|
||||
House mouse,Mus,herbi,Rodentia,nt,12.5,1.4,0.183333333,11.5,4e-04,0.022
|
||||
Little brown bat,Myotis,insecti,Chiroptera,NA,19.9,2,0.2,4.1,0.00025,0.01
|
||||
Round-tailed muskrat,Neofiber,herbi,Rodentia,nt,14.6,NA,NA,9.4,NA,0.266
|
||||
Slow loris,Nyctibeus,carni,Primates,NA,11,NA,NA,13,0.0125,1.4
|
||||
Degu,Octodon,herbi,Rodentia,lc,7.7,0.9,NA,16.3,NA,0.21
|
||||
Northern grasshopper mouse,Onychomys,carni,Rodentia,lc,14.5,NA,NA,9.5,NA,0.028
|
||||
Rabbit,Oryctolagus,herbi,Lagomorpha,domesticated,8.4,0.9,0.416666667,15.6,0.0121,2.5
|
||||
Sheep,Ovis,herbi,Artiodactyla,domesticated,3.8,0.6,NA,20.2,0.175,55.5
|
||||
Chimpanzee,Pan,omni,Primates,NA,9.7,1.4,1.416666667,14.3,0.44,52.2
|
||||
Tiger,Panthera,carni,Carnivora,en,15.8,NA,NA,8.2,NA,162.564
|
||||
Jaguar,Panthera,carni,Carnivora,nt,10.4,NA,NA,13.6,0.157,100
|
||||
Lion,Panthera,carni,Carnivora,vu,13.5,NA,NA,10.5,NA,161.499
|
||||
Baboon,Papio,omni,Primates,NA,9.4,1,0.666666667,14.6,0.18,25.235
|
||||
Desert hedgehog,Paraechinus,NA,Erinaceomorpha,lc,10.3,2.7,NA,13.7,0.0024,0.55
|
||||
Potto,Perodicticus,omni,Primates,lc,11,NA,NA,13,NA,1.1
|
||||
Deer mouse,Peromyscus,NA,Rodentia,NA,11.5,NA,NA,12.5,NA,0.021
|
||||
Phalanger,Phalanger,NA,Diprotodontia,NA,13.7,1.8,NA,10.3,0.0114,1.62
|
||||
Caspian seal,Phoca,carni,Carnivora,vu,3.5,0.4,NA,20.5,NA,86
|
||||
Common porpoise,Phocoena,carni,Cetacea,vu,5.6,NA,NA,18.45,NA,53.18
|
||||
Potoroo,Potorous,herbi,Diprotodontia,NA,11.1,1.5,NA,12.9,NA,1.1
|
||||
Giant armadillo,Priodontes,insecti,Cingulata,en,18.1,6.1,NA,5.9,0.081,60
|
||||
Rock hyrax,Procavia,NA,Hyracoidea,lc,5.4,0.5,NA,18.6,0.021,3.6
|
||||
Laboratory rat,Rattus,herbi,Rodentia,lc,13,2.4,0.183333333,11,0.0019,0.32
|
||||
African striped mouse,Rhabdomys,omni,Rodentia,NA,8.7,NA,NA,15.3,NA,0.044
|
||||
Squirrel monkey,Saimiri,omni,Primates,NA,9.6,1.4,NA,14.4,0.02,0.743
|
||||
Eastern american mole,Scalopus,insecti,Soricomorpha,lc,8.4,2.1,0.166666667,15.6,0.0012,0.075
|
||||
Cotton rat,Sigmodon,herbi,Rodentia,NA,11.3,1.1,0.15,12.7,0.00118,0.148
|
||||
Mole rat,Spalax,NA,Rodentia,NA,10.6,2.4,NA,13.4,0.003,0.122
|
||||
Arctic ground squirrel,Spermophilus,herbi,Rodentia,lc,16.6,NA,NA,7.4,0.0057,0.92
|
||||
Thirteen-lined ground squirrel,Spermophilus,herbi,Rodentia,lc,13.8,3.4,0.216666667,10.2,0.004,0.101
|
||||
Golden-mantled ground squirrel,Spermophilus,herbi,Rodentia,lc,15.9,3,NA,8.1,NA,0.205
|
||||
Musk shrew,Suncus,NA,Soricomorpha,NA,12.8,2,0.183333333,11.2,0.00033,0.048
|
||||
Pig,Sus,omni,Artiodactyla,domesticated,9.1,2.4,0.5,14.9,0.18,86.25
|
||||
Short-nosed echidna,Tachyglossus,insecti,Monotremata,NA,8.6,NA,NA,15.4,0.025,4.5
|
||||
Eastern american chipmunk,Tamias,herbi,Rodentia,NA,15.8,NA,NA,8.2,NA,0.112
|
||||
Brazilian tapir,Tapirus,herbi,Perissodactyla,vu,4.4,1,0.9,19.6,0.169,207.501
|
||||
Tenrec,Tenrec,omni,Afrosoricida,NA,15.6,2.3,NA,8.4,0.0026,0.9
|
||||
Tree shrew,Tupaia,omni,Scandentia,NA,8.9,2.6,0.233333333,15.1,0.0025,0.104
|
||||
Bottle-nosed dolphin,Tursiops,carni,Cetacea,NA,5.2,NA,NA,18.8,NA,173.33
|
||||
Genet,Genetta,carni,Carnivora,NA,6.3,1.3,NA,17.7,0.0175,2
|
||||
Arctic fox,Vulpes,carni,Carnivora,NA,12.5,NA,NA,11.5,0.0445,3.38
|
||||
Red fox,Vulpes,carni,Carnivora,NA,9.8,2.4,0.35,14.2,0.0504,4.23
|
||||
|
Binary file not shown.
@@ -0,0 +1,34 @@
|
||||
# SLF4J's SimpleLogger configuration file
|
||||
# Simple implementation of Logger that sends all enabled log messages, for all defined loggers, to System.err.
|
||||
|
||||
# Default logging detail level for all instances of SimpleLogger.
|
||||
# Must be one of ("trace", "debug", "info", "warn", or "error").
|
||||
# If not specified, defaults to "info".
|
||||
org.slf4j.simpleLogger.defaultLogLevel=debug
|
||||
|
||||
# Logging detail level for a SimpleLogger instance named "xxxxx".
|
||||
# Must be one of ("trace", "debug", "info", "warn", or "error").
|
||||
# If not specified, the default logging detail level is used.
|
||||
#org.slf4j.simpleLogger.log.xxxxx=
|
||||
|
||||
# Set to true if you want the current date and time to be included in output messages.
|
||||
# Default is false, and will output the number of milliseconds elapsed since startup.
|
||||
org.slf4j.simpleLogger.showDateTime=true
|
||||
|
||||
# The date and time format to be used in the output messages.
|
||||
# The pattern describing the date and time format is the same that is used in java.text.SimpleDateFormat.
|
||||
# If the format is not specified or is invalid, the default format is used.
|
||||
# The default format is yyyy-MM-dd HH:mm:ss:SSS Z.
|
||||
org.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd HH:mm:ss:SSS Z
|
||||
|
||||
# Set to true if you want to output the current thread name.
|
||||
# Defaults to true.
|
||||
org.slf4j.simpleLogger.showThreadName=true
|
||||
|
||||
# Set to true if you want the Logger instance name to be included in output messages.
|
||||
# Defaults to true.
|
||||
org.slf4j.simpleLogger.showLogName=true
|
||||
|
||||
# Set to true if you want the last component of the name to be included in output messages.
|
||||
# Defaults to false.
|
||||
#org.slf4j.simpleLogger.showShortLogName=false
|
||||
@@ -0,0 +1,3 @@
|
||||
12|tuv|0.12|true
|
||||
41|xyz|3.6|not assigned
|
||||
89|abc|7.1|false
|
||||
|
Binary file not shown.
|
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,6 @@
|
||||
,user_id,name,duplicate,username,duplicate,duplicate1,double,number,time,empty
|
||||
0,4,George,,abc,a,null,1203,599.213,2021-01-07 15:12:32,null
|
||||
1,5,Paul,,paul,null,null,N/A,214.211,2021-01-14 14:36:19,null
|
||||
2,8,Johnny,,qwerty,b,null,20,412.214,2021-02-23 19:47:00,null
|
||||
3,10,Jack,,buk,N/A,null,2414,01.01,2021-03-08 23:38:52,null
|
||||
4,12,Samuel,,qwerty,NA,null,inf,00,2021-04-01 02:30:22,null
|
||||
|
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,6 @@
|
||||
;user_id;name;duplicate;username;duplicate;duplicate1;double;number;time;empty
|
||||
0;4;George;"";abc;a;null;1203;599,213;2021-01-07 15:12:32;null
|
||||
1;5;Paul;"";paul;null;null;N/A;214,211;2021-01-14 14:36:19;null
|
||||
2;8;Johnny;;qwerty;b;null;20;412,214;2021-02-23 19:47:00;null
|
||||
3;10;Jack;"";buk;N/A;null;2414;01,01;2021-03-08 23:38:52;null
|
||||
4;12;Samuel;"";qwerty;NA;null;inf;00;2021-04-01 02:30:22;null
|
||||
|
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
"fixed acidity";"volatile acidity";"citric acid";"residual sugar";"chlorides";"free sulfur dioxide";"total sulfur dioxide";"density";"pH";"sulphates";"alcohol";"quality"
|
||||
7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
|
||||
7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5
|
||||
7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;9.8;5
|
||||
11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58;9.8;6
|
||||
7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
|
||||
7.4;0.66;0;1.8;0.075;13;40;0.9978;3.51;0.56;9.4;5
|
||||
7.9;0.6;0.06;1.6;0.069;15;59;0.9964;3.3;0.46;9.4;5
|
||||
7.3;0.65;0;1.2;0.065;15;21;0.9946;3.39;0.47;10;7
|
||||
7.8;0.58;0.02;2;0.073;9;18;0.9968;3.36;0.57;9.5;7
|
||||
|
@@ -0,0 +1,3 @@
|
||||
Column1;Column2
|
||||
0,25;18
|
||||
1,24;19
|
||||
|
Reference in New Issue
Block a user