1670 lines
78 KiB
HTML
Vendored
1670 lines
78 KiB
HTML
Vendored
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<!-- NewPage -->
|
|
<html lang="en">
|
|
<head>
|
|
<!-- Generated by javadoc (1.8.0_312) on Sun Dec 11 09:30:03 MST 2022 -->
|
|
<title>TMD (tech.ml.dataset Documentation)</title>
|
|
<meta name="date" content="2022-12-11">
|
|
<link rel="stylesheet" type="text/css" href="../../stylesheet.css" title="Style">
|
|
<script type="text/javascript" src="../../script.js"></script>
|
|
</head>
|
|
<body>
|
|
<script type="text/javascript"><!--
|
|
try {
|
|
if (location.href.indexOf('is-external=true') == -1) {
|
|
parent.document.title="TMD (tech.ml.dataset Documentation)";
|
|
}
|
|
}
|
|
catch(err) {
|
|
}
|
|
//-->
|
|
var methods = {"i0":9,"i1":9,"i2":9,"i3":9,"i4":9,"i5":9,"i6":9,"i7":9,"i8":9,"i9":9,"i10":9,"i11":9,"i12":9,"i13":9,"i14":9,"i15":9,"i16":9,"i17":9,"i18":9,"i19":9,"i20":9,"i21":9,"i22":9,"i23":9,"i24":9,"i25":9,"i26":9,"i27":9,"i28":9,"i29":9,"i30":9,"i31":9,"i32":9,"i33":9,"i34":9,"i35":9,"i36":9,"i37":9,"i38":9,"i39":9,"i40":9,"i41":9,"i42":9,"i43":9,"i44":9,"i45":9,"i46":9,"i47":9,"i48":9,"i49":9,"i50":9,"i51":9,"i52":9,"i53":9,"i54":9,"i55":9,"i56":9,"i57":9,"i58":9,"i59":9,"i60":9,"i61":9,"i62":9};
|
|
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],8:["t4","Concrete Methods"]};
|
|
var altColor = "altColor";
|
|
var rowColor = "rowColor";
|
|
var tableTab = "tableTab";
|
|
var activeTableTab = "activeTableTab";
|
|
</script>
|
|
<noscript>
|
|
<div>JavaScript is disabled on your browser.</div>
|
|
</noscript>
|
|
<!-- ========= START OF TOP NAVBAR ======= -->
|
|
<div class="topNav"><a name="navbar.top">
|
|
<!-- -->
|
|
</a>
|
|
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
|
|
<a name="navbar.top.firstrow">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="navList" title="Navigation">
|
|
<li><a href="../../overview-summary.html">Overview</a></li>
|
|
<li><a href="package-summary.html">Package</a></li>
|
|
<li class="navBarCell1Rev">Class</li>
|
|
<li><a href="package-tree.html">Tree</a></li>
|
|
<li><a href="../../deprecated-list.html">Deprecated</a></li>
|
|
<li><a href="../../index-all.html">Index</a></li>
|
|
<li><a href="../../help-doc.html">Help</a></li>
|
|
</ul>
|
|
<div class="aboutLanguage"><script async src="https://www.googletagmanager.com/gtag/js?id=G-RGTB4J7LGP"></script><script>window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'G-RGTB4J7LGP');</script></div>
|
|
</div>
|
|
<div class="subNav">
|
|
<ul class="navList">
|
|
<li>Prev Class</li>
|
|
<li>Next Class</li>
|
|
</ul>
|
|
<ul class="navList">
|
|
<li><a href="../../index.html?tech/v3/TMD.html" target="_top">Frames</a></li>
|
|
<li><a href="TMD.html" target="_top">No Frames</a></li>
|
|
</ul>
|
|
<ul class="navList" id="allclasses_navbar_top">
|
|
<li><a href="../../allclasses-noframe.html">All Classes</a></li>
|
|
</ul>
|
|
<div>
|
|
<script type="text/javascript"><!--
|
|
allClassesLink = document.getElementById("allclasses_navbar_top");
|
|
if(window==top) {
|
|
allClassesLink.style.display = "block";
|
|
}
|
|
else {
|
|
allClassesLink.style.display = "none";
|
|
}
|
|
//-->
|
|
</script>
|
|
</div>
|
|
<div>
|
|
<ul class="subNavList">
|
|
<li>Summary: </li>
|
|
<li>Nested | </li>
|
|
<li>Field | </li>
|
|
<li>Constr | </li>
|
|
<li><a href="#method.summary">Method</a></li>
|
|
</ul>
|
|
<ul class="subNavList">
|
|
<li>Detail: </li>
|
|
<li>Field | </li>
|
|
<li>Constr | </li>
|
|
<li><a href="#method.detail">Method</a></li>
|
|
</ul>
|
|
</div>
|
|
<a name="skip.navbar.top">
|
|
<!-- -->
|
|
</a></div>
|
|
<!-- ========= END OF TOP NAVBAR ========= -->
|
|
<!-- ======== START OF CLASS DATA ======== -->
|
|
<div class="header">
|
|
<div class="subTitle">tech.v3</div>
|
|
<h2 title="Class TMD" class="title">Class TMD</h2>
|
|
</div>
|
|
<div class="contentContainer">
|
|
<ul class="inheritance">
|
|
<li>java.lang.Object</li>
|
|
<li>
|
|
<ul class="inheritance">
|
|
<li>tech.v3.TMD</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<div class="description">
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<hr>
|
|
<br>
|
|
<pre>public class <span class="typeNameLabel">TMD</span>
|
|
extends java.lang.Object</pre>
|
|
<div class="block"><p><code>tech.ml.dataset</code> is a high-performance library for processing columnar data similar to pandas or R’s data.table. Datasets are <code>maps</code> of their columns, and columns derive from various Clojure interfaces such as IIndexed and IFn to make accessing their data as easy as possible.</p>
|
|
<p>Columns have a conversion to a <code>tech.v3.datatype.Buffer</code> object accessible via <code>tech.v3.DType.toBuffer()</code>, so if you want higher-performance non-boxing access, that is also available. Any bit of sequential data can be turned into a column. If the data is already in a primitive array or nio buffer, the best approach is to use that as a column - it will be used in place. It is also possible to directly instantiate a Buffer object in a read-only pathway to create a virtualized column:</p>
|
|
<pre><code class="java">println(head(assoc(colmapDs, kw("c"), new tech.v3.datatype.LongReader() {
|
|
public long lsize() { return 10; }
|
|
public long readLong( long idx) {
|
|
return 2*idx;
|
|
}
|
|
})));
|
|
//testds [5 3]:
|
|
|
|
//| :b | :a | :c |
|
|
//|----:|---:|---:|
|
|
//| 9.0 | 0 | 0 |
|
|
//| 8.0 | 1 | 2 |
|
|
//| 7.0 | 2 | 4 |
|
|
//| 6.0 | 3 | 6 |
|
|
//| 5.0 | 4 | 8 |
|
|
</code></pre>
|
|
<p>Datasets implement a subset of java.util.Map and clojure’s persistent map interfaces. This means you can use various <code>java.util.Map</code> functions and you can also use <code>clojure.core/assoc</code>, <code>clojure.core/dissoc</code>, and <code>clojure.core/merge</code> in order to add and remove columns from the dataset. These are exposed in <code>tech.v3.Clj</code> as equivalently named functions. In combination with the fact that columns implement Clojure.lang.IIndexed providing <code>nth</code> as well as the single arity IFn invoke method you can do a surprising amount of dataset processing without using bespoke TMD functions at all.</p>
|
|
<p>All of the functions in <code>tech.v3.datatype.VecMath</code> will work with columns, although most of those functions require the columns to have no missing data. The recommendation is to do your missing-value processing first and then move into the various elemwise functions. Integer columns with missing values will upcast themselves to double columns for any math operation so the result stays consistent w/r/t NaN behavior. Again, ideally missing values should be dealt with before doing operations in the <code>VecMath</code> namespace.</p>
|
|
<p>Most of the functions of the dataset (filter, sort, groupBy) will auto-parallelize, but there are many times where the most efficient use of machine resources is to parallelize at the outermost level using <code>pmapDS</code>. The parallelization primitives check and run in serial mode if the current thread is already in a parallelization pathway.</p></div>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="summary">
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<!-- ========== METHOD SUMMARY =========== -->
|
|
<ul class="blockList">
|
|
<li class="blockList"><a name="method.summary">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Method Summary</h3>
|
|
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
|
|
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption>
|
|
<tr>
|
|
<th class="colFirst" scope="col">Modifier and Type</th>
|
|
<th class="colLast" scope="col">Method and Description</th>
|
|
</tr>
|
|
<tr id="i0" class="altColor">
|
|
<td class="colFirst"><code>static java.lang.Object</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#column-java.lang.Object-java.lang.Object-">column</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname)</code>
|
|
<div class="block">Return the column named <code>cname</code> else throw exception.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i1" class="rowColor">
|
|
<td class="colFirst"><code>static long</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#columnCount-java.lang.Object-">columnCount</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return the number of columns.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i2" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#columnDef-java.lang.Object-java.lang.Object-">columnDef</a></span>(java.lang.Object name,
|
|
java.lang.Object data)</code>
|
|
<div class="block">Efficiently create a column definition explicitly specifying name and data.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i3" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#columnDef-java.lang.Object-java.lang.Object-java.lang.Object-">columnDef</a></span>(java.lang.Object name,
|
|
java.lang.Object data,
|
|
java.lang.Object missing)</code>
|
|
<div class="block">Efficiently create a column definition explicitly specifying name, data, and missing.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i4" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#columnDef-java.lang.Object-java.lang.Object-java.lang.Object-java.lang.Object-">columnDef</a></span>(java.lang.Object name,
|
|
java.lang.Object data,
|
|
java.lang.Object missing,
|
|
java.lang.Object metadata)</code>
|
|
<div class="block">Efficiently create a column definition explicitly specifying name, data, missing, and metadata.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i5" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#columnMap-java.lang.Object-java.lang.Object-clojure.lang.IFn-java.lang.Object-">columnMap</a></span>(java.lang.Object ds,
|
|
java.lang.Object resultCname,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object srcCnames)</code>
|
|
<div class="block">Map a function across 1 or more columns to produce a new column.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i6" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#concatCopying-java.lang.Object-">concatCopying</a></span>(java.lang.Object datasets)</code>
|
|
<div class="block">Concatenate an Iterable of datasets into one dataset via copying data into one dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i7" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#concatInplace-java.lang.Object-">concatInplace</a></span>(java.lang.Object datasets)</code>
|
|
<div class="block">Concatenate an Iterable of datasets into one dataset via creating virtual buffers that index into the previous datasets.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i8" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#descriptiveStats-java.lang.Object-">descriptiveStats</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Create a dataset of the descriptive statistics of the input dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i9" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#descriptiveStats-java.lang.Object-java.lang.Object-">descriptiveStats</a></span>(java.lang.Object ds,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Create a dataset of the descriptive statistics of the input dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i10" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#dropColumns-java.lang.Object-java.lang.Object-">dropColumns</a></span>(java.lang.Object ds,
|
|
java.lang.Object columnNames)</code>
|
|
<div class="block">Drop columns by name.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i11" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#dropRows-java.lang.Object-java.lang.Object-">dropRows</a></span>(java.lang.Object ds,
|
|
java.lang.Object rowIndexes)</code>
|
|
<div class="block">Drop rows by index.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i12" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#filter-java.lang.Object-clojure.lang.IFn-">filter</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn predicate)</code>
|
|
<div class="block">Filter a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i13" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#filterColumn-java.lang.Object-java.lang.Object-clojure.lang.IFn-">filterColumn</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname,
|
|
clojure.lang.IFn predicate)</code>
|
|
<div class="block">Filter a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i14" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#groupBy-java.lang.Object-clojure.lang.IFn-">groupBy</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn groupFn)</code>
|
|
<div class="block">Group a dataset returning a Map of keys to dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i15" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#groupByColumn-java.lang.Object-java.lang.Object-">groupByColumn</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname)</code>
|
|
<div class="block">Group a dataset by a specific column returning a Map of keys to dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i16" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#head-java.lang.Object-">head</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return the first 5 rows of the dataset</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i17" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#head-java.lang.Object-long-">head</a></span>(java.lang.Object ds,
|
|
long nRows)</code>
|
|
<div class="block">Return the first N rows of the dataset</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i18" class="altColor">
|
|
<td class="colFirst"><code>static boolean</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#isDataset-java.lang.Object-">isDataset</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Returns true if this object is a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i19" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#join-java.util.Map-java.util.Map-java.util.Map-">join</a></span>(java.util.Map leftDs,
|
|
java.util.Map rightDs,
|
|
java.util.Map options)</code>
|
|
<div class="block">Perform a join operation between two datasets.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i20" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#leftJoinAsof-java.lang.Object-java.util.Map-java.util.Map-">leftJoinAsof</a></span>(java.lang.Object colname,
|
|
java.util.Map lhs,
|
|
java.util.Map rhs)</code> </td>
|
|
</tr>
|
|
<tr id="i21" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#leftJoinAsof-java.lang.Object-java.util.Map-java.util.Map-java.lang.Object-">leftJoinAsof</a></span>(java.lang.Object colname,
|
|
java.util.Map lhs,
|
|
java.util.Map rhs,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Perform a left join but join on nearest value as opposed to matching value.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i22" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#makeDataset-java.lang.Object-">makeDataset</a></span>(java.lang.Object dsData)</code>
|
|
<div class="block">Make a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i23" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#makeDataset-java.lang.Object-java.util.Map-">makeDataset</a></span>(java.lang.Object dsData,
|
|
java.util.Map options)</code>
|
|
<div class="block">Basic pathway to take data and get back a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i24" class="altColor">
|
|
<td class="colFirst"><code>static org.roaringbitmap.RoaringBitmap</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#missing-java.lang.Object-">missing</a></span>(java.lang.Object dsOrColumn)</code>
|
|
<div class="block">Return the missing set of a dataset or a column in the form of a RoaringBitmap.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i25" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#neanderthalToDataset-java.lang.Object-">neanderthalToDataset</a></span>(java.lang.Object denseMat)</code>
|
|
<div class="block">Convert a neanderthal matrix to a dataset such that the columns of the matrix become the columns of the dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i26" class="altColor">
|
|
<td class="colFirst"><code>static java.lang.Object</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#pmapDS-java.lang.Object-clojure.lang.IFn-java.lang.Object-">pmapDS</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Parallelize mapping a function from dataset->dataset across a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i27" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#renameColumns-java.lang.Object-java.util.Map-">renameColumns</a></span>(java.lang.Object ds,
|
|
java.util.Map renameMap)</code>
|
|
<div class="block">Rename columns providing a map of oldname to newname.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i28" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#replaceMissing-java.lang.Object-java.lang.Object-">replaceMissing</a></span>(java.lang.Object ds,
|
|
java.lang.Object strategy)</code>
|
|
<div class="block">Replace missing values.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i29" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#replaceMissing-java.lang.Object-java.lang.Object-java.lang.Object-">replaceMissing</a></span>(java.lang.Object ds,
|
|
java.lang.Object strategy,
|
|
java.lang.Object columns)</code>
|
|
<div class="block">Replace the missing values from a column or set of columns.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i30" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#reverseRows-java.lang.Object-">reverseRows</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Reverse the rows of the dataset</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i31" class="rowColor">
|
|
<td class="colFirst"><code>static long</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rowCount-java.lang.Object-">rowCount</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return the number of rows.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i32" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rowMap-java.lang.Object-clojure.lang.IFn-">rowMap</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn)</code>
|
|
<div class="block">Map a function across the rows of the dataset with each row in map form.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i33" class="rowColor">
|
|
<td class="colFirst"><code>static java.lang.Object</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rowMap-java.lang.Object-clojure.lang.IFn-java.lang.Object-">rowMap</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Map a function across the rows of the dataset with each row in map form.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i34" class="altColor">
|
|
<td class="colFirst"><code>static java.lang.Object</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rowMapcat-java.lang.Object-clojure.lang.IFn-java.lang.Object-">rowMapcat</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Map a function across the rows of the dataset with each row in map form.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i35" class="rowColor">
|
|
<td class="colFirst"><code>static tech.v3.datatype.Buffer</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rows-java.lang.Object-">rows</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return the rows of the dataset in a flyweight map format.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i36" class="altColor">
|
|
<td class="colFirst"><code>static tech.v3.datatype.Buffer</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rowvecs-java.lang.Object-">rowvecs</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return the rows of the dataset where each row is just a flat Buffer of data.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i37" class="rowColor">
|
|
<td class="colFirst"><code>static tech.v3.datatype.Buffer</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#rowvecs-java.lang.Object-boolean-">rowvecs</a></span>(java.lang.Object ds,
|
|
boolean copying)</code>
|
|
<div class="block">Return the rows of the dataset where each row is just a flat Buffer of data.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i38" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sample-java.lang.Object-">sample</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return a random sampling of 5 rows without replacement of the data</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i39" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sample-java.lang.Object-long-">sample</a></span>(java.lang.Object ds,
|
|
long nRows)</code>
|
|
<div class="block">Return a random sampling of N rows without replacement of the data</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i40" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sample-java.lang.Object-long-java.util.Map-">sample</a></span>(java.lang.Object ds,
|
|
long nRows,
|
|
java.util.Map options)</code>
|
|
<div class="block">Return a random sampling of N rows of the data.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i41" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#select-java.lang.Object-java.lang.Object-java.lang.Object-">select</a></span>(java.lang.Object ds,
|
|
java.lang.Object columnNames,
|
|
java.lang.Object rows)</code>
|
|
<div class="block">Select a sub-rect of the dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i42" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#selectColumns-java.lang.Object-java.lang.Object-">selectColumns</a></span>(java.lang.Object ds,
|
|
java.lang.Object columnNames)</code>
|
|
<div class="block">Select columns by name.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i43" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#selectRows-java.lang.Object-java.lang.Object-">selectRows</a></span>(java.lang.Object ds,
|
|
java.lang.Object rowIndexes)</code>
|
|
<div class="block">Select rows by index.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i44" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#shuffle-java.lang.Object-">shuffle</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Randomly shuffle the dataset rows.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i45" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#shuffle-java.lang.Object-java.util.Map-">shuffle</a></span>(java.lang.Object ds,
|
|
java.util.Map options)</code>
|
|
<div class="block">Randomly shuffle the dataset rows.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i46" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sortBy-java.lang.Object-clojure.lang.IFn-">sortBy</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn sortFn)</code>
|
|
<div class="block">Sort a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i47" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sortBy-java.lang.Object-clojure.lang.IFn-java.lang.Object-">sortBy</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn sortFn,
|
|
java.lang.Object compareFn)</code>
|
|
<div class="block">Sort a dataset.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i48" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sortBy-java.lang.Object-clojure.lang.IFn-java.lang.Object-java.lang.Object-">sortBy</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn sortFn,
|
|
java.lang.Object compareFn,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Sort a dataset by first mapping <code>sortFn</code> over it and then sorting over the result.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i49" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sortByColumn-java.lang.Object-java.lang.Object-">sortByColumn</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname)</code>
|
|
<div class="block">Sort a dataset by a specific column.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i50" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sortByColumn-java.lang.Object-java.lang.Object-java.lang.Object-">sortByColumn</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname,
|
|
java.lang.Object compareFn)</code>
|
|
<div class="block">Sort a dataset by a specific column.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i51" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#sortByColumn-java.lang.Object-java.lang.Object-java.lang.Object-java.lang.Object-">sortByColumn</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname,
|
|
java.lang.Object compareFn,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Sort a dataset by using the values from column <code>cname</code>.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i52" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#tail-java.lang.Object-">tail</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Return the last 5 rows of the dataset</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i53" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#tail-java.lang.Object-long-">tail</a></span>(java.lang.Object ds,
|
|
long nRows)</code>
|
|
<div class="block">Return the last N rows of the dataset</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i54" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#tensorToDataset-java.lang.Object-">tensorToDataset</a></span>(java.lang.Object tens)</code>
|
|
<div class="block">Convert a tensor to a dataset such that the columns of the tensor become the columns of the dataset named after their index.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i55" class="rowColor">
|
|
<td class="colFirst"><code>static java.lang.Object</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#toNeanderthal-java.lang.Object-">toNeanderthal</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Convert a dataset to a neanderthal 2D matrix such that the columns of the dataset become the columns of the matrix.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i56" class="altColor">
|
|
<td class="colFirst"><code>static java.lang.Object</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#toNeanderthal-java.lang.Object-clojure.lang.Keyword-clojure.lang.Keyword-">toNeanderthal</a></span>(java.lang.Object ds,
|
|
clojure.lang.Keyword layout,
|
|
clojure.lang.Keyword datatype)</code>
|
|
<div class="block">Convert a dataset to a neanderthal 2D matrix such that the columns of the dataset become the columns of the matrix.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i57" class="rowColor">
|
|
<td class="colFirst"><code>static tech.v3.datatype.NDBuffer</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#toTensor-java.lang.Object-">toTensor</a></span>(java.lang.Object ds)</code>
|
|
<div class="block">Convert a dataset to a jvm-heap based 2D double (float64) tensor.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i58" class="altColor">
|
|
<td class="colFirst"><code>static tech.v3.datatype.NDBuffer</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#toTensor-java.lang.Object-clojure.lang.Keyword-">toTensor</a></span>(java.lang.Object ds,
|
|
clojure.lang.Keyword datatype)</code>
|
|
<div class="block">Convert a dataset to a jvm-heap based 2D tensor such that the columns of the dataset become the columns of the tensor.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i59" class="rowColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#uniqueBy-java.lang.Object-clojure.lang.IFn-">uniqueBy</a></span>(java.lang.Object ds,
|
|
clojure.lang.IFn uniqueFn)</code>
|
|
<div class="block">Create a dataset with no duplicates by taking first of duplicate values.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i60" class="altColor">
|
|
<td class="colFirst"><code>static java.util.Map</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#uniqueByColumn-java.lang.Object-java.lang.Object-">uniqueByColumn</a></span>(java.lang.Object ds,
|
|
java.lang.Object cname)</code>
|
|
<div class="block">Make a dataset unique using a particular column as the uniqueness criteria and taking the first value.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i61" class="rowColor">
|
|
<td class="colFirst"><code>static void</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#writeDataset-java.lang.Object-java.lang.String-">writeDataset</a></span>(java.lang.Object ds,
|
|
java.lang.String path)</code>
|
|
<div class="block">Write a dataset to disc as csv, tsv, csv.gz, tsv.gz or nippy.</div>
|
|
</td>
|
|
</tr>
|
|
<tr id="i62" class="altColor">
|
|
<td class="colFirst"><code>static void</code></td>
|
|
<td class="colLast"><code><span class="memberNameLink"><a href="../../tech/v3/TMD.html#writeDataset-java.lang.Object-java.lang.String-java.lang.Object-">writeDataset</a></span>(java.lang.Object ds,
|
|
java.lang.String path,
|
|
java.lang.Object options)</code>
|
|
<div class="block">Write a dataset to disc as csv, tsv, csv.gz, tsv.gz, json, json.gz or nippy.</div>
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
<ul class="blockList">
|
|
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Methods inherited from class java.lang.Object</h3>
|
|
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="details">
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<!-- ============ METHOD DETAIL ========== -->
|
|
<ul class="blockList">
|
|
<li class="blockList"><a name="method.detail">
|
|
<!-- -->
|
|
</a>
|
|
<h3>Method Detail</h3>
|
|
<a name="makeDataset-java.lang.Object-java.util.Map-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>makeDataset</h4>
|
|
<pre>public static java.util.Map makeDataset(java.lang.Object dsData,
|
|
java.util.Map options)</pre>
|
|
<div class="block"><p>Basic pathway to take data and get back a dataset. If dsData is a string, a built-in system can parse csv, tsv, csv.gz, tsv.gz, .json, json.gz and .nippy format files. Specific other formats such as xlsx, apache arrow and parquet formats are provided in other classes.</p>
|
|
<p>Aside from string data formats, you can explicitly provide either a sequence of maps or a map of columns, with the map of columns being by far the most efficient. In the map-of-columns approach arrays of primitive numeric data and native buffers will be used in-place.</p>
|
|
<p>The options for parsing a dataset are extensive and documented at <a href="https://techascent.github.io/tech.ml.dataset/tech.v3.dataset.html#var--.3Edataset">->dataset</a>.</p>
|
|
<p>Example:</p>
|
|
<pre><code class="java"> Map ds = makeDataset("https://github.com/techascent/tech.ml.dataset/raw/master/test/data/stocks.csv");
|
|
tech.v3.Clj.println(head(ds));
|
|
// https://github.com/techascent/tech.ml.dataset/raw/master/test/data/stocks.csv [5 3]:
|
|
// | symbol | date | price |
|
|
// |--------|------------|------:|
|
|
// | MSFT | 2000-01-01 | 39.81 |
|
|
// | MSFT | 2000-02-01 | 36.35 |
|
|
// | MSFT | 2000-03-01 | 43.22 |
|
|
// | MSFT | 2000-04-01 | 28.37 |
|
|
// | MSFT | 2000-05-01 | 25.45 |
|
|
Map colmapDs = makeDataset(hashmap(kw("a"), range(10),
|
|
kw("b"), toDoubleArray(range(9,-1,-1))),
|
|
hashmap(kw("dataset-name"), "testds"));
|
|
println(colmapDs);
|
|
// testds [10 2]:
|
|
|
|
// | :b | :a |
|
|
// |----:|---:|
|
|
// | 9.0 | 0 |
|
|
// | 8.0 | 1 |
|
|
// | 7.0 | 2 |
|
|
// | 6.0 | 3 |
|
|
// | 5.0 | 4 |
|
|
// | 4.0 | 5 |
|
|
// | 3.0 | 6 |
|
|
// | 2.0 | 7 |
|
|
// | 1.0 | 8 |
|
|
// | 0.0 | 9 |
|
|
</code></pre></div>
|
|
</li>
|
|
</ul>
|
|
<a name="makeDataset-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>makeDataset</h4>
|
|
<pre>public static java.util.Map makeDataset(java.lang.Object dsData)</pre>
|
|
<div class="block"><p>Make a dataset. See 2-arity form of function.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="isDataset-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>isDataset</h4>
|
|
<pre>public static boolean isDataset(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Returns true if this object is a dataset.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rowCount-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rowCount</h4>
|
|
<pre>public static long rowCount(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return the number of rows.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="columnCount-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>columnCount</h4>
|
|
<pre>public static long columnCount(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return the number of columns.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="column-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>column</h4>
|
|
<pre>public static java.lang.Object column(java.lang.Object ds,
|
|
java.lang.Object cname)</pre>
|
|
<div class="block"><p>Return the column named <code>cname</code> else throw exception.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="columnDef-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>columnDef</h4>
|
|
<pre>public static java.util.Map columnDef(java.lang.Object name,
|
|
java.lang.Object data)</pre>
|
|
<div class="block"><p>Efficiently create a column definition explicitly specifying name and data. Typed data will be scanned for missing values and untyped data will be read element by element to discern datatype and missing information. The result can be <code>assoc</code>d back into the dataset.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="columnDef-java.lang.Object-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>columnDef</h4>
|
|
<pre>public static java.util.Map columnDef(java.lang.Object name,
|
|
java.lang.Object data,
|
|
java.lang.Object missing)</pre>
|
|
<div class="block"><p>Efficiently create a column definition explicitly specifying name, data, and missing. The result can be <code>assoc</code>d back into the dataset. Missing will be converted to a RoaringBitmap but can additionally be an integer array, a java set, or a sequence of integers.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="columnDef-java.lang.Object-java.lang.Object-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>columnDef</h4>
|
|
<pre>public static java.util.Map columnDef(java.lang.Object name,
|
|
java.lang.Object data,
|
|
java.lang.Object missing,
|
|
java.lang.Object metadata)</pre>
|
|
<div class="block"><p>Efficiently create a column definition explicitly specifying name, data, missing, and metadata. The result can be <code>assoc</code>d back into the dataset and saves the system the time required to scan for missing elements. Missing will be converted to a RoaringBitmap but can additionally be an integer array, a java set, or a sequence of integers.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="select-java.lang.Object-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>select</h4>
|
|
<pre>public static java.util.Map select(java.lang.Object ds,
|
|
java.lang.Object columnNames,
|
|
java.lang.Object rows)</pre>
|
|
<div class="block"><p>Select a sub-rect of the dataset. <code>columnNames</code> is a sequence of column names that must exist in the dataset. <code>rows</code> is a sequence, list, array, or bitmap of integer row indexes to select. The returned dataset has its columns in the order specified by <code>columnNames</code>.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="selectColumns-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>selectColumns</h4>
|
|
<pre>public static java.util.Map selectColumns(java.lang.Object ds,
|
|
java.lang.Object columnNames)</pre>
|
|
<div class="block"><p>Select columns by name. All names must exist in the dataset.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="dropColumns-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>dropColumns</h4>
|
|
<pre>public static java.util.Map dropColumns(java.lang.Object ds,
|
|
java.lang.Object columnNames)</pre>
|
|
<div class="block"><p>Drop columns by name. All names must exist in the dataset. Another option is to use the Clojure function <code>dissoc</code>.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="renameColumns-java.lang.Object-java.util.Map-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>renameColumns</h4>
|
|
<pre>public static java.util.Map renameColumns(java.lang.Object ds,
|
|
java.util.Map renameMap)</pre>
|
|
<div class="block"><p>Rename columns providing a map of oldname to newname.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="selectRows-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>selectRows</h4>
|
|
<pre>public static java.util.Map selectRows(java.lang.Object ds,
|
|
java.lang.Object rowIndexes)</pre>
|
|
<div class="block"><p>Select rows by index.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="dropRows-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>dropRows</h4>
|
|
<pre>public static java.util.Map dropRows(java.lang.Object ds,
|
|
java.lang.Object rowIndexes)</pre>
|
|
<div class="block"><p>Drop rows by index.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="missing-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>missing</h4>
|
|
<pre>public static org.roaringbitmap.RoaringBitmap missing(java.lang.Object dsOrColumn)</pre>
|
|
<div class="block"><p>Return the missing set of a dataset or a column in the form of a RoaringBitmap.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="replaceMissing-java.lang.Object-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>replaceMissing</h4>
|
|
<pre>public static java.util.Map replaceMissing(java.lang.Object ds,
|
|
java.lang.Object strategy,
|
|
java.lang.Object columns)</pre>
|
|
<div class="block"><p>Replace the missing values from a column or set of columns. To replace across all columns use the keyword :all.</p>
|
|
<p>Strategy can be:</p>
|
|
<ul>
|
|
<li><code>:up</code> - take next value</li>
|
|
<li><code>:down</code> - take previous value</li>
|
|
<li><code>:lerp</code> - linearly interpolate across values. Datetime objects will have interpolation done in millisecond space.</li>
|
|
<li><code>vector(:value, val)</code> - Provide this value explicitly to replace entries.</li>
|
|
<li><code>:nearest</code> - use the nearest value.</li>
|
|
<li><code>:midpoint</code> - use the mean of the range.</li>
|
|
<li><code>:abb</code> - impute missing values using approximate bayesian bootstrap.</li>
|
|
</ul>
|
|
<p>Further documentation is located at <a href="https://techascent.github.io/tech.ml.dataset/tech.v3.dataset.html#var-replace-missing">replace-missing</a>.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="replaceMissing-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>replaceMissing</h4>
|
|
<pre>public static java.util.Map replaceMissing(java.lang.Object ds,
|
|
java.lang.Object strategy)</pre>
|
|
<div class="block"><p>Replace missing values. See 3-arity form of function for documentation.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rows-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rows</h4>
|
|
<pre>public static tech.v3.datatype.Buffer rows(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return the rows of the dataset in a flyweight map format. Maps share keys and read their data lazily from the base dataset.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rowvecs-java.lang.Object-boolean-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rowvecs</h4>
|
|
<pre>public static tech.v3.datatype.Buffer rowvecs(java.lang.Object ds,
|
|
boolean copying)</pre>
|
|
<div class="block"><p>Return the rows of the dataset where each row is just a flat Buffer of data.</p>
|
|
<p>When copying is true data is copied upon each access from the underlying dataset. This makes doing something like using each row as the key in a map more efficient.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rowvecs-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rowvecs</h4>
|
|
<pre>public static tech.v3.datatype.Buffer rowvecs(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return the rows of the dataset where each row is just a flat Buffer of data.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="head-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>head</h4>
|
|
<pre>public static java.util.Map head(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return the first 5 rows of the dataset</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="head-java.lang.Object-long-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>head</h4>
|
|
<pre>public static java.util.Map head(java.lang.Object ds,
|
|
long nRows)</pre>
|
|
<div class="block"><p>Return the first N rows of the dataset</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="tail-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>tail</h4>
|
|
<pre>public static java.util.Map tail(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return the last 5 rows of the dataset</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="tail-java.lang.Object-long-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>tail</h4>
|
|
<pre>public static java.util.Map tail(java.lang.Object ds,
|
|
long nRows)</pre>
|
|
<div class="block"><p>Return the last N rows of the dataset</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="sample-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sample</h4>
|
|
<pre>public static java.util.Map sample(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Return a random sampling of 5 rows without replacement of the data</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="sample-java.lang.Object-long-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sample</h4>
|
|
<pre>public static java.util.Map sample(java.lang.Object ds,
|
|
long nRows)</pre>
|
|
<div class="block"><p>Return a random sampling of N rows without replacement of the data</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="sample-java.lang.Object-long-java.util.Map-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sample</h4>
|
|
<pre>public static java.util.Map sample(java.lang.Object ds,
|
|
long nRows,
|
|
java.util.Map options)</pre>
|
|
<div class="block"><p>Return a random sampling of N rows of the data.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:replacement?</code> - Do sampling with replacement. Defaults to false.</li>
|
|
<li><code>:seed</code> - Either an integer or an implementation of java.util.Random.</li>
|
|
</ul></div>
|
|
</li>
|
|
</ul>
|
|
<a name="shuffle-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>shuffle</h4>
|
|
<pre>public static java.util.Map shuffle(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Randomly shuffle the dataset rows.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="shuffle-java.lang.Object-java.util.Map-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>shuffle</h4>
|
|
<pre>public static java.util.Map shuffle(java.lang.Object ds,
|
|
java.util.Map options)</pre>
|
|
<div class="block"><p>Randomly shuffle the dataset rows.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:seed</code> - Either an integer or an implementation of java.util.Random.</li>
|
|
</ul></div>
|
|
</li>
|
|
</ul>
|
|
<a name="reverseRows-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>reverseRows</h4>
|
|
<pre>public static java.util.Map reverseRows(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Reverse the rows of the dataset</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="columnMap-java.lang.Object-java.lang.Object-clojure.lang.IFn-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>columnMap</h4>
|
|
<pre>public static java.util.Map columnMap(java.lang.Object ds,
|
|
java.lang.Object resultCname,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object srcCnames)</pre>
|
|
<div class="block"><p>Map a function across 1 or more columns to produce a new column. The new column is serially scanned to detect datatype and its missing set.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rowMap-java.lang.Object-clojure.lang.IFn-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rowMap</h4>
|
|
<pre>public static java.util.Map rowMap(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn)</pre>
|
|
<div class="block"><p>Map a function across the rows of the dataset with each row in map form. Function must return a new map for each row. Result is generated in parallel so, when used with a map factory, this is a surprisingly efficient strategy to create multiple columns at once from each row.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rowMap-java.lang.Object-clojure.lang.IFn-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rowMap</h4>
|
|
<pre>public static java.lang.Object rowMap(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Map a function across the rows of the dataset with each row in map form. Function must return a new map for each row. Result is generated in parallel so, when used with a map factory, this is a surprisingly efficient strategy to create multiple columns at once from each row.</p>
|
|
<p>See options for pmapDS. Especially note <code>:max-batch-size</code> and <code>:result-type</code>. In order to conserve memory it may be much more efficient to return a sequence of datasets rather than one large dataset. If returning sequences of datasets perhaps consider a transducing pathway across them or the tech.v3.dataset.reductions namespace.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="rowMapcat-java.lang.Object-clojure.lang.IFn-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>rowMapcat</h4>
|
|
<pre>public static java.lang.Object rowMapcat(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Map a function across the rows of the dataset with each row in map form. Function must return either null or a sequence of maps and thus can produce many new rows for each input row. Function is called in a parallelized context. Maps returned must be an implementation of clojure’s IPersistentMap. See <a href="https://cnuernber.github.io/dtype-next/javadoc/tech/v3/DType.html#mapFactory-java.util.List-">tech.v3.Clj.mapFactory</a> for an efficient way to create those in bulk.</p>
|
|
<p>See options for pmapDS. Especially note <code>:max-batch-size</code> and <code>:result-type</code>. In order to conserve memory it may be much more efficient to return a sequence of datasets rather than one large dataset. If returning sequences of datasets perhaps consider a transducing pathway across them or the tech.v3.dataset.reductions namespace.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="pmapDS-java.lang.Object-clojure.lang.IFn-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>pmapDS</h4>
|
|
<pre>public static java.lang.Object pmapDS(java.lang.Object ds,
|
|
clojure.lang.IFn mapFn,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Parallelize mapping a function from dataset->dataset across a dataset. Function may return null. The original dataset is simply sliced into n-core results and map-fn is called n-core times with the results either concatenated into a new dataset or returned as an Iterable.</p>
|
|
<p>Most of the functions of the dataset (filter, sort, groupBy) will auto-parallelize, but there are many times where the most efficient use of machine resources is to parallelize at the outermost level. The parallelization primitives check and run in serial mode if the current thread is already in a parallelization pathway.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><p><code>mapFn</code> - a function from dataset-&gt;dataset although it may return null.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:max-batch-size</code> - Defaults to 64000. This controls the size of each parallelized chunk.</li>
|
|
<li><code>:result-type</code> - Either <code>:as-seq</code> in which case the output of this function is a sequence of datasets or <code>:as-ds</code> in which case the output is a single dataset. The default is <code>:as-ds</code>.</li>
|
|
</ul></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="sortBy-java.lang.Object-clojure.lang.IFn-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sortBy</h4>
|
|
<pre>public static java.util.Map sortBy(java.lang.Object ds,
|
|
clojure.lang.IFn sortFn,
|
|
java.lang.Object compareFn,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Sort a dataset by first mapping <code>sortFn</code> over it and then sorting over the result. <code>sortFn</code> is passed each row in map form and the return value is used to sort the dataset.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>sortFn</code> - function taking a single argument which is the row-map and returns the value to sort on.</dd>
|
|
<dd><p><code>compareFn</code> - Comparison operator or comparator. Some examples are the Clojure ‘&lt;’ or ‘&gt;’ operators - tech.v3.Clj.lessThanFn, tech.v3.Clj.greaterThanFn. The Clojure keywords <code>:tech.numerics/&lt;</code> and <code>:tech.numerics/&gt;</code> can be used for somewhat higher performance unboxed primitive comparisons or the Clojure function <code>compare</code> - tech.v3.Clj.compareFn - which is similar to .compareTo except it works with null and the input must implement Comparable. Finally you can instantiate an instance of java.util.Comparator.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:nan-strategy</code> - General missing strategy. Options are <code>:first</code>, <code>:last</code>, and <code>:exception</code>.</li>
|
|
<li><code>:parallel?</code> - Uses parallel quicksort when true and regular quicksort when false.</li>
|
|
</ul></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="sortBy-java.lang.Object-clojure.lang.IFn-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sortBy</h4>
|
|
<pre>public static java.util.Map sortBy(java.lang.Object ds,
|
|
clojure.lang.IFn sortFn,
|
|
java.lang.Object compareFn)</pre>
|
|
<div class="block"><p>Sort a dataset. See documentation of 4-arity version.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="sortBy-java.lang.Object-clojure.lang.IFn-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sortBy</h4>
|
|
<pre>public static java.util.Map sortBy(java.lang.Object ds,
|
|
clojure.lang.IFn sortFn)</pre>
|
|
<div class="block"><p>Sort a dataset. See documentation of 4-arity version.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="sortByColumn-java.lang.Object-java.lang.Object-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sortByColumn</h4>
|
|
<pre>public static java.util.Map sortByColumn(java.lang.Object ds,
|
|
java.lang.Object cname,
|
|
java.lang.Object compareFn,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Sort a dataset by using the values from column <code>cname</code>.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><p><code>compareFn</code> - Comparison operator or comparator. Some examples are the Clojure ‘&lt;’ or ‘&gt;’ operators - tech.v3.Clj.lessThanFn, tech.v3.Clj.greaterThanFn. The Clojure keywords <code>:tech.numerics/&lt;</code> and <code>:tech.numerics/&gt;</code> can be used for somewhat higher performance unboxed primitive comparisons or the Clojure function <code>compare</code> - tech.v3.Clj.compareFn - which is similar to .compareTo except it works with null and the input must implement Comparable. Finally you can instantiate an instance of java.util.Comparator.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:nan-strategy</code> - General missing strategy. Options are <code>:first</code>, <code>:last</code>, and <code>:exception</code>.</li>
|
|
<li><code>:parallel?</code> - Uses parallel quicksort when true and regular quicksort when false.</li>
|
|
</ul></dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="sortByColumn-java.lang.Object-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sortByColumn</h4>
|
|
<pre>public static java.util.Map sortByColumn(java.lang.Object ds,
|
|
java.lang.Object cname,
|
|
java.lang.Object compareFn)</pre>
|
|
<div class="block"><p>Sort a dataset by a specific column. See documentation on 4-arity version.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="sortByColumn-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>sortByColumn</h4>
|
|
<pre>public static java.util.Map sortByColumn(java.lang.Object ds,
|
|
java.lang.Object cname)</pre>
|
|
<div class="block"><p>Sort a dataset by a specific column. See documentation on 4-arity version.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="filter-java.lang.Object-clojure.lang.IFn-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>filter</h4>
|
|
<pre>public static java.util.Map filter(java.lang.Object ds,
|
|
clojure.lang.IFn predicate)</pre>
|
|
<div class="block"><p>Filter a dataset. Predicate gets passed each row and must return a <code>truthy</code> value.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="filterColumn-java.lang.Object-java.lang.Object-clojure.lang.IFn-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>filterColumn</h4>
|
|
<pre>public static java.util.Map filterColumn(java.lang.Object ds,
|
|
java.lang.Object cname,
|
|
clojure.lang.IFn predicate)</pre>
|
|
<div class="block"><p>Filter a dataset. Predicate gets passed values from column <code>cname</code> and must return a <code>truthy</code> value.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="groupBy-java.lang.Object-clojure.lang.IFn-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>groupBy</h4>
|
|
<pre>public static java.util.Map groupBy(java.lang.Object ds,
|
|
clojure.lang.IFn groupFn)</pre>
|
|
<div class="block"><p>Group a dataset returning a Map of keys to dataset.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>groupFn</code> - Gets passed each row in map format and must return the desired key.</dd>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>a map of key to dataset.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="groupByColumn-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>groupByColumn</h4>
|
|
<pre>public static java.util.Map groupByColumn(java.lang.Object ds,
|
|
java.lang.Object cname)</pre>
|
|
<div class="block"><p>Group a dataset by a specific column returning a Map of keys to dataset.</p></div>
|
|
<dl>
|
|
<dt><span class="returnLabel">Returns:</span></dt>
|
|
<dd>a map of key to dataset.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="concatCopying-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>concatCopying</h4>
|
|
<pre>public static java.util.Map concatCopying(java.lang.Object datasets)</pre>
|
|
<div class="block"><p>Concatenate an Iterable of datasets into one dataset via copying data into one dataset. This generally results in higher performance than an in-place concatenation with the exception of small (< 3) numbers of datasets. Null datasets will be silently ignored.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="concatInplace-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>concatInplace</h4>
|
|
<pre>public static java.util.Map concatInplace(java.lang.Object datasets)</pre>
|
|
<div class="block"><p>Concatenate an Iterable of datasets into one dataset via creating virtual buffers that index into the previous datasets. This generally results in lower performance than a copying concatenation with the exception of small (< 3) numbers of datasets. Null datasets will be silently ignored.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="uniqueBy-java.lang.Object-clojure.lang.IFn-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>uniqueBy</h4>
|
|
<pre>public static java.util.Map uniqueBy(java.lang.Object ds,
|
|
clojure.lang.IFn uniqueFn)</pre>
|
|
<div class="block"><p>Create a dataset with no duplicates by taking first of duplicate values.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>uniqueFn</code> - is passed a row and must return the uniqueness criteria. An example uniqueFn is the identity function.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="uniqueByColumn-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>uniqueByColumn</h4>
|
|
<pre>public static java.util.Map uniqueByColumn(java.lang.Object ds,
|
|
java.lang.Object cname)</pre>
|
|
<div class="block"><p>Make a dataset unique using a particular column as the uniqueness criteria and taking the first value.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="descriptiveStats-java.lang.Object-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>descriptiveStats</h4>
|
|
<pre>public static java.util.Map descriptiveStats(java.lang.Object ds,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Create a dataset of the descriptive statistics of the input dataset. This works with date-time columns, missing values, etc. and serves as a very fast way to get a feel for a dataset.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:stat-names</code> - A set of desired stat names. Possible statistic operations are: <code>[:col-name :datatype :n-valid :n-missing :min :quartile-1 :mean :mode :median
|
|
:quartile-3 :max :standard-deviation :skew :n-values :values :histogram :first
|
|
:last]</code></li>
|
|
</ul></div>
|
|
</li>
|
|
</ul>
|
|
<a name="descriptiveStats-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>descriptiveStats</h4>
|
|
<pre>public static java.util.Map descriptiveStats(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Create a dataset of the descriptive statistics of the input dataset. This works with date-time columns, missing values, etc. and serves as a very fast way to get a feel for a dataset.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:stat-names</code> - A set of desired stat names. Possible statistic operations are: <code>[:col-name :datatype :n-valid :n-missing :min :quartile-1 :mean :mode :median
|
|
:quartile-3 :max :standard-deviation :skew :n-values :values :histogram :first
|
|
:last]</code></li>
|
|
</ul></div>
|
|
</li>
|
|
</ul>
|
|
<a name="join-java.util.Map-java.util.Map-java.util.Map-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>join</h4>
|
|
<pre>public static java.util.Map join(java.util.Map leftDs,
|
|
java.util.Map rightDs,
|
|
java.util.Map options)</pre>
|
|
<div class="block"><p>Perform a join operation between two datasets.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:on</code> - Column name or list of column names. Names must be found in both datasets.</li>
|
|
<li><code>:left-on</code> - Column name or list of column names</li>
|
|
<li><code>:right-on</code> - Column name or list of column names</li>
|
|
<li><code>:how</code> - One of <code>:left</code>, <code>:right</code>, <code>:inner</code>, <code>:outer</code>, or <code>:cross</code>. If <code>:cross</code>, then it is an error to provide <code>:on</code>, <code>:left-on</code>, or <code>:right-on</code>. Defaults to <code>:inner</code>.</li>
|
|
</ul>
|
|
<p>Examples:</p>
|
|
<pre><code class="java">Map dsa = makeDataset(hashmap("a", vector("a", "b", "b", "a", "c"),
|
|
"b", range(5),
|
|
"c", range(5)));
|
|
println(dsa);
|
|
//_unnamed [5 3]:
|
|
|
|
//| a | b | c |
|
|
//|---|--:|--:|
|
|
//| a | 0 | 0 |
|
|
//| b | 1 | 1 |
|
|
//| b | 2 | 2 |
|
|
//| a | 3 | 3 |
|
|
//| c | 4 | 4 |
|
|
|
|
|
|
Map dsb = makeDataset(hashmap("a", vector("a", "b", "a", "b", "d"),
|
|
"b", range(5),
|
|
"c", range(6,11)));
|
|
println(dsb);
|
|
//_unnamed [5 3]:
|
|
|
|
//| a | b | c |
|
|
//|---|--:|---:|
|
|
//| a | 0 | 6 |
|
|
//| b | 1 | 7 |
|
|
//| a | 2 | 8 |
|
|
//| b | 3 | 9 |
|
|
//| d | 4 | 10 |
|
|
|
|
//Join on the columns a,b. Default join mode is inner
|
|
println(join(dsa, dsb, hashmap(kw("on"), vector("a", "b"))));
|
|
//inner-join [2 4]:
|
|
|
|
//| a | b | c | right.c |
|
|
//|---|--:|--:|--------:|
|
|
//| a | 0 | 0 | 6 |
|
|
//| b | 1 | 1 | 7 |
|
|
|
|
|
|
//Outer join on same columns
|
|
println(join(dsa, dsb, hashmap(kw("on"), vector("a", "b"),
|
|
kw("how"), kw("outer"))));
|
|
//outer-join [8 4]:
|
|
|
|
//| a | b | c | right.c |
|
|
//|---|--:|--:|--------:|
|
|
//| a | 0 | 0 | 6 |
|
|
//| b | 1 | 1 | 7 |
|
|
//| b | 2 | 2 | |
|
|
//| a | 3 | 3 | |
|
|
//| c | 4 | 4 | |
|
|
//| a | 2 | | 8 |
|
|
//| b | 3 | | 9 |
|
|
//| d | 4 | | 10 |
|
|
</code></pre></div>
|
|
</li>
|
|
</ul>
|
|
<a name="leftJoinAsof-java.lang.Object-java.util.Map-java.util.Map-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>leftJoinAsof</h4>
|
|
<pre>public static java.util.Map leftJoinAsof(java.lang.Object colname,
|
|
java.util.Map lhs,
|
|
java.util.Map rhs,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Perform a left join but join on nearest value as opposed to matching value. Both datasets must be sorted by the join column and the join column itself must be either a datetime column or a numeric column. When the join column is a datetime column the join happens in millisecond space.</p>
|
|
<p>Options:</p>
|
|
<ul>
|
|
<li><code>:asof-op</code> - One of the keywords <code>[:&lt; :&lt;= :nearest :&gt;= :&gt;]</code>. Defaults to <code>:&lt;=</code>.</li>
|
|
</ul>
|
|
<p>Examples:</p>
|
|
<pre><code class="java">println(head(googPrices, 200));
|
|
//GOOG [68 3]:
|
|
//| symbol | date | price |
|
|
//|--------|------------|-------:|
|
|
//| GOOG | 2004-08-01 | 102.37 |
|
|
//| GOOG | 2004-09-01 | 129.60 |
|
|
//| GOOG | 2005-03-01 | 180.51 |
|
|
//| GOOG | 2004-11-01 | 181.98 |
|
|
//| GOOG | 2005-02-01 | 187.99 |
|
|
//| GOOG | 2004-10-01 | 190.64 |
|
|
//| GOOG | 2004-12-01 | 192.79 |
|
|
//| GOOG | 2005-01-01 | 195.62 |
|
|
//| GOOG | 2005-04-01 | 220.00 |
|
|
//| GOOG | 2005-05-01 | 277.27 |
|
|
//| GOOG | 2005-08-01 | 286.00 |
|
|
//| GOOG | 2005-07-01 | 287.76 |
|
|
//| GOOG | 2008-11-01 | 292.96 |
|
|
//| GOOG | 2005-06-01 | 294.15 |
|
|
//| GOOG | 2008-12-01 | 307.65 |
|
|
//| GOOG | 2005-09-01 | 316.46 |
|
|
//| GOOG | 2009-02-01 | 337.99 |
|
|
//| GOOG | 2009-01-01 | 338.53 |
|
|
//| GOOG | 2009-03-01 | 348.06 |
|
|
//| GOOG | 2008-10-01 | 359.36 |
|
|
//| GOOG | 2006-02-01 | 362.62 |
|
|
//| GOOG | 2006-05-01 | 371.82 |
|
|
//| GOOG | 2005-10-01 | 372.14 |
|
|
//| GOOG | 2006-08-01 | 378.53 |
|
|
//| GOOG | 2006-07-01 | 386.60 |
|
|
//| GOOG | 2006-03-01 | 390.00 |
|
|
//| GOOG | 2009-04-01 | 395.97 |
|
|
//| GOOG | 2008-09-01 | 400.52 |
|
|
//| GOOG | 2006-09-01 | 401.90 |
|
|
//| GOOG | 2005-11-01 | 404.91 |
|
|
//| GOOG | 2005-12-01 | 414.86 |
|
|
//| GOOG | 2009-05-01 | 417.23 |
|
|
//| GOOG | 2006-04-01 | 417.94 |
|
|
//| GOOG | 2006-06-01 | 419.33 |
|
|
//| GOOG | 2009-06-01 | 421.59 |
|
|
//| GOOG | 2006-01-01 | 432.66 |
|
|
//| GOOG | 2008-03-01 | 440.47 |
|
|
//| GOOG | 2009-07-01 | 443.05 |
|
|
//| GOOG | 2007-02-01 | 449.45 |
|
|
//| GOOG | 2007-03-01 | 458.16 |
|
|
//| GOOG | 2006-12-01 | 460.48 |
|
|
//| GOOG | 2009-08-01 | 461.67 |
|
|
//| GOOG | 2008-08-01 | 463.29 |
|
|
//| GOOG | 2008-02-01 | 471.18 |
|
|
//| GOOG | 2007-04-01 | 471.38 |
|
|
//| GOOG | 2008-07-01 | 473.75 |
|
|
//| GOOG | 2006-10-01 | 476.39 |
|
|
//| GOOG | 2006-11-01 | 484.81 |
|
|
//| GOOG | 2009-09-01 | 495.85 |
|
|
//| GOOG | 2007-05-01 | 497.91 |
|
|
//| GOOG | 2007-01-01 | 501.50 |
|
|
//| GOOG | 2007-07-01 | 510.00 |
|
|
//| GOOG | 2007-08-01 | 515.25 |
|
|
//| GOOG | 2007-06-01 | 522.70 |
|
|
//| GOOG | 2008-06-01 | 526.42 |
|
|
//| GOOG | 2010-02-01 | 526.80 |
|
|
//| GOOG | 2010-01-01 | 529.94 |
|
|
//| GOOG | 2009-10-01 | 536.12 |
|
|
//| GOOG | 2010-03-01 | 560.19 |
|
|
//| GOOG | 2008-01-01 | 564.30 |
|
|
//| GOOG | 2007-09-01 | 567.27 |
|
|
//| GOOG | 2008-04-01 | 574.29 |
|
|
//| GOOG | 2009-11-01 | 583.00 |
|
|
//| GOOG | 2008-05-01 | 585.80 |
|
|
//| GOOG | 2009-12-01 | 619.98 |
|
|
//| GOOG | 2007-12-01 | 691.48 |
|
|
//| GOOG | 2007-11-01 | 693.00 |
|
|
//| GOOG | 2007-10-01 | 707.00 |
|
|
|
|
Map targetPrices = makeDataset(hashmap("price", new Double[] { 200.0, 300.0, 400.0 }));
|
|
|
|
println(leftJoinAsof("price", targetPrices, googPrices, hashmap(kw("asof-op"), kw("&lt;="))));
|
|
//asof-&lt;= [3 4]:
|
|
//| price | symbol | date | GOOG.price |
|
|
//|------:|--------|------------|-----------:|
|
|
//| 200.0 | GOOG | 2005-04-01 | 220.00 |
|
|
//| 300.0 | GOOG | 2008-12-01 | 307.65 |
|
|
//| 400.0 | GOOG | 2008-09-01 | 400.52 |
|
|
println(leftJoinAsof("price", targetPrices, googPrices, hashmap(kw("asof-op"), kw(">"))));
|
|
//asof-> [3 4]:
|
|
//| price | symbol | date | GOOG.price |
|
|
//|------:|--------|------------|-----------:|
|
|
//| 200.0 | GOOG | 2005-01-01 | 195.62 |
|
|
//| 300.0 | GOOG | 2005-06-01 | 294.15 |
|
|
//| 400.0 | GOOG | 2009-04-01 | 395.97 |
|
|
</code></pre></div>
|
|
</li>
|
|
</ul>
|
|
<a name="leftJoinAsof-java.lang.Object-java.util.Map-java.util.Map-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>leftJoinAsof</h4>
|
|
<pre>public static java.util.Map leftJoinAsof(java.lang.Object colname,
|
|
java.util.Map lhs,
|
|
java.util.Map rhs)</pre>
|
|
</li>
|
|
</ul>
|
|
<a name="toNeanderthal-java.lang.Object-clojure.lang.Keyword-clojure.lang.Keyword-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>toNeanderthal</h4>
|
|
<pre>public static java.lang.Object toNeanderthal(java.lang.Object ds,
|
|
clojure.lang.Keyword layout,
|
|
clojure.lang.Keyword datatype)</pre>
|
|
<div class="block"><p>Convert a dataset to a neanderthal 2D matrix such that the columns of the dataset become the columns of the matrix. This function dynamically loads the neanderthal MKL bindings so there may be some pause when first called. If you would like to have the pause somewhere else, call <code>require("tech.v3.dataset.neanderthal");</code> at some previous point of the program. You must have an up-to-date version of neanderthal in your classpath such as <code>[uncomplicate/neanderthal "0.43.3"]</code>.</p>
|
|
<p>See the <a href="https://neanderthal.uncomplicate.org/">neanderthal documentation</a>.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>layout</code> - One of <code>:column</code> or <code>:row</code>.</dd>
|
|
<dd><code>datatype</code> - One of <code>:float32</code> or <code>:float64</code>.
|
|
<p>Note that you can get a tech tensor (tech.v3.datatype.NDBuffer) from a neanderthal matrix using <code>tech.v3.DType.asTensor()</code>.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="toNeanderthal-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>toNeanderthal</h4>
|
|
<pre>public static java.lang.Object toNeanderthal(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Convert a dataset to a neanderthal 2D matrix such that the columns of the dataset become the columns of the matrix. See the documentation for the three-argument <code>toNeanderthal(ds, layout, datatype)</code> overload. This function creates a column-major float64 (double) matrix.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="neanderthalToDataset-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>neanderthalToDataset</h4>
|
|
<pre>public static java.util.Map neanderthalToDataset(java.lang.Object denseMat)</pre>
|
|
<div class="block"><p>Convert a neanderthal matrix to a dataset such that the columns of the matrix become the columns of the dataset. Column names are the indexes of the columns.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="toTensor-java.lang.Object-clojure.lang.Keyword-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>toTensor</h4>
|
|
<pre>public static tech.v3.datatype.NDBuffer toTensor(java.lang.Object ds,
|
|
clojure.lang.Keyword datatype)</pre>
|
|
<div class="block"><p>Convert a dataset to a jvm-heap based 2D tensor such that the columns of the dataset become the columns of the tensor.</p></div>
|
|
<dl>
|
|
<dt><span class="paramLabel">Parameters:</span></dt>
|
|
<dd><code>datatype</code> - Any numeric datatype - <code>:int8</code>, <code>:uint8</code>, <code>:float32</code>, <code>:float64</code>, etc.</dd>
|
|
</dl>
|
|
</li>
|
|
</ul>
|
|
<a name="toTensor-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>toTensor</h4>
|
|
<pre>public static tech.v3.datatype.NDBuffer toTensor(java.lang.Object ds)</pre>
|
|
<div class="block"><p>Convert a dataset to a jvm-heap based 2D double (float64) tensor.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="tensorToDataset-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>tensorToDataset</h4>
|
|
<pre>public static java.util.Map tensorToDataset(java.lang.Object tens)</pre>
|
|
<div class="block"><p>Convert a tensor to a dataset such that the columns of the tensor become the columns of the dataset named after their index.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="writeDataset-java.lang.Object-java.lang.String-java.lang.Object-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockList">
|
|
<li class="blockList">
|
|
<h4>writeDataset</h4>
|
|
<pre>public static void writeDataset(java.lang.Object ds,
|
|
java.lang.String path,
|
|
java.lang.Object options)</pre>
|
|
<div class="block"><p>Write a dataset to disc as csv, tsv, csv.gz, tsv.gz, json, json.gz or nippy.</p>
|
|
<p>Reading/writing to parquet or arrow is accessible via separate classes.</p></div>
|
|
</li>
|
|
</ul>
|
|
<a name="writeDataset-java.lang.Object-java.lang.String-">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="blockListLast">
|
|
<li class="blockList">
|
|
<h4>writeDataset</h4>
|
|
<pre>public static void writeDataset(java.lang.Object ds,
|
|
java.lang.String path)</pre>
|
|
<div class="block"><p>Write a dataset to disc as csv, tsv, csv.gz, tsv.gz or nippy.</p></div>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<!-- ========= END OF CLASS DATA ========= -->
|
|
<!-- ======= START OF BOTTOM NAVBAR ====== -->
|
|
<div class="bottomNav"><a name="navbar.bottom">
|
|
<!-- -->
|
|
</a>
|
|
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
|
|
<a name="navbar.bottom.firstrow">
|
|
<!-- -->
|
|
</a>
|
|
<ul class="navList" title="Navigation">
|
|
<li><a href="../../overview-summary.html">Overview</a></li>
|
|
<li><a href="package-summary.html">Package</a></li>
|
|
<li class="navBarCell1Rev">Class</li>
|
|
<li><a href="package-tree.html">Tree</a></li>
|
|
<li><a href="../../deprecated-list.html">Deprecated</a></li>
|
|
<li><a href="../../index-all.html">Index</a></li>
|
|
<li><a href="../../help-doc.html">Help</a></li>
|
|
</ul>
|
|
<div class="aboutLanguage"><script type="text/javascript" src="../../highlight.pack.js"></script>
|
|
<script type="text/javascript"><!--
|
|
hljs.initHighlightingOnLoad();
|
|
//--></script></div>
|
|
</div>
|
|
<div class="subNav">
|
|
<ul class="navList">
|
|
<li>Prev Class</li>
|
|
<li>Next Class</li>
|
|
</ul>
|
|
<ul class="navList">
|
|
<li><a href="../../index.html?tech/v3/TMD.html" target="_top">Frames</a></li>
|
|
<li><a href="TMD.html" target="_top">No Frames</a></li>
|
|
</ul>
|
|
<ul class="navList" id="allclasses_navbar_bottom">
|
|
<li><a href="../../allclasses-noframe.html">All Classes</a></li>
|
|
</ul>
|
|
<div>
|
|
<script type="text/javascript"><!--
|
|
allClassesLink = document.getElementById("allclasses_navbar_bottom");
|
|
if(window==top) {
|
|
allClassesLink.style.display = "block";
|
|
}
|
|
else {
|
|
allClassesLink.style.display = "none";
|
|
}
|
|
//-->
|
|
</script>
|
|
</div>
|
|
<div>
|
|
<ul class="subNavList">
|
|
<li>Summary: </li>
|
|
<li>Nested | </li>
|
|
<li>Field | </li>
|
|
<li>Constr | </li>
|
|
<li><a href="#method.summary">Method</a></li>
|
|
</ul>
|
|
<ul class="subNavList">
|
|
<li>Detail: </li>
|
|
<li>Field | </li>
|
|
<li>Constr | </li>
|
|
<li><a href="#method.detail">Method</a></li>
|
|
</ul>
|
|
</div>
|
|
<a name="skip.navbar.bottom">
|
|
<!-- -->
|
|
</a></div>
|
|
<!-- ======== END OF BOTTOM NAVBAR ======= -->
|
|
</body>
|
|
</html>
|