Tablesaw
Java · 数据分析 · 原创 · Tablesaw · 阅读时间小于 1 分钟 · 约 250 字
1. Maven 依赖(pom.xml)
<!-- Tablesaw core library. An explicit version is pinned instead of the LATEST
     meta-version so that builds are reproducible; raise it deliberately when upgrading. -->
<dependency>
  <groupId>tech.tablesaw</groupId>
  <artifactId>tablesaw-core</artifactId>
  <version>0.43.1</version>
</dependency>
2. reduce
2.1. 实例代码
// Build a DoubleColumn named "nc" from a plain double array and print it.
double[] numbers = {1, 2, 3, 4};
DoubleColumn nc = DoubleColumn.create("nc", numbers);
System.out.println(nc.print());

// Fold the column values with a binary operator (here: addition).
// NOTE(review): the cast to DoubleBinaryOperator presumably picks between reduce overloads - confirm.
Optional<Double> reduce = nc.reduce((DoubleBinaryOperator) (v1, v2) -> v1 + v2);
// Print the reduce result when one is present, so the example actually shows it.
reduce.ifPresent(System.out::println);

// Built-in sum() printed for comparison with the reduce result above.
System.out.println(nc.sum());
3. 数据过滤排序
3.1. 实例代码
// Load the student score sheet from a CSV file.
Table table = Table.read().csv("D:/Temp/student_csv.csv");

// Keep only the students whose Chinese-language score ("语文分数") is at least 80.
Table filterResult =
    table.where(table.intColumn("语文分数").isGreaterThanOrEqualTo(80));
System.out.println(filterResult);

// Compute the mean, maximum and minimum of the Chinese-language score column.
Table summarizeResult =
    table
        .summarize(
            "语文分数",
            AggregateFunctions.mean,
            AggregateFunctions.max,
            AggregateFunctions.min)
        .apply();
System.out.println(summarizeResult);
4. 运算
4.1. join运算
val tbl3 = tbl.joinOn("name").inner(tbl2)
/**
 * Full-outer-joins two tables parsed from in-memory CSV text on their shared "TIME" column and
 * checks that the joined table has three columns.
 *
 * @throws IOException declared for the table-reading calls
 */
@Test
public void joinTablesWithStringColumnsSuccess() throws IOException {
  // First sensor: header row plus three readings, joined with the platform line separator.
  String sensorOneCsv =
      String.join(
          System.lineSeparator(),
          "TIME,TEMP SENSOR 1",
          "13:21:50.430,23.7",
          "13:21:51,23.1",
          "13:21:52.451,24.2");
  // Column type detection is restricted to DOUBLE and TEXT.
  Table t1 =
      Table.read()
          .usingOptions(
              CsvReadOptions.builderFromString(sensorOneCsv)
                  .columnTypesToDetect(Arrays.asList(ColumnType.DOUBLE, ColumnType.TEXT)));

  // Second sensor: the same three TIME values with different temperatures.
  String sensorTwoCsv =
      String.join(
          System.lineSeparator(),
          "TIME,TEMP SENSOR 2",
          "13:21:50.430,24.9",
          "13:21:51,25.2",
          "13:21:52.451,26.1");
  Table t2 =
      Table.read()
          .usingOptions(
              CsvReadOptions.builderFromString(sensorTwoCsv)
                  .columnTypesToDetect(Arrays.asList(ColumnType.DOUBLE, ColumnType.TEXT)));

  Table joined = t1.joinOn("TIME").fullOuter(t2);

  // Expected columns: TIME plus one temperature column from each input table.
  assertEquals(3, joined.columnCount());
}