Files
2026-02-08 11:20:43 -10:00

28 lines
1.0 KiB
Python
Executable File
Vendored

#!/usr/bin/python3
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
rows = [{"id": 1,
"val":[("a",{"weight":41.5, "temp":36.1}),
("b",{"weight":31.6,"temp":34.5})],
"val2":[("va", {"weight":2, "temp":3}),
("vb", {"weight":3, "temp":4})]},
{"id": 2,
"val":[("a",{"weight":11.5, "temp":22.1}),
("b",{"weight":31.6,"temp":34.5})]},
{"id": 3,
"val":[("a",{"weight":22.5,"temp":33.1}),
("b",{"weight":33.6, "temp":44.5}),
("c",{"weight":44.6, "temp":55.5})],
"val2":[("vb", {"weight":5, "temp":10})]
}]
df2 = pd.DataFrame(rows)
mystruct = pa.struct([pa.field("weight", pa.float32()),
pa.field("temp", pa.float32())])
mymap = pa.map_(pa.string(), mystruct)
schema = pa.schema([pa.field('id', pa.int32()), pa.field('val', mymap), pa.field("val2", mymap)])
print(schema)
table = pa.Table.from_pandas(df2, schema)
pq.write_table(table, 'test/data/nested.parquet')