<dependency> <groupId>org.apache.parquet</groupId> <artifactId>parquet-column</artifactId> <version>1.11.1</version> </dependency> import org.apache.parquet.column.ParquetProperties; import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.SimpleGroupFactory; import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.metadata.CompressionCodecName; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.MessageTypeParser; import java.io.IOException; public class ParquetWriterExample { public static void main(String[] args) throws IOException { String schemaStr = "message example { " + "required int64 id; " + "required binary name; " + "}"; MessageType schema = MessageTypeParser.parseMessageType(schemaStr); ParquetWriter<Group> writer = ParquetFileWriter .builder(new Path("data.parquet")) .withRowGroupSize(ParquetWriter.DEFAULT_PAGE_SIZE) .withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE) .withCompressionCodec(CompressionCodecName.GZIP) .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0) .withSchema(schema) .build(); SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema); Group group = groupFactory.newGroup() .append("id", 1) .append("name", "John Doe"); writer.write(group); writer.close(); } }


上一篇:
下一篇:
切换中文