<dependency>
<groupId>org.apache.parquet</groupId>
<!-- parquet-hadoop provides ParquetWriter/ExampleParquetWriter; parquet-column alone does not -->
<artifactId>parquet-hadoop</artifactId>
<version>1.11.1</version>
</dependency>
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

import java.io.IOException;
public class ParquetWriterExample {
public static void main(String[] args) throws IOException {
String schemaStr = "message example {
" +
"required int64 id;
" +
"required binary name;
" +
"}";
MessageType schema = MessageTypeParser.parseMessageType(schemaStr);
ParquetWriter<Group> writer = ParquetFileWriter
.builder(new Path("data.parquet"))
.withRowGroupSize(ParquetWriter.DEFAULT_PAGE_SIZE)
.withPageSize(ParquetWriter.DEFAULT_PAGE_SIZE)
.withCompressionCodec(CompressionCodecName.GZIP)
.withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0)
.withSchema(schema)
.build();
SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
Group group = groupFactory.newGroup()
.append("id", 1)
.append("name", "John Doe");
writer.write(group);
writer.close();
}
}