<!-- Provides ParquetProperties and the example Group / SimpleGroupFactory classes. -->
<dependency>
    <groupId>org.apache.parquet</groupId>
    <artifactId>parquet-column</artifactId>
    <version>1.12.0</version>
</dependency>
<!--
  Provides ParquetWriter and GroupWriteSupport (package org.apache.parquet.hadoop),
  which the example code uses. Kept at the same version as parquet-column.
  NOTE(review): Configuration and Path come from org.apache.hadoop, so a Hadoop
  client artifact must also be on the classpath; pick the version your project uses.
-->
<dependency>
    <groupId>org.apache.parquet</groupId>
    <artifactId>parquet-hadoop</artifactId>
    <version>1.12.0</version>
</dependency>
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroup;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
/**
 * Minimal example that writes two {@code Pair} records (an int32 {@code key}
 * and a binary {@code value}) to a Snappy-compressed Parquet file named
 * {@code data.parquet}, using the example {@link Group} object model.
 */
public class ParquetExample {

    /**
     * Writes the two sample records to {@code data.parquet}.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the file cannot be created, written, or closed
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path outputPath = new Path("data.parquet");
        MessageType schema = MessageTypeParser.parseMessageType(
                "message Pair { required int32 key; required binary value; }");

        // GroupWriteSupport looks the schema up in the Configuration when the
        // writer is constructed, so it has to be registered before that point;
        // otherwise construction fails because no schema is found.
        GroupWriteSupport.setSchema(schema, conf);

        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

        // try-with-resources guarantees the writer is closed (flushing the
        // footer) even when one of the write calls throws.
        try (ParquetWriter<Group> writer = new ParquetWriter<Group>(
                outputPath,
                new GroupWriteSupport(),
                CompressionCodecName.SNAPPY,
                ParquetWriter.DEFAULT_BLOCK_SIZE,
                ParquetWriter.DEFAULT_PAGE_SIZE,   // data page size
                ParquetWriter.DEFAULT_PAGE_SIZE,   // dictionary page size
                ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
                ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
                ParquetProperties.WriterVersion.PARQUET_2_0,
                conf)) {
            Group group1 = groupFactory.newGroup().append("key", 1).append("value", "value1");
            Group group2 = groupFactory.newGroup().append("key", 2).append("value", "value2");
            writer.write(group1);
            writer.write(group2);
        }
    }
}