<dependencies>
    <!-- Apache Parquet dependencies -->
    <dependency>
        <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-column</artifactId>
        <version>1.11.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-avro</artifactId>
        <version>1.11.0</version>
    </dependency>
    <!-- Hadoop client, needed for the Configuration and Path classes used by the writer below;
         use the version that matches your Hadoop environment (3.2.1 shown here as an example) -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.2.1</version>
    </dependency>
    <!-- Other dependencies -->
    ...
</dependencies>
public class Student {
    private int id;
    private String name;
    private int age;

    // getters and setters
    ...
}
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class ParquetColumnWriterExample {

    private static final String FILE_PATH = "students.parquet";

    public static void main(String[] args) {
        // Avro schema describing the Student record (id, name, age)
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Student\",\"fields\":["
                + "{\"name\":\"id\",\"type\":\"int\"},"
                + "{\"name\":\"name\",\"type\":\"string\"},"
                + "{\"name\":\"age\",\"type\":\"int\"}]}"
        );

        Configuration conf = new Configuration();

        try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
                .<GenericRecord>builder(new Path(FILE_PATH))
                .withSchema(schema)
                .withConf(conf)
                .withCompressionCodec(CompressionCodecName.SNAPPY)
                .build()) {

            // Creating sample data
            GenericRecord record1 = new GenericData.Record(schema);
            record1.put("id", 1);
            record1.put("name", "Alice");
            record1.put("age", 20);

            GenericRecord record2 = new GenericData.Record(schema);
            record2.put("id", 2);
            record2.put("name", "Bob");
            record2.put("age", 22);

            // Writing data to the Parquet file
            writer.write(record1);
            writer.write(record2);

            System.out.println("Data written to Parquet file successfully.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
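To confirm the records were written as expected, the file can be read back with AvroParquetReader, which ships in the same parquet-avro artifact. The following is a minimal sketch, assuming the same students.parquet path as above; the class name ParquetColumnReaderExample is just illustrative.

import java.io.IOException;

import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;

public class ParquetColumnReaderExample {

    private static final String FILE_PATH = "students.parquet";

    public static void main(String[] args) {
        try (ParquetReader<GenericRecord> reader = AvroParquetReader
                .<GenericRecord>builder(new Path(FILE_PATH))
                .build()) {

            // read() returns the next record, or null once the file is exhausted
            GenericRecord record;
            while ((record = reader.read()) != null) {
                System.out.println(record.get("id") + " "
                        + record.get("name") + " "
                        + record.get("age"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Run after the writer above, this should print the two sample rows (1 Alice 20 and 2 Bob 22).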