Add the Apache Parquet dependencies to the project's pom.xml. Note that AvroParquetWriter also uses Hadoop classes (Configuration, Path), so the Hadoop client libraries (e.g. hadoop-client) must be on the classpath as well:

<dependencies>
    <!-- Apache Parquet dependencies -->
    <dependency>
        <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-column</artifactId>
        <version>1.11.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.parquet</groupId>
        <artifactId>parquet-avro</artifactId>
        <version>1.11.0</version>
    </dependency>
    <!-- Other dependencies -->
    ...
</dependencies>

A simple Student class describes the records we want to store:

public class Student {
    private int id;
    private String name;
    private int age;

    // getters and setters
    ...
}

The example below defines an Avro schema matching the Student fields, builds an AvroParquetWriter with Snappy compression, and writes two sample records to students.parquet:

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class ParquetColumnWriterExample {

    private static final String FILE_PATH = "students.parquet";

    public static void main(String[] args) {
        // Avro schema matching the Student fields
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Student\",\"fields\":["
                        + "{\"name\":\"id\",\"type\":\"int\"},"
                        + "{\"name\":\"name\",\"type\":\"string\"},"
                        + "{\"name\":\"age\",\"type\":\"int\"}]}"
        );

        Configuration conf = new Configuration();

        try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
                .<GenericRecord>builder(new Path(FILE_PATH))
                .withSchema(schema)
                .withConf(conf)
                .withCompressionCodec(CompressionCodecName.SNAPPY)
                .build()) {

            // Creating sample data
            GenericRecord record1 = new GenericData.Record(schema);
            record1.put("id", 1);
            record1.put("name", "Alice");
            record1.put("age", 20);

            GenericRecord record2 = new GenericData.Record(schema);
            record2.put("id", 2);
            record2.put("name", "Bob");
            record2.put("age", 22);

            // Writing data to the Parquet file
            writer.write(record1);
            writer.write(record2);

            System.out.println("Data written to Parquet file successfully.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
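To verify the write, the file can be read back with AvroParquetReader from the same parquet-avro artifact. The following is a minimal sketch assuming the file name and schema from the example above; the class name ParquetColumnReaderExample is illustrative:

import java.io.IOException;

import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;

public class ParquetColumnReaderExample {

    public static void main(String[] args) {
        // Open the file written by ParquetColumnWriterExample above
        try (ParquetReader<GenericRecord> reader = AvroParquetReader
                .<GenericRecord>builder(new Path("students.parquet"))
                .build()) {

            // read() returns null once all records have been consumed
            GenericRecord record;
            while ((record = reader.read()) != null) {
                System.out.println(record.get("id") + " "
                        + record.get("name") + " "
                        + record.get("age"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Because the schema is embedded in the Parquet file footer, the reader does not need the schema string repeated; each GenericRecord exposes the fields by name.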

