Sample MapReduce Code
A newer version of this documentation is available. Use the version menu above to view the most up-to-date release of the Greenplum 4.x documentation.
Sample MapReduce Code
import com.emc.greenplum.gpdb.hadoop.io.GPDBWritable; import com.emc.greenplum.gpdb.hadoop.mapreduce.lib.input.GPDBInputFormat; import com.emc.greenplum.gpdb.hadoop.mapreduce.lib.output.GPDBOutputFormat; import java.io.*; import java.util.*; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapreduce.*; import org.apache.hadoop.mapreduce.lib.output.*; import org.apache.hadoop.mapreduce.lib.input.*; import org.apache.hadoop.util.*; public class demoMR { /* * Helper routine to create our generic record. This section shows the * format of the data. Modify as necessary. */ public static GPDBWritable generateGenericRecord() throws IOException { int[] colType = new int[3]; colType[0] = GPDBWritable.BIGINT; colType[1] = GPDBWritable.BOOLEAN; colType[2] = GPDBWritable.VARCHAR; /* * This section passes the values of the data. Modify as necessary. */ GPDBWritable gw = new GPDBWritable(colType); gw.setLong (0, (long)12345); gw.setBoolean(1, true); gw.setString (2, "abcdef"); return gw; } /* * DEMO Map/Reduce class test1 * -- Regardless of the input, this section dumps the generic record * into GPDBFormat/ */ public static class Map_test1 extends Mapper<LongWritable, Text, LongWritable, GPDBWritable> { private LongWritable word = new LongWritable(1); public void map(LongWritable key, Text value, Context context) throws IOException { try { GPDBWritable gw = generateGenericRecord(); context.write(word, gw); } catch (Exception e) { throw new IOException (e.getMessage()); } } } Configuration conf = new Configuration(true); Job job = new Job(conf, "test1"); job.setJarByClass(demoMR.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputKeyClass (LongWritable.class); job.setOutputValueClass (GPDBWritable.class); job.setOutputFormatClass(GPDBOutputFormat.class); job.setMapperClass(Map_test1.class); FileInputFormat.setInputPaths (job, new Path("/demo/data/tmp")); GPDBOutputFormat.setOutputPath(job, new Path("/demo/data/MRTest1")); job.waitForCompletion(true); }