View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.IOException;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.classification.InterfaceStability;
27  import org.apache.hadoop.conf.Configurable;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.HBaseConfiguration;
30  import org.apache.hadoop.hbase.HConstants;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.TableNotEnabledException;
33  import org.apache.hadoop.hbase.TableNotFoundException;
34  import org.apache.hadoop.hbase.client.Admin;
35  import org.apache.hadoop.hbase.client.BufferedMutator;
36  import org.apache.hadoop.hbase.client.Connection;
37  import org.apache.hadoop.hbase.client.ConnectionFactory;
38  import org.apache.hadoop.hbase.client.Delete;
39  import org.apache.hadoop.hbase.client.HTable;
40  import org.apache.hadoop.hbase.client.Mutation;
41  import org.apache.hadoop.hbase.client.Put;
42  import org.apache.hadoop.mapreduce.JobContext;
43  import org.apache.hadoop.mapreduce.OutputCommitter;
44  import org.apache.hadoop.mapreduce.OutputFormat;
45  import org.apache.hadoop.mapreduce.RecordWriter;
46  import org.apache.hadoop.mapreduce.TaskAttemptContext;
47  
48  /**
49   * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored
50   * while the output value <u>must</u> be either a {@link Put} or a
51   * {@link Delete} instance.
52   */
53  @InterfaceAudience.Public
54  @InterfaceStability.Stable
55  public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation>
56  implements Configurable {
57  
58    private static final Log LOG = LogFactory.getLog(TableOutputFormat.class);
59  
60    /** Job parameter that specifies the output table. */
61    public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
62  
63    /**
64     * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}.
65     * For keys matching this prefix, the prefix is stripped, and the value is set in the
66     * configuration with the resulting key, ie. the entry "hbase.mapred.output.key1 = value1"
67     * would be set in the configuration as "key1 = value1".  Use this to set properties
68     * which should only be applied to the {@code TableOutputFormat} configuration and not the
69     * input configuration.
70     */
71    public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";
72  
73    /**
74     * Optional job parameter to specify a peer cluster.
75     * Used specifying remote cluster when copying between hbase clusters (the
76     * source is picked up from <code>hbase-site.xml</code>).
77     * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job, Class, String, String, String)
78     */
79    public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";
80  
81    /** Optional job parameter to specify peer cluster's ZK client port */
82    public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";
83  
84    /** Optional specification of the rs class name of the peer cluster */
85    public static final String
86        REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";
87    /** Optional specification of the rs impl name of the peer cluster */
88    public static final String
89        REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";
90  
91    /** The configuration. */
92    private Configuration conf = null;
93  
94    /**
95     * Writes the reducer output to an HBase table.
96     */
97    protected class TableRecordWriter
98    extends RecordWriter<KEY, Mutation> {
99  
100     private Connection connection;
101     private BufferedMutator mutator;
102 
103     /**
104      * @throws IOException 
105      * 
106      */
107     public TableRecordWriter() throws IOException {
108       String tableName = conf.get(OUTPUT_TABLE);
109       this.connection = ConnectionFactory.createConnection(conf);
110       this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
111       LOG.info("Created table instance for "  + tableName);
112     }
113     /**
114      * Closes the writer, in this case flush table commits.
115      *
116      * @param context  The context.
117      * @throws IOException When closing the writer fails.
118      * @see RecordWriter#close(TaskAttemptContext)
119      */
120     @Override
121     public void close(TaskAttemptContext context) throws IOException {
122       try {
123         if (mutator != null) {
124           mutator.close();
125         }
126       } finally {
127         if (connection != null) {
128           connection.close();
129         }
130       }
131     }
132 
133     /**
134      * Writes a key/value pair into the table.
135      *
136      * @param key  The key.
137      * @param value  The value.
138      * @throws IOException When writing fails.
139      * @see RecordWriter#write(Object, Object)
140      */
141     @Override
142     public void write(KEY key, Mutation value)
143     throws IOException {
144       if (!(value instanceof Put) && !(value instanceof Delete)) {
145         throw new IOException("Pass a Delete or a Put");
146       }
147       mutator.mutate(value);
148     }
149   }
150 
151   /**
152    * Creates a new record writer.
153    *
154    * @param context  The current task context.
155    * @return The newly created writer instance.
156    * @throws IOException When creating the writer fails.
157    * @throws InterruptedException When the jobs is cancelled.
158    */
159   @Override
160   public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
161   throws IOException, InterruptedException {
162     return new TableRecordWriter();
163   }
164 
165   /**
166    * Checks if the output table exists and is enabled.
167    *
168    * @param context  The current context.
169    * @throws IOException When the check fails.
170    * @throws InterruptedException When the job is aborted.
171    * @see OutputFormat#checkOutputSpecs(JobContext)
172    */
173   @Override
174   public void checkOutputSpecs(JobContext context) throws IOException,
175       InterruptedException {
176     try (Connection conn = ConnectionFactory.createConnection(getConf());
177          Admin admin = conn.getAdmin()) {
178       TableName tableName = TableName.valueOf(this.conf.get(OUTPUT_TABLE));
179       if (!admin.tableExists(tableName)) {
180         throw new TableNotFoundException("Can't write, table does not exist:" +
181             tableName.getNameAsString());
182       }
183 
184       if (!admin.isTableEnabled(tableName)) {
185         throw new TableNotEnabledException("Can't write, table is not enabled: " +
186             tableName.getNameAsString());
187       }
188     }
189   }
190 
191   /**
192    * Returns the output committer.
193    *
194    * @param context  The current context.
195    * @return The committer.
196    * @throws IOException When creating the committer fails.
197    * @throws InterruptedException When the job is aborted.
198    * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
199    */
200   @Override
201   public OutputCommitter getOutputCommitter(TaskAttemptContext context)
202   throws IOException, InterruptedException {
203     return new TableOutputCommitter();
204   }
205 
206   @Override
207   public Configuration getConf() {
208     return conf;
209   }
210 
211   @Override
212   public void setConf(Configuration otherConf) {
213     String tableName = otherConf.get(OUTPUT_TABLE);
214     if(tableName == null || tableName.length() <= 0) {
215       throw new IllegalArgumentException("Must specify table name");
216     }
217 
218     String address = otherConf.get(QUORUM_ADDRESS);
219     int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);
220     String serverClass = otherConf.get(REGION_SERVER_CLASS);
221     String serverImpl = otherConf.get(REGION_SERVER_IMPL);
222 
223     try {
224       this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
225 
226       if (serverClass != null) {
227         this.conf.set(HConstants.REGION_SERVER_IMPL, serverImpl);
228       }
229       if (zkClientPort != 0) {
230         this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
231       }
232     } catch(IOException e) {
233       LOG.error(e);
234       throw new RuntimeException(e);
235     }
236   }
237 }