001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import java.io.IOException; 021import org.apache.hadoop.conf.Configurable; 022import org.apache.hadoop.conf.Configuration; 023import org.apache.hadoop.hbase.HBaseConfiguration; 024import org.apache.hadoop.hbase.HConstants; 025import org.apache.hadoop.hbase.TableName; 026import org.apache.hadoop.hbase.TableNotEnabledException; 027import org.apache.hadoop.hbase.TableNotFoundException; 028import org.apache.hadoop.hbase.client.Admin; 029import org.apache.hadoop.hbase.client.BufferedMutator; 030import org.apache.hadoop.hbase.client.Connection; 031import org.apache.hadoop.hbase.client.ConnectionFactory; 032import org.apache.hadoop.hbase.client.Delete; 033import org.apache.hadoop.hbase.client.Mutation; 034import org.apache.hadoop.hbase.client.Put; 035import org.apache.hadoop.mapreduce.JobContext; 036import org.apache.hadoop.mapreduce.OutputCommitter; 037import org.apache.hadoop.mapreduce.OutputFormat; 038import org.apache.hadoop.mapreduce.RecordWriter; 039import org.apache.hadoop.mapreduce.TaskAttemptContext; 040import org.apache.yetus.audience.InterfaceAudience; 041import 
org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Convert Map/Reduce output and write it to an HBase table. The KEY is ignored while the output
 * value <u>must</u> be either a {@link Put} or a {@link Delete} instance.
 */
@InterfaceAudience.Public
public class TableOutputFormat<KEY> extends OutputFormat<KEY, Mutation> implements Configurable {

  private static final Logger LOG = LoggerFactory.getLogger(TableOutputFormat.class);

  /** Job parameter that specifies the output table. */
  public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";

  /**
   * Prefix for configuration property overrides to apply in {@link #setConf(Configuration)}. For
   * keys matching this prefix, the prefix is stripped, and the value is set in the configuration
   * with the resulting key, ie. the entry "hbase.mapred.output.key1 = value1" would be set in the
   * configuration as "key1 = value1". Use this to set properties which should only be applied to
   * the {@code TableOutputFormat} configuration and not the input configuration.
   */
  public static final String OUTPUT_CONF_PREFIX = "hbase.mapred.output.";

  /**
   * Optional job parameter to specify a peer cluster. Used to specify a remote cluster when
   * copying between hbase clusters (the source is picked up from <code>hbase-site.xml</code>).
   * @see TableMapReduceUtil#initTableReducerJob(String, Class, org.apache.hadoop.mapreduce.Job,
   *      Class, String)
   */
  public static final String QUORUM_ADDRESS = OUTPUT_CONF_PREFIX + "quorum";

  /** Optional job parameter to specify peer cluster's ZK client port. */
  public static final String QUORUM_PORT = OUTPUT_CONF_PREFIX + "quorum.port";

  /**
   * Optional specification of the rs class name of the peer cluster.
   * @deprecated Since 2.5.9, 2.6.1 and 2.7.0, will be removed in 4.0.0. Does not take effect from
   *             long ago, see HBASE-6044.
   */
  @Deprecated
  public static final String REGION_SERVER_CLASS = OUTPUT_CONF_PREFIX + "rs.class";

  /**
   * Optional specification of the rs impl name of the peer cluster.
   * @deprecated Since 2.5.9, 2.6.1 and 2.7.0, will be removed in 4.0.0. Does not take effect from
   *             long ago, see HBASE-6044.
   */
  @Deprecated
  public static final String REGION_SERVER_IMPL = OUTPUT_CONF_PREFIX + "rs.impl";

  /** The configuration, as prepared by {@link #setConf(Configuration)}. */
  private Configuration conf = null;

  /**
   * Writes the reducer output to an HBase table via a {@link BufferedMutator}. Mutations are
   * buffered; they are only guaranteed to have reached the table after
   * {@link #close(TaskAttemptContext)} has completed.
   */
  protected class TableRecordWriter extends RecordWriter<KEY, Mutation> {

    private Connection connection;
    private BufferedMutator mutator;

    /**
     * Opens a connection to the cluster described by the enclosing format's configuration and
     * creates a buffered mutator on the table named by {@link #OUTPUT_TABLE}.
     * @throws IOException When connecting to the cluster or creating the mutator fails.
     */
    public TableRecordWriter() throws IOException {
      String tableName = conf.get(OUTPUT_TABLE);
      this.connection = ConnectionFactory.createConnection(conf);
      this.mutator = connection.getBufferedMutator(TableName.valueOf(tableName));
      // Parameterized logging avoids eager string concatenation.
      LOG.info("Created table instance for {}", tableName);
    }

    /**
     * Closes the writer, in this case flush table commits.
     * @param context The context.
     * @throws IOException When closing the writer fails.
     * @see RecordWriter#close(TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext context) throws IOException {
      // Close the mutator first so buffered mutations are flushed; always close the
      // connection afterwards, even when the flush fails.
      try {
        if (mutator != null) {
          mutator.close();
        }
      } finally {
        if (connection != null) {
          connection.close();
        }
      }
    }

    /**
     * Writes a key/value pair into the table.
     * @param key The key, ignored.
     * @param value The value; must be a {@link Put} or a {@link Delete} instance.
     * @throws IOException When {@code value} is not a Put or Delete, or when writing fails.
     * @see RecordWriter#write(Object, Object)
     */
    @Override
    public void write(KEY key, Mutation value) throws IOException {
      if (!(value instanceof Put) && !(value instanceof Delete)) {
        // Name the offending type so job failures are easier to diagnose.
        throw new IOException(
          "Pass a Delete or a Put, not " + (value == null ? "null" : value.getClass().getName()));
      }
      mutator.mutate(value);
    }
  }

  /**
   * Creates a new record writer. Be aware that the baseline javadoc gives the impression that there
   * is a single {@link RecordWriter} per job but in HBase, it is more natural if we give you a new
   * RecordWriter per call of this method. You must close the returned RecordWriter when done.
   * Failure to do so will drop writes.
   * @param context The current task context.
   * @return The newly created writer instance.
   * @throws IOException When creating the writer fails.
   * @throws InterruptedException When the job is cancelled.
   */
  @Override
  public RecordWriter<KEY, Mutation> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    return new TableRecordWriter();
  }

  /**
   * Checks if the output table exists and is enabled.
   * @param context The current context.
   * @throws IOException When the output table is not configured, does not exist, is disabled, or
   *           the check itself fails.
   * @throws InterruptedException When the job is aborted.
   * @see OutputFormat#checkOutputSpecs(JobContext)
   */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    // Prefer the configuration prepared by setConf (it carries the peer-cluster overrides);
    // fall back to the job's configuration when setConf has not been called.
    Configuration hConf = getConf();
    if (hConf == null) {
      hConf = context.getConfiguration();
    }

    String tableNameStr = hConf.get(OUTPUT_TABLE);
    if (tableNameStr == null || tableNameStr.isEmpty()) {
      // Fail with a clear message instead of an obscure error from TableName.valueOf(null).
      throw new IOException("Must specify table name (" + OUTPUT_TABLE + ")");
    }

    try (Connection connection = ConnectionFactory.createConnection(hConf);
      Admin admin = connection.getAdmin()) {
      TableName tableName = TableName.valueOf(tableNameStr);
      if (!admin.tableExists(tableName)) {
        throw new TableNotFoundException(
          "Can't write, table does not exist: " + tableName.getNameAsString());
      }

      if (!admin.isTableEnabled(tableName)) {
        throw new TableNotEnabledException(
          "Can't write, table is not enabled: " + tableName.getNameAsString());
      }
    }
  }

  /**
   * Returns the output committer.
   * @param context The current context.
   * @return The committer.
   * @throws IOException When creating the committer fails.
   * @throws InterruptedException When the job is aborted.
   * @see OutputFormat#getOutputCommitter(TaskAttemptContext)
   */
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
    return new TableOutputCommitter();
  }

  /**
   * Returns the configuration prepared by {@link #setConf(Configuration)}, or {@code null} when
   * setConf has not been called yet.
   */
  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Validates that {@link #OUTPUT_TABLE} is set and stores a cluster-resolved copy of
   * {@code otherConf}, applying {@link #OUTPUT_CONF_PREFIX} overrides and, when present, the
   * {@link #QUORUM_ADDRESS}/{@link #QUORUM_PORT} peer-cluster settings.
   * @throws IllegalArgumentException When the output table name is not specified.
   */
  @Override
  public void setConf(Configuration otherConf) {
    String tableName = otherConf.get(OUTPUT_TABLE);
    if (tableName == null || tableName.isEmpty()) {
      throw new IllegalArgumentException("Must specify table name");
    }

    String address = otherConf.get(QUORUM_ADDRESS);
    int zkClientPort = otherConf.getInt(QUORUM_PORT, 0);

    try {
      // createClusterConf strips OUTPUT_CONF_PREFIX overrides and, when address is non-null,
      // points the resulting configuration at the peer cluster.
      this.conf = HBaseConfiguration.createClusterConf(otherConf, address, OUTPUT_CONF_PREFIX);
      if (zkClientPort != 0) {
        this.conf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zkClientPort);
      }
    } catch (IOException e) {
      LOG.error(e.toString(), e);
      throw new RuntimeException(e);
    }
  }
}