View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.util.Arrays;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.TableName;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.classification.InterfaceStability;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.Writable;
35  import org.apache.hadoop.io.WritableUtils;
36  import org.apache.hadoop.mapreduce.InputSplit;
37  
38  /**
39   * A table split corresponds to a key range (low, high) and an optional scanner.
40   * All references to row below refer to the key of the row.
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Evolving
44  public class TableSplit extends InputSplit
45    implements Writable, Comparable<TableSplit> {
46    /** @deprecated LOG variable would be made private. */
47    @Deprecated
48    public static final Log LOG = LogFactory.getLog(TableSplit.class);
49    
50    // should be < 0 (@see #readFields(DataInput))
51    // version 1 supports Scan data member
52    enum Version {
53      UNVERSIONED(0),
54      // Initial number we put on TableSplit when we introduced versioning.
55      INITIAL(-1),
56      // Added an encoded region name field for easier identification of split -> region
57      WITH_ENCODED_REGION_NAME(-2);
58  
59      final int code;
60      static final Version[] byCode;
61      static {
62        byCode = Version.values();
63        for (int i = 0; i < byCode.length; i++) {
64          if (byCode[i].code != -1 * i) {
65            throw new AssertionError("Values in this enum should be descending by one");
66          }
67        }
68      }
69  
70      Version(int code) {
71        this.code = code;
72      }
73  
74      boolean atLeast(Version other) {
75        return code <= other.code;
76      }
77  
78      static Version fromCode(int code) {
79        return byCode[code * -1];
80      }
81    }
82    
83    private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
84    private TableName tableName;
85    private byte [] startRow;
86    private byte [] endRow;
87    private String regionLocation;
88    private String encodedRegionName = "";
89  
90    /**
91     * The scan object may be null but the serialized form of scan is never null
92     * or empty since we serialize the scan object with default values then.
93     * Having no scanner in TableSplit doesn't necessarily mean there is no scanner
94     * for mapreduce job, it just means that we do not need to set it for each split.
95     * For example, it is not required to have a scan object for
96     * {@link org.apache.hadoop.hbase.mapred.TableInputFormatBase} since we use the scan from the
97     * job conf and scanner is supposed to be same for all the splits of table.
98     */
99    private String scan = ""; // stores the serialized form of the Scan
100 
101   private long length; // Contains estimation of region size in bytes
102 
103   /** Default constructor. */
104   public TableSplit() {
105     this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
106       HConstants.EMPTY_BYTE_ARRAY, "");
107   }
108 
109   /**
110    * @deprecated As of release 0.96
111    *             (<a href="https://issues.apache.org/jira/browse/HBASE-9508">HBASE-9508</a>).
112    *             This will be removed in HBase 2.0.0.
113    *             Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
114    */
115   @Deprecated
116   public TableSplit(final byte [] tableName, Scan scan, byte [] startRow, byte [] endRow,
117       final String location) {
118     this(TableName.valueOf(tableName), scan, startRow, endRow, location);
119   }
120 
121   /**
122    * Creates a new instance while assigning all variables.
123    * Length of region is set to 0
124    * Encoded name of the region is set to blank
125    *
126    * @param tableName  The name of the current table.
127    * @param scan The scan associated with this split.
128    * @param startRow  The start row of the split.
129    * @param endRow  The end row of the split.
130    * @param location  The location of the region.
131    */
132   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
133                     final String location) {
134     this(tableName, scan, startRow, endRow, location, 0L);
135   }
136 
137   /**
138    * Creates a new instance while assigning all variables.
139    * Encoded name of region is set to blank
140    *
141    * @param tableName  The name of the current table.
142    * @param scan The scan associated with this split.
143    * @param startRow  The start row of the split.
144    * @param endRow  The end row of the split.
145    * @param location  The location of the region.
146    */
147   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
148       final String location, long length) {
149     this(tableName, scan, startRow, endRow, location, "", length);
150   }
151 
152   /**
153    * Creates a new instance while assigning all variables.
154    *
155    * @param tableName  The name of the current table.
156    * @param scan The scan associated with this split.
157    * @param startRow  The start row of the split.
158    * @param endRow  The end row of the split.
159    * @param encodedRegionName The region ID.
160    * @param location  The location of the region.
161    */
162   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
163       final String location, final String encodedRegionName, long length) {
164     this.tableName = tableName;
165     try {
166       this.scan =
167         (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
168     } catch (IOException e) {
169       LOG.warn("Failed to convert Scan to String", e);
170     }
171     this.startRow = startRow;
172     this.endRow = endRow;
173     this.regionLocation = location;
174     this.encodedRegionName = encodedRegionName;
175     this.length = length;
176   }
177 
178   /**
179    * @deprecated As of release 0.96
180    *             (<a href="https://issues.apache.org/jira/browse/HBASE-9508">HBASE-9508</a>).
181    *             This will be removed in HBase 2.0.0.
182    *             Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
183    */
184   @Deprecated
185   public TableSplit(final byte [] tableName, byte[] startRow, byte[] endRow,
186       final String location) {
187     this(TableName.valueOf(tableName), startRow, endRow, location);
188   }
189 
190   /**
191    * Creates a new instance without a scanner.
192    * Length of region is set to 0
193    *
194    * @param tableName The name of the current table.
195    * @param startRow The start row of the split.
196    * @param endRow The end row of the split.
197    * @param location The location of the region.
198    */
199   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
200       final String location) {
201     this(tableName, null, startRow, endRow, location);
202   }
203 
204   /**
205    * Creates a new instance without a scanner.
206    *
207    * @param tableName The name of the current table.
208    * @param startRow The start row of the split.
209    * @param endRow The end row of the split.
210    * @param location The location of the region.
211    * @param length Size of region in bytes
212    */
213   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
214                     final String location, long length) {
215     this(tableName, null, startRow, endRow, location, length);
216   }
217 
218   /**
219    * Returns a Scan object from the stored string representation.
220    *
221    * @return Returns a Scan object based on the stored scanner.
222    * @throws IOException throws IOException if deserialization fails
223    */
224   public Scan getScan() throws IOException {
225     return TableMapReduceUtil.convertStringToScan(this.scan);
226   }
227 
228   /**
229    * Returns a scan string
230    * @return scan as string. Should be noted that this is not same as getScan().toString()
231    *    because Scan object will have the default values when empty scan string is
232    *    deserialized. Thus, getScan().toString() can never be empty
233    */
234   @InterfaceAudience.Private
235   public String getScanAsString() {
236     return this.scan;
237   }
238 
239   /**
240    * Returns the table name converted to a byte array.
241    * @see #getTable()
242    * @return The table name.
243    */
244   public byte [] getTableName() {
245     return tableName.getName();
246   }
247 
248   /**
249    * Returns the table name.
250    *
251    * @return The table name.
252    */
253   public TableName getTable() {
254     // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
255     // that in here though because there was an existing getTableName in place already since
256     // deprecated.
257     return tableName;
258   }
259 
260   /**
261    * Returns the start row.
262    *
263    * @return The start row.
264    */
265   public byte [] getStartRow() {
266     return startRow;
267   }
268 
269   /**
270    * Returns the end row.
271    *
272    * @return The end row.
273    */
274   public byte [] getEndRow() {
275     return endRow;
276   }
277 
278   /**
279    * Returns the region location.
280    *
281    * @return The region's location.
282    */
283   public String getRegionLocation() {
284     return regionLocation;
285   }
286 
287   /**
288    * Returns the region's location as an array.
289    *
290    * @return The array containing the region location.
291    * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
292    */
293   @Override
294   public String[] getLocations() {
295     return new String[] {regionLocation};
296   }
297 
298   /**
299    * Returns the region's encoded name.
300    *
301    * @return The region's encoded name.
302    */
303   public String getEncodedRegionName() {
304     return encodedRegionName;
305   }
306 
307   /**
308    * Returns the length of the split.
309    *
310    * @return The length of the split.
311    * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
312    */
313   @Override
314   public long getLength() {
315     return length;
316   }
317 
318   /**
319    * Reads the values of each field.
320    *
321    * @param in  The input to read from.
322    * @throws IOException When reading the input fails.
323    */
324   @Override
325   public void readFields(DataInput in) throws IOException {
326     Version version = Version.UNVERSIONED;
327     // TableSplit was not versioned in the beginning.
328     // In order to introduce it now, we make use of the fact
329     // that tableName was written with Bytes.writeByteArray,
330     // which encodes the array length as a vint which is >= 0.
331     // Hence if the vint is >= 0 we have an old version and the vint
332     // encodes the length of tableName.
333     // If < 0 we just read the version and the next vint is the length.
334     // @see Bytes#readByteArray(DataInput)
335     int len = WritableUtils.readVInt(in);
336     if (len < 0) {
337       // what we just read was the version
338       version = Version.fromCode(len);
339       len = WritableUtils.readVInt(in);
340     }
341     byte[] tableNameBytes = new byte[len];
342     in.readFully(tableNameBytes);
343     tableName = TableName.valueOf(tableNameBytes);
344     startRow = Bytes.readByteArray(in);
345     endRow = Bytes.readByteArray(in);
346     regionLocation = Bytes.toString(Bytes.readByteArray(in));
347     if (version.atLeast(Version.INITIAL)) {
348       scan = Bytes.toString(Bytes.readByteArray(in));
349     }
350     length = WritableUtils.readVLong(in);
351     if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
352       encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
353     }
354   }
355 
356   /**
357    * Writes the field values to the output.
358    *
359    * @param out  The output to write to.
360    * @throws IOException When writing the values to the output fails.
361    */
362   @Override
363   public void write(DataOutput out) throws IOException {
364     WritableUtils.writeVInt(out, VERSION.code);
365     Bytes.writeByteArray(out, tableName.getName());
366     Bytes.writeByteArray(out, startRow);
367     Bytes.writeByteArray(out, endRow);
368     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
369     Bytes.writeByteArray(out, Bytes.toBytes(scan));
370     WritableUtils.writeVLong(out, length);
371     Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
372   }
373 
374   /**
375    * Returns the details about this instance as a string.
376    *
377    * @return The values of this instance as a string.
378    * @see java.lang.Object#toString()
379    */
380   @Override
381   public String toString() {
382     StringBuilder sb = new StringBuilder();
383     sb.append("HBase table split(");
384     sb.append("table name: ").append(tableName);
385     // null scan input is represented by ""
386     String printScan = "";
387     if (!scan.equals("")) {
388       try {
389         // get the real scan here in toString, not the Base64 string
390         printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
391       }
392       catch (IOException e) {
393         printScan = "";
394       }
395     }
396     sb.append(", scan: ").append(printScan);
397     sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
398     sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
399     sb.append(", region location: ").append(regionLocation);
400     sb.append(", encoded region name: ").append(encodedRegionName);
401     sb.append(")");
402     return sb.toString();
403   }
404 
405   /**
406    * Compares this split against the given one.
407    *
408    * @param split  The split to compare to.
409    * @return The result of the comparison.
410    * @see java.lang.Comparable#compareTo(java.lang.Object)
411    */
412   @Override
413   public int compareTo(TableSplit split) {
414     // If The table name of the two splits is the same then compare start row
415     // otherwise compare based on table names
416     int tableNameComparison =
417         getTable().compareTo(split.getTable());
418     return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
419         getStartRow(), split.getStartRow());
420   }
421 
422   @Override
423   public boolean equals(Object o) {
424     if (o == null || !(o instanceof TableSplit)) {
425       return false;
426     }
427     return tableName.equals(((TableSplit)o).tableName) &&
428       Bytes.equals(startRow, ((TableSplit)o).startRow) &&
429       Bytes.equals(endRow, ((TableSplit)o).endRow) &&
430       regionLocation.equals(((TableSplit)o).regionLocation);
431   }
432 
433   @Override
434   public int hashCode() {
435     int result = tableName != null ? tableName.hashCode() : 0;
436     result = 31 * result + (scan != null ? scan.hashCode() : 0);
437     result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
438     result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
439     result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
440     result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
441     return result;
442   }
443 }