/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18  
package org.apache.hadoop.hbase.mapred;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
42  
43  /**
44   * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
45   * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
46   *
47   * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
48   */
49  @InterfaceAudience.Public
50  @InterfaceStability.Evolving
51  public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
52  
53    public static class TableSnapshotRegionSplit implements InputSplit {
54      private TableSnapshotInputFormatImpl.InputSplit delegate;
55  
56      // constructor for mapreduce framework / Writable
57      public TableSnapshotRegionSplit() {
58        this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
59      }
60  
61      public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
62        this.delegate = delegate;
63      }
64  
65      public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
66          List<String> locations, Scan scan, Path restoreDir) {
67        this.delegate =
68            new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
69      }
70  
71      @Override
72      public long getLength() throws IOException {
73        return delegate.getLength();
74      }
75  
76      @Override
77      public String[] getLocations() throws IOException {
78        return delegate.getLocations();
79      }
80  
81      @Override
82      public void write(DataOutput out) throws IOException {
83        delegate.write(out);
84      }
85  
86      @Override
87      public void readFields(DataInput in) throws IOException {
88        delegate.readFields(in);
89      }
90    }
91  
92    static class TableSnapshotRecordReader
93      implements RecordReader<ImmutableBytesWritable, Result> {
94  
95      private TableSnapshotInputFormatImpl.RecordReader delegate;
96  
97      public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
98          throws IOException {
99        delegate = new TableSnapshotInputFormatImpl.RecordReader();
100       delegate.initialize(split.delegate, job);
101     }
102 
103     @Override
104     public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
105       if (!delegate.nextKeyValue()) {
106         return false;
107       }
108       ImmutableBytesWritable currentKey = delegate.getCurrentKey();
109       key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
110       value.copyFrom(delegate.getCurrentValue());
111       return true;
112     }
113 
114     @Override
115     public ImmutableBytesWritable createKey() {
116       return new ImmutableBytesWritable();
117     }
118 
119     @Override
120     public Result createValue() {
121       return new Result();
122     }
123 
124     @Override
125     public long getPos() throws IOException {
126       return delegate.getPos();
127     }
128 
129     @Override
130     public void close() throws IOException {
131       delegate.close();
132     }
133 
134     @Override
135     public float getProgress() throws IOException {
136       return delegate.getProgress();
137     }
138   }
139 
140   @Override
141   public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
142     List<TableSnapshotInputFormatImpl.InputSplit> splits =
143       TableSnapshotInputFormatImpl.getSplits(job);
144     InputSplit[] results = new InputSplit[splits.size()];
145     for (int i = 0; i < splits.size(); i++) {
146       results[i] = new TableSnapshotRegionSplit(splits.get(i));
147     }
148     return results;
149   }
150 
151   @Override
152   public RecordReader<ImmutableBytesWritable, Result>
153   getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
154     return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
155   }
156 
157   /**
158    * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
159    * @param job the job to configure
160    * @param snapshotName the name of the snapshot to read from
161    * @param restoreDir a temporary directory to restore the snapshot into. Current user should
162    * have write permissions to this directory, and this should not be a subdirectory of rootdir.
163    * After the job is finished, restoreDir can be deleted.
164    * @throws IOException if an error occurs
165    */
166   public static void setInput(JobConf job, String snapshotName, Path restoreDir)
167       throws IOException {
168     TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
169   }
170 
171   /**
172    * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
173    * @param job the job to configure
174    * @param snapshotName the name of the snapshot to read from
175    * @param restoreDir a temporary directory to restore the snapshot into. Current user should
176    * have write permissions to this directory, and this should not be a subdirectory of rootdir.
177    * After the job is finished, restoreDir can be deleted.
178    * @param splitAlgo split algorithm to generate splits from region
179    * @param numSplitsPerRegion how many input splits to generate per one region
180    * @throws IOException if an error occurs
181    */
182   public static void setInput(JobConf job, String snapshotName, Path restoreDir,
183                               RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
184     TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir, splitAlgo, numSplitsPerRegion);
185   }
186 }