View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.client;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.List;
25  import java.util.UUID;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.CellUtil;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.hadoop.hbase.classification.InterfaceStability;
37  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
38  import org.apache.hadoop.hbase.util.FSUtils;
39  
40  /**
41   * A Scanner which performs a scan over snapshot files. Using this class requires copying the
42   * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
43   * directory. Actual data files are not copied.
44   *
45   * <p>
46   * This also allows one to run the scan from an
47   * online or offline hbase cluster. The snapshot files can be exported by using the
48   * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool,
49   * to a pure-hdfs cluster, and this scanner can be used to
50   * run the scan directly over the snapshot files. The snapshot should not be deleted while there
51   * are open scanners reading from snapshot files.
52   *
53   * <p>
54   * An internal RegionScanner is used to execute the {@link Scan} obtained
55   * from the user for each region in the snapshot.
56   * <p>
57   * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
58   * snapshot files and data files. HBase also enforces security because all the requests are handled
59   * by the server layer, and the user cannot read from the data files directly. To read from snapshot
60   * files directly from the file system, the user who is running the MR job must have sufficient
61   * permissions to access snapshot and reference files. This means that to run mapreduce over
62   * snapshot files, the job has to be run as the HBase user or the user must have group or other
63   * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
64   * snapshot/data files will completely circumvent the access control enforced by HBase.
65   * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
66   */
67  @InterfaceAudience.Public
68  @InterfaceStability.Evolving
69  public class TableSnapshotScanner extends AbstractClientScanner {
70  
71    private static final Log LOG = LogFactory.getLog(TableSnapshotScanner.class);
72  
73    private Configuration conf;
74    private String snapshotName;
75    private FileSystem fs;
76    private Path rootDir;
77    private Path restoreDir;
78    private Scan scan;
79    private ArrayList<HRegionInfo> regions;
80    private HTableDescriptor htd;
81  
82    private ClientSideRegionScanner currentRegionScanner  = null;
83    private int currentRegion = -1;
84  
85    private int numOfCompleteRows = 0;
86    /**
87     * Creates a TableSnapshotScanner.
88     * @param conf the configuration
89     * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
90     * have write permissions to this directory, and this should not be a subdirectory of rootdir.
91     * The scanner deletes the contents of the directory once the scanner is closed.
92     * @param snapshotName the name of the snapshot to read from
93     * @param scan a Scan representing scan parameters
94     * @throws IOException in case of error
95     */
96    public TableSnapshotScanner(Configuration conf, Path restoreDir,
97        String snapshotName, Scan scan) throws IOException {
98      this(conf, FSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
99    }
100 
101   /**
102    * Creates a TableSnapshotScanner.
103    * @param conf the configuration
104    * @param rootDir root directory for HBase.
105    * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
106    * have write permissions to this directory, and this should not be a subdirectory of rootdir.
107    * The scanner deletes the contents of the directory once the scanner is closed.
108    * @param snapshotName the name of the snapshot to read from
109    * @param scan a Scan representing scan parameters
110    * @throws IOException in case of error
111    */
112   public TableSnapshotScanner(Configuration conf, Path rootDir,
113       Path restoreDir, String snapshotName, Scan scan) throws IOException {
114     this.conf = conf;
115     this.snapshotName = snapshotName;
116     this.rootDir = rootDir;
117     // restoreDir will be deleted in close(), use a unique sub directory
118     this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
119     this.scan = scan;
120     this.fs = rootDir.getFileSystem(conf);
121     init();
122   }
123 
124   private void init() throws IOException {
125     final RestoreSnapshotHelper.RestoreMetaChanges meta =
126       RestoreSnapshotHelper.copySnapshotForScanner(
127         conf, fs, rootDir, restoreDir, snapshotName);
128     final List<HRegionInfo> restoredRegions = meta.getRegionsToAdd();
129 
130     htd = meta.getTableDescriptor();
131     regions = new ArrayList<HRegionInfo>(restoredRegions.size());
132     for (HRegionInfo hri : restoredRegions) {
133       if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
134         continue;
135       }
136       if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
137           hri.getStartKey(), hri.getEndKey())) {
138         regions.add(hri);
139       }
140     }
141 
142     // sort for regions according to startKey.
143     Collections.sort(regions);
144     initScanMetrics(scan);
145   }
146 
147   @Override
148   public Result next() throws IOException {
149     Result result = null;
150     while (true) {
151       if (currentRegionScanner == null) {
152         currentRegion++;
153         if (currentRegion >= regions.size()) {
154           return null;
155         }
156 
157         HRegionInfo hri = regions.get(currentRegion);
158         currentRegionScanner = new ClientSideRegionScanner(conf, fs,
159           restoreDir, htd, hri, scan, scanMetrics);
160         if (this.scanMetrics != null) {
161           this.scanMetrics.countOfRegions.incrementAndGet();
162         }
163       }
164 
165       try {
166         result = currentRegionScanner.next();
167         if (result != null) {
168           if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
169             result = null;
170           }
171           return result;
172         }
173       } finally {
174         if (result == null) {
175           currentRegionScanner.close();
176           currentRegionScanner = null;
177         }
178       }
179     }
180   }
181 
182   @Override
183   public void close() {
184     if (currentRegionScanner != null) {
185       currentRegionScanner.close();
186     }
187     try {
188       fs.delete(this.restoreDir, true);
189     } catch (IOException ex) {
190       LOG.warn("Could not delete restore directory for the snapshot:" + ex);
191     }
192   }
193 
194   @Override
195   public boolean renewLease() {
196     throw new UnsupportedOperationException();
197   }
198 
199 }