1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package org.apache.hadoop.hbase.client;
20
21 import java.io.IOException;
22 import java.util.ArrayList;
23 import java.util.Collections;
24 import java.util.List;
25 import java.util.UUID;
26
27 import org.apache.commons.logging.Log;
28 import org.apache.commons.logging.LogFactory;
29 import org.apache.hadoop.conf.Configuration;
30 import org.apache.hadoop.fs.FileSystem;
31 import org.apache.hadoop.fs.Path;
32 import org.apache.hadoop.hbase.CellUtil;
33 import org.apache.hadoop.hbase.HRegionInfo;
34 import org.apache.hadoop.hbase.HTableDescriptor;
35 import org.apache.hadoop.hbase.classification.InterfaceAudience;
36 import org.apache.hadoop.hbase.classification.InterfaceStability;
37 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
38 import org.apache.hadoop.hbase.util.FSUtils;
39
40 /**
 * A Scanner which performs a scan over snapshot files. Using this class requires a temporary
 * empty directory, into which the snapshot reference files are copied. Actual data files are
 * not copied.
44 *
45 * <p>
46 * This also allows one to run the scan from an
47 * online or offline hbase cluster. The snapshot files can be exported by using the
48 * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool,
49 * to a pure-hdfs cluster, and this scanner can be used to
50 * run the scan directly over the snapshot files. The snapshot should not be deleted while there
51 * are open scanners reading from snapshot files.
52 *
53 * <p>
54 * An internal RegionScanner is used to execute the {@link Scan} obtained
55 * from the user for each region in the snapshot.
56 * <p>
57 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
58 * snapshot files and data files. HBase also enforces security because all the requests are handled
59 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
60 * files directly from the file system, the user who is running the MR job must have sufficient
61 * permissions to access snapshot and reference files. This means that to run mapreduce over
62 * snapshot files, the job has to be run as the HBase user or the user must have group or other
 * privileges in the filesystem (See HBASE-8369). Note that granting other users read access to
 * snapshot/data files will completely circumvent the access control enforced by HBase.
65 * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
66 */
67 @InterfaceAudience.Public
68 @InterfaceStability.Evolving
69 public class TableSnapshotScanner extends AbstractClientScanner {
70
71 private static final Log LOG = LogFactory.getLog(TableSnapshotScanner.class);
72
73 private Configuration conf;
74 private String snapshotName;
75 private FileSystem fs;
76 private Path rootDir;
77 private Path restoreDir;
78 private Scan scan;
79 private ArrayList<HRegionInfo> regions;
80 private HTableDescriptor htd;
81
82 private ClientSideRegionScanner currentRegionScanner = null;
83 private int currentRegion = -1;
84
85 private int numOfCompleteRows = 0;
86 /**
87 * Creates a TableSnapshotScanner.
88 * @param conf the configuration
89 * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
90 * have write permissions to this directory, and this should not be a subdirectory of rootdir.
91 * The scanner deletes the contents of the directory once the scanner is closed.
92 * @param snapshotName the name of the snapshot to read from
93 * @param scan a Scan representing scan parameters
94 * @throws IOException in case of error
95 */
96 public TableSnapshotScanner(Configuration conf, Path restoreDir,
97 String snapshotName, Scan scan) throws IOException {
98 this(conf, FSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
99 }
100
101 /**
102 * Creates a TableSnapshotScanner.
103 * @param conf the configuration
104 * @param rootDir root directory for HBase.
105 * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
106 * have write permissions to this directory, and this should not be a subdirectory of rootdir.
107 * The scanner deletes the contents of the directory once the scanner is closed.
108 * @param snapshotName the name of the snapshot to read from
109 * @param scan a Scan representing scan parameters
110 * @throws IOException in case of error
111 */
112 public TableSnapshotScanner(Configuration conf, Path rootDir,
113 Path restoreDir, String snapshotName, Scan scan) throws IOException {
114 this.conf = conf;
115 this.snapshotName = snapshotName;
116 this.rootDir = rootDir;
117 // restoreDir will be deleted in close(), use a unique sub directory
118 this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
119 this.scan = scan;
120 this.fs = rootDir.getFileSystem(conf);
121 init();
122 }
123
124 private void init() throws IOException {
125 final RestoreSnapshotHelper.RestoreMetaChanges meta =
126 RestoreSnapshotHelper.copySnapshotForScanner(
127 conf, fs, rootDir, restoreDir, snapshotName);
128 final List<HRegionInfo> restoredRegions = meta.getRegionsToAdd();
129
130 htd = meta.getTableDescriptor();
131 regions = new ArrayList<HRegionInfo>(restoredRegions.size());
132 for (HRegionInfo hri : restoredRegions) {
133 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
134 continue;
135 }
136 if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
137 hri.getStartKey(), hri.getEndKey())) {
138 regions.add(hri);
139 }
140 }
141
142 // sort for regions according to startKey.
143 Collections.sort(regions);
144 initScanMetrics(scan);
145 }
146
147 @Override
148 public Result next() throws IOException {
149 Result result = null;
150 while (true) {
151 if (currentRegionScanner == null) {
152 currentRegion++;
153 if (currentRegion >= regions.size()) {
154 return null;
155 }
156
157 HRegionInfo hri = regions.get(currentRegion);
158 currentRegionScanner = new ClientSideRegionScanner(conf, fs,
159 restoreDir, htd, hri, scan, scanMetrics);
160 if (this.scanMetrics != null) {
161 this.scanMetrics.countOfRegions.incrementAndGet();
162 }
163 }
164
165 try {
166 result = currentRegionScanner.next();
167 if (result != null) {
168 if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
169 result = null;
170 }
171 return result;
172 }
173 } finally {
174 if (result == null) {
175 currentRegionScanner.close();
176 currentRegionScanner = null;
177 }
178 }
179 }
180 }
181
182 @Override
183 public void close() {
184 if (currentRegionScanner != null) {
185 currentRegionScanner.close();
186 }
187 try {
188 fs.delete(this.restoreDir, true);
189 } catch (IOException ex) {
190 LOG.warn("Could not delete restore directory for the snapshot:" + ex);
191 }
192 }
193
194 @Override
195 public boolean renewLease() {
196 throw new UnsupportedOperationException();
197 }
198
199 }