View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALKeyRecordReader;
import org.apache.hadoop.hbase.mapreduce.WALInputFormat.WALRecordReader;
import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
57  
58  /**
59   * JUnit tests for the WALRecordReader
60   */
61  @Category(MediumTests.class)
62  public class TestWALRecordReader {
63    private static final Log LOG = LogFactory.getLog(TestWALRecordReader.class);
64    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
65    private static Configuration conf;
66    private static FileSystem fs;
67    private static Path hbaseDir;
68    private static FileSystem walFs;
69    private static Path walRootDir;
70    // visible for TestHLogRecordReader
71    static final TableName tableName = TableName.valueOf(getName());
72    private static final byte [] rowName = tableName.getName();
73    // visible for TestHLogRecordReader
74    static final HRegionInfo info = new HRegionInfo(tableName,
75        Bytes.toBytes(""), Bytes.toBytes(""), false);
76    private static final byte [] family = Bytes.toBytes("column");
77    private static final byte [] value = Bytes.toBytes("value");
78    private static HTableDescriptor htd;
79    private static Path logDir;
80    protected MultiVersionConcurrencyControl mvcc;
81  
82    private static String getName() {
83      return "TestWALRecordReader";
84    }
85  
86    @Before
87    public void setUp() throws Exception {
88      fs.delete(hbaseDir, true);
89      walFs.delete(walRootDir, true);
90      mvcc = new MultiVersionConcurrencyControl();
91    }
92    @BeforeClass
93    public static void setUpBeforeClass() throws Exception {
94      // Make block sizes small.
95      conf = TEST_UTIL.getConfiguration();
96      conf.setInt("dfs.blocksize", 1024 * 1024);
97      conf.setInt("dfs.replication", 1);
98      TEST_UTIL.startMiniDFSCluster(1);
99  
100     conf = TEST_UTIL.getConfiguration();
101     fs = TEST_UTIL.getDFSCluster().getFileSystem();
102 
103     hbaseDir = TEST_UTIL.createRootDir();
104 
105     walRootDir = TEST_UTIL.createWALRootDir();
106     walFs = FSUtils.getWALFileSystem(conf);
107     logDir = new Path(walRootDir, HConstants.HREGION_LOGDIR_NAME);
108 
109     htd = new HTableDescriptor(tableName);
110     htd.addFamily(new HColumnDescriptor(family));
111   }
112 
113   @AfterClass
114   public static void tearDownAfterClass() throws Exception {
115     fs.delete(hbaseDir, true);
116     walFs.delete(walRootDir, true);
117     TEST_UTIL.shutdownMiniCluster();
118   }
119 
120   /**
121    * Test partial reads from the log based on passed time range
122    * @throws Exception
123    */
124   @Test
125   public void testPartialRead() throws Exception {
126     final WALFactory walfactory = new WALFactory(conf, null, getName());
127     WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
128     // This test depends on timestamp being millisecond based and the filename of the WAL also
129     // being millisecond based.
130     long ts = System.currentTimeMillis();
131     WALEdit edit = new WALEdit();
132     edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"), ts, value));
133     log.append(htd, info, getWalKey(ts), edit, true);
134     edit = new WALEdit();
135     edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"), ts+1, value));
136     log.append(htd, info, getWalKey(ts+1), edit, true);
137     log.sync();
138     LOG.info("Before 1st WAL roll " + log.toString());
139     log.rollWriter();
140     LOG.info("Past 1st WAL roll " + log.toString());
141 
142     Thread.sleep(1);
143     long ts1 = System.currentTimeMillis();
144 
145     edit = new WALEdit();
146     edit.add(new KeyValue(rowName, family, Bytes.toBytes("3"), ts1+1, value));
147     log.append(htd, info, getWalKey(ts1+1), edit, true);
148     edit = new WALEdit();
149     edit.add(new KeyValue(rowName, family, Bytes.toBytes("4"), ts1+2, value));
150     log.append(htd, info, getWalKey(ts1+2), edit, true);
151     log.sync();
152     log.shutdown();
153     walfactory.shutdown();
154     LOG.info("Closed WAL " + log.toString());
155 
156  
157     WALInputFormat input = new WALInputFormat();
158     Configuration jobConf = new Configuration(conf);
159     jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
160     jobConf.setLong(WALInputFormat.END_TIME_KEY, ts);
161 
162     // only 1st file is considered, and only its 1st entry is used
163     List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
164 
165     assertEquals(1, splits.size());
166     testSplit(splits.get(0), Bytes.toBytes("1"));
167 
168     jobConf.setLong(WALInputFormat.START_TIME_KEY, ts+1);
169     jobConf.setLong(WALInputFormat.END_TIME_KEY, ts1+1);
170     splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
171     // both files need to be considered
172     assertEquals(2, splits.size());
173     // only the 2nd entry from the 1st file is used
174     testSplit(splits.get(0), Bytes.toBytes("2"));
175     // only the 1nd entry from the 2nd file is used
176     testSplit(splits.get(1), Bytes.toBytes("3"));
177   }
178 
179   /**
180    * Test basic functionality
181    * @throws Exception
182    */
183   @Test
184   public void testWALRecordReader() throws Exception {
185     final WALFactory walfactory = new WALFactory(conf, null, getName());
186     WAL log = walfactory.getWAL(info.getEncodedNameAsBytes(), info.getTable().getNamespace());
187     byte [] value = Bytes.toBytes("value");
188     WALEdit edit = new WALEdit();
189     edit.add(new KeyValue(rowName, family, Bytes.toBytes("1"),
190         System.currentTimeMillis(), value));
191     long txid = log.append(htd, info, getWalKey(System.currentTimeMillis()), edit, true);
192     log.sync(txid);
193 
194     Thread.sleep(1); // make sure 2nd log gets a later timestamp
195     long secondTs = System.currentTimeMillis();
196     log.rollWriter();
197 
198     edit = new WALEdit();
199     edit.add(new KeyValue(rowName, family, Bytes.toBytes("2"),
200         System.currentTimeMillis(), value));
201     txid = log.append(htd, info, getWalKey(System.currentTimeMillis()), edit, true);
202     log.sync(txid);
203     log.shutdown();
204     walfactory.shutdown();
205     long thirdTs = System.currentTimeMillis();
206 
207     // should have 2 log files now
208     WALInputFormat input = new WALInputFormat();
209     Configuration jobConf = new Configuration(conf);
210     jobConf.set("mapreduce.input.fileinputformat.inputdir", logDir.toString());
211 
212     // make sure both logs are found
213     List<InputSplit> splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
214     assertEquals(2, splits.size());
215 
216     // should return exactly one KV
217     testSplit(splits.get(0), Bytes.toBytes("1"));
218     // same for the 2nd split
219     testSplit(splits.get(1), Bytes.toBytes("2"));
220 
221     // now test basic time ranges:
222 
223     // set an endtime, the 2nd log file can be ignored completely.
224     jobConf.setLong(WALInputFormat.END_TIME_KEY, secondTs-1);
225     splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
226     assertEquals(1, splits.size());
227     testSplit(splits.get(0), Bytes.toBytes("1"));
228 
229     // now set a start time
230     jobConf.setLong(WALInputFormat.END_TIME_KEY, Long.MAX_VALUE);
231     jobConf.setLong(WALInputFormat.START_TIME_KEY, thirdTs);
232     splits = input.getSplits(MapreduceTestingShim.createJobContext(jobConf));
233     // both logs need to be considered
234     assertEquals(2, splits.size());
235     // but both readers skip all edits
236     testSplit(splits.get(0));
237     testSplit(splits.get(1));
238   }
239 
240   protected WALKey getWalKey(final long time) {
241     return new WALKey(info.getEncodedNameAsBytes(), tableName, time, mvcc);
242   }
243 
244   protected WALRecordReader getReader() {
245     return new WALKeyRecordReader();
246   }
247 
248   /**
249    * Create a new reader from the split, and match the edits against the passed columns.
250    */
251   private void testSplit(InputSplit split, byte[]... columns) throws Exception {
252     final WALRecordReader reader = getReader();
253     reader.initialize(split, MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
254 
255     for (byte[] column : columns) {
256       assertTrue(reader.nextKeyValue());
257       Cell cell = reader.getCurrentValue().getCells().get(0);
258       if (!Bytes.equals(column, cell.getQualifier())) {
259         assertTrue("expected [" + Bytes.toString(column) + "], actual ["
260             + Bytes.toString(cell.getQualifier()) + "]", false);
261       }
262     }
263     assertFalse(reader.nextKeyValue());
264     reader.close();
265   }
266 
267 }