1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.io.hfile;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.fs.ChecksumException;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.hadoop.hbase.util.ChecksumType;
29 import org.apache.hadoop.util.DataChecksum;
30
31 /**
32 * Utility methods to compute and validate checksums.
33 */
34 @InterfaceAudience.Private
35 public class ChecksumUtil {
36 public static final Log LOG = LogFactory.getLog(ChecksumUtil.class);
37
38 /** This is used to reserve space in a byte buffer */
39 private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
40
41 /**
42 * This is used by unit tests to make checksum failures throw an
43 * exception instead of returning null. Returning a null value from
44 * checksum validation will cause the higher layer to retry that
45 * read with hdfs-level checksums. Instead, we would like checksum
46 * failures to cause the entire unit test to fail.
47 */
48 private static boolean generateExceptions = false;
49
50 /**
51 * Generates a checksum for all the data in indata. The checksum is
52 * written to outdata.
53 * @param indata input data stream
54 * @param startOffset starting offset in the indata stream from where to
55 * compute checkums from
56 * @param endOffset ending offset in the indata stream upto
57 * which checksums needs to be computed
58 * @param outdata the output buffer where checksum values are written
59 * @param outOffset the starting offset in the outdata where the
60 * checksum values are written
61 * @param checksumType type of checksum
62 * @param bytesPerChecksum number of bytes per checksum value
63 */
64 static void generateChecksums(byte[] indata, int startOffset, int endOffset,
65 byte[] outdata, int outOffset, ChecksumType checksumType,
66 int bytesPerChecksum) throws IOException {
67
68 if (checksumType == ChecksumType.NULL) {
69 return; // No checksum for this block.
70 }
71
72 DataChecksum checksum = DataChecksum.newDataChecksum(
73 checksumType.getDataChecksumType(), bytesPerChecksum);
74
75 checksum.calculateChunkedSums(
76 ByteBuffer.wrap(indata, startOffset, endOffset - startOffset),
77 ByteBuffer.wrap(outdata, outOffset, outdata.length - outOffset));
78 }
79
80 /**
81 * Validates that the data in the specified HFileBlock matches the checksum. Generates the
82 * checksums for the data and then validate that it matches those stored in the end of the data.
83 * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
84 * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
85 * @param offset offset of the data being validated. Only used for logging.
86 * @param hdrSize Size of the block header in {@code data}. Only used for logging.
87 * @return True if checksum matches, else false.
88 */
89 static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
90 throws IOException {
91 // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
92 // so we always return true.
93 ChecksumType cktype =
94 ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
95 if (cktype == ChecksumType.NULL) {
96 return true; // No checksum validations needed for this block.
97 }
98
99 // read in the stored value of the checksum size from the header.
100 int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
101
102 DataChecksum dataChecksum = DataChecksum.newDataChecksum(
103 cktype.getDataChecksumType(), bytesPerChecksum);
104 assert dataChecksum != null;
105 int onDiskDataSizeWithHeader =
106 buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
107 if (LOG.isTraceEnabled()) {
108 LOG.info("dataLength=" + buffer.capacity()
109 + ", sizeWithHeader=" + onDiskDataSizeWithHeader
110 + ", checksumType=" + cktype.getName()
111 + ", file=" + pathName
112 + ", offset=" + offset
113 + ", headerSize=" + hdrSize
114 + ", bytesPerChecksum=" + bytesPerChecksum);
115 }
116 try {
117 ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
118 ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
119 .limit(buffer.capacity());
120 dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
121 } catch (ChecksumException e) {
122 return false;
123 }
124 return true; // checksum is valid
125 }
126
127 /**
128 * Returns the number of bytes needed to store the checksums for
129 * a specified data size
130 * @param datasize number of bytes of data
131 * @param bytesPerChecksum number of bytes in a checksum chunk
132 * @return The number of bytes needed to store the checksum values
133 */
134 static long numBytes(long datasize, int bytesPerChecksum) {
135 return numChunks(datasize, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE;
136 }
137
138 /**
139 * Returns the number of checksum chunks needed to store the checksums for
140 * a specified data size
141 * @param datasize number of bytes of data
142 * @param bytesPerChecksum number of bytes in a checksum chunk
143 * @return The number of checksum chunks
144 */
145 static long numChunks(long datasize, int bytesPerChecksum) {
146 long numChunks = datasize/bytesPerChecksum;
147 if (datasize % bytesPerChecksum != 0) {
148 numChunks++;
149 }
150 return numChunks;
151 }
152
153 /**
154 * Write dummy checksums to the end of the specified bytes array
155 * to reserve space for writing checksums later
156 * @param baos OutputStream to write dummy checkum values
157 * @param numBytes Number of bytes of data for which dummy checksums
158 * need to be generated
159 * @param bytesPerChecksum Number of bytes per checksum value
160 */
161 static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
162 int numBytes, int bytesPerChecksum) throws IOException {
163 long numChunks = numChunks(numBytes, bytesPerChecksum);
164 long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
165 while (bytesLeft > 0) {
166 long count = Math.min(bytesLeft, DUMMY_VALUE.length);
167 baos.write(DUMMY_VALUE, 0, (int)count);
168 bytesLeft -= count;
169 }
170 }
171
172 /**
173 * Mechanism to throw an exception in case of hbase checksum
174 * failure. This is used by unit tests only.
175 * @param value Setting this to true will cause hbase checksum
176 * verification failures to generate exceptions.
177 */
178 public static void generateExceptionForChecksumFailureForTest(boolean value) {
179 generateExceptions = value;
180 }
181 }
182