View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.regionserver.querymatcher;
20  
21  import java.io.IOException;
22  
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;
27  import org.apache.hadoop.hbase.util.Bytes;
28  
29  /**
30   * Keeps track of the columns for a scan if they are not explicitly specified
31   */
32  @InterfaceAudience.Private
33  public class ScanWildcardColumnTracker implements ColumnTracker {
34    private byte[] columnBuffer = null;
35    private int columnOffset = 0;
36    private int columnLength = 0;
37    private int currentCount = 0;
38    private int maxVersions;
39    private int minVersions;
40    /*
41     * Keeps track of the latest timestamp and type included for current column. Used to eliminate
42     * duplicates.
43     */
44    private long latestTSOfCurrentColumn;
45    private byte latestTypeOfCurrentColumn;
46  
47    private long oldestStamp;
48  
49    /**
50     * Return maxVersions of every row.
51     * @param minVersion Minimum number of versions to keep
52     * @param maxVersion Maximum number of versions to return
53     * @param oldestUnexpiredTS oldest timestamp that has not expired according to the TTL.
54     */
55    public ScanWildcardColumnTracker(int minVersion, int maxVersion, long oldestUnexpiredTS) {
56      this.maxVersions = maxVersion;
57      this.minVersions = minVersion;
58      this.oldestStamp = oldestUnexpiredTS;
59    }
60  
61    /**
62     * {@inheritDoc} This receives puts *and* deletes.
63     */
64    @Override
65    public MatchCode checkColumn(byte[] bytes, int offset, int length, byte type) throws IOException {
66      return MatchCode.INCLUDE;
67    }
68  
69    /**
70     * {@inheritDoc} This receives puts *and* deletes. Deletes do not count as a version, but rather
71     * take the version of the previous put (so eventually all but the last can be reclaimed).
72     */
73    @Override
74    public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
75        long timestamp, byte type, boolean ignoreCount) throws IOException {
76  
77      if (columnBuffer == null) {
78        // first iteration.
79        resetBuffer(bytes, offset, length);
80        if (ignoreCount) {
81          return ScanQueryMatcher.MatchCode.INCLUDE;
82        }
83        // do not count a delete marker as another version
84        return checkVersion(type, timestamp);
85      }
86      int cmp = Bytes.compareTo(bytes, offset, length, columnBuffer, columnOffset, columnLength);
87      if (cmp == 0) {
88        if (ignoreCount) {
89          return ScanQueryMatcher.MatchCode.INCLUDE;
90        }
91  
92        // If column matches, check if it is a duplicate timestamp
93        if (sameAsPreviousTSAndType(timestamp, type)) {
94          return ScanQueryMatcher.MatchCode.SKIP;
95        }
96        return checkVersion(type, timestamp);
97      }
98  
99      resetTSAndType();
100 
101     // new col > old col
102     if (cmp > 0) {
103       // switched columns, lets do something.x
104       resetBuffer(bytes, offset, length);
105       if (ignoreCount) {
106         return ScanQueryMatcher.MatchCode.INCLUDE;
107       }
108       return checkVersion(type, timestamp);
109     }
110 
111     // new col < oldcol
112     // WARNING: This means that very likely an edit for some other family
113     // was incorrectly stored into the store for this one. Throw an exception,
114     // because this might lead to data corruption.
115     throw new IOException("ScanWildcardColumnTracker.checkColumn ran into a column actually "
116         + "smaller than the previous column: " + Bytes.toStringBinary(bytes, offset, length));
117   }
118 
119   private void resetBuffer(byte[] bytes, int offset, int length) {
120     columnBuffer = bytes;
121     columnOffset = offset;
122     columnLength = length;
123     currentCount = 0;
124   }
125 
126   /**
127    * Check whether this version should be retained. There are 4 variables considered: If this
128    * version is past max versions -> skip it If this kv has expired or was deleted, check min
129    * versions to decide whther to skip it or not. Increase the version counter unless this is a
130    * delete
131    */
132   private MatchCode checkVersion(byte type, long timestamp) {
133     if (!CellUtil.isDelete(type)) {
134       currentCount++;
135     }
136     if (currentCount > maxVersions) {
137       return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; // skip to next col
138     }
139     // keep the KV if required by minversions or it is not expired, yet
140     if (currentCount <= minVersions || !isExpired(timestamp)) {
141       setTSAndType(timestamp, type);
142       return ScanQueryMatcher.MatchCode.INCLUDE;
143     } else {
144       return MatchCode.SEEK_NEXT_COL;
145     }
146 
147   }
148 
149   @Override
150   public void reset() {
151     columnBuffer = null;
152     resetTSAndType();
153   }
154 
155   private void resetTSAndType() {
156     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
157     latestTypeOfCurrentColumn = 0;
158   }
159 
160   private void setTSAndType(long timestamp, byte type) {
161     latestTSOfCurrentColumn = timestamp;
162     latestTypeOfCurrentColumn = type;
163   }
164 
165   private boolean sameAsPreviousTSAndType(long timestamp, byte type) {
166     return timestamp == latestTSOfCurrentColumn && type == latestTypeOfCurrentColumn;
167   }
168 
169   private boolean isExpired(long timestamp) {
170     return timestamp < oldestStamp;
171   }
172 
173   /**
174    * Used by matcher and scan/get to get a hint of the next column to seek to after checkColumn()
175    * returns SKIP. Returns the next interesting column we want, or NULL there is none (wildcard
176    * scanner).
177    * @return The column count.
178    */
179   public ColumnCount getColumnHint() {
180     return null;
181   }
182 
183   /**
184    * We can never know a-priori if we are done, so always return false.
185    * @return false
186    */
187   @Override
188   public boolean done() {
189     return false;
190   }
191 
192   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset, int qualLength) {
193     return MatchCode.SEEK_NEXT_COL;
194   }
195 
196   public boolean isDone(long timestamp) {
197     return minVersions <= 0 && isExpired(timestamp);
198   }
199 }