HBASE-28465 Implementation of framework for time-based priority bucket-cache #5793
DataTieringException.java (new file)

@@ -0,0 +1,27 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Private
public class DataTieringException extends Exception {
  DataTieringException(String reason) {
    super(reason);
  }
}
DataTieringManager.java (new file)

@@ -0,0 +1,170 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.util.HashSet;
import java.util.Map;
import java.util.OptionalLong;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@InterfaceAudience.Private
public class DataTieringManager {
  private static final Logger LOG = LoggerFactory.getLogger(DataTieringManager.class);
  public static final String DATATIERING_KEY = "hbase.hstore.datatiering.type";
  public static final String DATATIERING_HOT_DATA_AGE_KEY =
    "hbase.hstore.datatiering.hot.age.millis";
  public static final DataTieringType DEFAULT_DATATIERING = DataTieringType.NONE;
  public static final long DEFAULT_DATATIERING_HOT_DATA_AGE = 7 * 24 * 60 * 60 * 1000; // 7 days
  private static DataTieringManager instance;
  private final Map<String, HRegion> onlineRegions;

  private DataTieringManager(Map<String, HRegion> onlineRegions) {
    this.onlineRegions = onlineRegions;
  }

  public static synchronized void instantiate(Map<String, HRegion> onlineRegions) {
    if (instance == null) {
      instance = new DataTieringManager(onlineRegions);
      LOG.info("DataTieringManager instantiated successfully.");
    } else {
      LOG.warn("DataTieringManager is already instantiated.");
    }
  }

  public static synchronized DataTieringManager getInstance() {
    if (instance == null) {
      throw new IllegalStateException(
        "DataTieringManager has not been instantiated. Call instantiate() first.");
    }
    return instance;
  }

  public boolean isDataTieringEnabled(BlockCacheKey key) throws DataTieringException {
    Path hFilePath = key.getFilePath();
    if (hFilePath == null) {
      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
    }
    return isDataTieringEnabled(hFilePath);
  }

  public boolean isDataTieringEnabled(Path hFilePath) throws DataTieringException {
    Configuration configuration = getConfiguration(hFilePath);
    DataTieringType dataTieringType = getDataTieringType(configuration);
    return !dataTieringType.equals(DataTieringType.NONE);
  }

  public boolean isHotData(BlockCacheKey key) throws DataTieringException {
    Path hFilePath = key.getFilePath();
    if (hFilePath == null) {
      throw new DataTieringException("BlockCacheKey Doesn't Contain HFile Path");
    }
    return isHotData(hFilePath);
  }

  public boolean isHotData(Path hFilePath) throws DataTieringException {
    Configuration configuration = getConfiguration(hFilePath);
    DataTieringType dataTieringType = getDataTieringType(configuration);

    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
      long hotDataAge = getDataTieringHotDataAge(configuration);

      HStoreFile hStoreFile = getHStoreFile(hFilePath);
      if (hStoreFile == null) {
        throw new DataTieringException(
          "HStoreFile corresponding to " + hFilePath + " doesn't exist");
      }
      OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
      if (!maxTimestamp.isPresent()) {
        throw new DataTieringException("Maximum timestamp not present for " + hFilePath);
      }

      long currentTimestamp = EnvironmentEdgeManager.getDelegate().currentTime();
      long diff = currentTimestamp - maxTimestamp.getAsLong();
      return diff <= hotDataAge;
    }
    return false;
  }

Review discussion on throwing DataTieringException when the HStoreFile cannot be found:
- Rather than throwing an exception, maybe just return false? I wonder if it would be possible that a compaction completed and moved the given file away just after we entered this method.
- Wouldn't it be better to throw the exception and let the caller decide what action to take based on the context?
- I'm thinking in terms of responsibilities and cohesion. Who should know how to classify a block as hot or cold for a file that is not in the list of region stores? Shouldn't this be done here?
- Got it. Should we change this in other places as well?

Review discussion on the default return value when time-range tiering is not in use:
- Shouldn't we return "true" by default? I.e., if not using DataTieringType.TIME_RANGE, this should consider all data as hot and let the LFU logic decide on the eviction, right?
- The logic reaches that point only when the …
- Ok, I think there's a misunderstanding of concepts. Today, without this data tiering, the behaviour is to cache everything and evict based on LFU (in other words, everything is hot). If I don't turn on DataTieringType.TIME_RANGE, I want things to keep working as today (cache everything and evict based on LFU). Maybe we can change the default name from NONE to ALL.
- Even with this implementation, it will function the same. For example, in eviction we'll prioritize evicting the cold data files first, which are all the files that satisfy the condition (isDataTieringEnabled(file) && !isHotData(file)). Once I identify these files, I will proceed to evict them and then hand over control to the rest of the logic.
- Okay, I get your point. Then we can eliminate the isDataTieringEnabled methods, right? Since we are considering everything as hot by default.
  public Set<String> getColdDataFiles(Set<BlockCacheKey> allCachedBlocks)
    throws DataTieringException {
    Set<String> coldHFiles = new HashSet<>();
    for (BlockCacheKey key : allCachedBlocks) {
      if (coldHFiles.contains(key.getHfileName())) {
        continue;
      }
      if (isDataTieringEnabled(key) && !isHotData(key)) {
        coldHFiles.add(key.getHfileName());
      }
    }
    return coldHFiles;
  }

  private HRegion getHRegion(Path hFilePath) throws DataTieringException {
    if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) {
      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
    }
    String regionId = hFilePath.getParent().getParent().getName();
    HRegion hRegion = this.onlineRegions.get(regionId);
    if (hRegion == null) {
      throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
    }
    return hRegion;
  }

  private HStore getHStore(Path hFilePath) throws DataTieringException {
    HRegion hRegion = getHRegion(hFilePath);
    String columnFamily = hFilePath.getParent().getName();
    HStore hStore = hRegion.getStore(Bytes.toBytes(columnFamily));
    if (hStore == null) {
      throw new DataTieringException("HStore corresponding to " + hFilePath + " doesn't exist");
    }
    return hStore;
  }

  private HStoreFile getHStoreFile(Path hFilePath) throws DataTieringException {
    HStore hStore = getHStore(hFilePath);
    for (HStoreFile file : hStore.getStorefiles()) {
      if (file.getPath().equals(hFilePath)) {
        return file;
      }
    }
    return null;
  }

  private Configuration getConfiguration(Path hFilePath) throws DataTieringException {
    HStore hStore = getHStore(hFilePath);
    return hStore.getReadOnlyConfiguration();
  }

  private DataTieringType getDataTieringType(Configuration conf) {
    return DataTieringType.valueOf(conf.get(DATATIERING_KEY, DEFAULT_DATATIERING.name()));
  }

  private long getDataTieringHotDataAge(Configuration conf) {
    return Long.parseLong(
      conf.get(DATATIERING_HOT_DATA_AGE_KEY, String.valueOf(DEFAULT_DATATIERING_HOT_DATA_AGE)));
  }
}
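For context on how this API is meant to be consumed, below is a minimal caller-side sketch of the eviction flow described in the review comments above: identify cold files via getColdDataFiles() and evict them before handing control back to the existing LFU logic. This is not part of the patch; the ColdFileEvictionSketch class, its evictor callback, and the surrounding wiring are assumptions for illustration only, and the real BucketCache integration is outside this change.

import java.util.Set;
import java.util.function.Consumer;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.regionserver.DataTieringException;
import org.apache.hadoop.hbase.regionserver.DataTieringManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Illustrative sketch only: evict blocks of cold HFiles first, then let the
// cache's normal LFU eviction handle the rest.
final class ColdFileEvictionSketch {
  private static final Logger LOG = LoggerFactory.getLogger(ColdFileEvictionSketch.class);

  // cachedKeys: all keys currently held by the block cache.
  // evictor: callback that evicts all cached blocks of a given HFile,
  //          e.g. hFileName -> bucketCache.evictBlocksByHfileName(hFileName).
  static void evictColdFilesFirst(Set<BlockCacheKey> cachedKeys, Consumer<String> evictor) {
    try {
      // Requires DataTieringManager.instantiate(onlineRegions) to have been
      // called by the region server beforehand.
      Set<String> coldFiles = DataTieringManager.getInstance().getColdDataFiles(cachedKeys);
      for (String hFileName : coldFiles) {
        evictor.accept(hFileName);
      }
    } catch (DataTieringException e) {
      // As discussed above, the caller decides how to react; here we simply skip
      // tiering-based eviction and fall back to the existing LFU behaviour.
      LOG.warn("Data tiering lookup failed; falling back to default eviction", e);
    }
  }
}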
DataTieringType.java (new file)

@@ -0,0 +1,26 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import org.apache.yetus.audience.InterfaceAudience;

@InterfaceAudience.Public
public enum DataTieringType {
  NONE,
  TIME_RANGE
}
Review discussion on the getConfiguration / getHStoreFile lookups in isHotData:
- getConfiguration(hFilePath) already traverses through the regions to get to the HStore, and we do it again within getHStoreFile. We can slightly improve this by avoiding the duplicate traversal to get the HStore. One option I can think of is to get the store first: HStore store = getStore(hFilePath); (we traverse only once), then Configuration configuration = getConfiguration(store); and HStoreFile file = getHStoreFile(store);. With this, we do the region traversal only once.
- No, it doesn't traverse through the regions; these are just map lookups.
- Oh ok, then it looks ok.
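For reference, here is a rough sketch of the shape the first comment proposes, written as alternative methods inside DataTieringManager: resolve the HStore once and derive both the configuration and the store file from it. This is not the code in the patch; the getHStoreFile(HStore, Path) overload is hypothetical, and, as the reply notes, the existing lookups are plain map lookups, so this would be an optional simplification rather than a required fix.

// Illustrative alternative only, following the reviewer's suggestion: one
// region/store lookup per call instead of resolving the store twice.
public boolean isHotData(Path hFilePath) throws DataTieringException {
  HStore hStore = getHStore(hFilePath); // single lookup of region and store
  Configuration configuration = hStore.getReadOnlyConfiguration();
  DataTieringType dataTieringType = getDataTieringType(configuration);
  if (!dataTieringType.equals(DataTieringType.TIME_RANGE)) {
    return false;
  }
  long hotDataAge = getDataTieringHotDataAge(configuration);
  HStoreFile hStoreFile = getHStoreFile(hStore, hFilePath); // hypothetical overload
  if (hStoreFile == null) {
    throw new DataTieringException("HStoreFile corresponding to " + hFilePath + " doesn't exist");
  }
  OptionalLong maxTimestamp = hStoreFile.getMaximumTimestamp();
  if (!maxTimestamp.isPresent()) {
    throw new DataTieringException("Maximum timestamp not present for " + hFilePath);
  }
  long diff = EnvironmentEdgeManager.getDelegate().currentTime() - maxTimestamp.getAsLong();
  return diff <= hotDataAge;
}

// Hypothetical helper used by the sketch above: find the store file within an
// already-resolved HStore instead of resolving the store again from the path.
private HStoreFile getHStoreFile(HStore hStore, Path hFilePath) {
  for (HStoreFile file : hStore.getStorefiles()) {
    if (file.getPath().equals(hFilePath)) {
      return file;
    }
  }
  return null;
}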