-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
428 additions
and
77 deletions.
There are no files selected for viewing
98 changes: 98 additions & 0 deletions
98
parquet-hadoop/src/main/java/org/apache/parquet/hadoop/IndexCache.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.parquet.hadoop; | ||
|
||
import org.apache.parquet.column.values.bloomfilter.BloomFilter; | ||
import org.apache.parquet.hadoop.metadata.BlockMetaData; | ||
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; | ||
import org.apache.parquet.hadoop.metadata.ColumnPath; | ||
import org.apache.parquet.internal.column.columnindex.ColumnIndex; | ||
import org.apache.parquet.internal.column.columnindex.OffsetIndex; | ||
|
||
import java.io.IOException; | ||
import java.util.Set; | ||
|
||
/** | ||
* A cache for caching indexes(including: ColumnIndex, OffsetIndex and BloomFilter) | ||
*/ | ||
public interface IndexCache { | ||
|
||
enum CacheStrategy { | ||
NONE, /* No cache */ | ||
PRECACHE_BLOCK /* Precache for block indexes */ | ||
} | ||
|
||
/** | ||
* Create an index cache for the given file reader | ||
* | ||
* @param fileReader the file reader | ||
* @param columns the columns that need to do cache | ||
* @param cacheStrategy the cache strategy, supports NONE and PRECACHE_BLOCK | ||
* @return the index cache | ||
*/ | ||
static IndexCache create( | ||
ParquetFileReader fileReader, | ||
Set<ColumnPath> columns, | ||
CacheStrategy cacheStrategy) { | ||
if (cacheStrategy == CacheStrategy.NONE) { | ||
return new NoneIndexCache(fileReader); | ||
} else if (cacheStrategy == CacheStrategy.PRECACHE_BLOCK) { | ||
return new PrefetchIndexCache(fileReader, columns); | ||
} else { | ||
throw new UnsupportedOperationException("Unknown cache strategy: " + cacheStrategy); | ||
} | ||
} | ||
|
||
/** | ||
* Set the current BlockMetadata | ||
*/ | ||
void setBlockMetadata(BlockMetaData currentBlockMetadata) throws IOException; | ||
|
||
/** | ||
* Get the ColumnIndex for the given column in the set row group. | ||
* | ||
* @param chunk the given column chunk | ||
* @return the ColumnIndex for the given column | ||
* @throws IOException if any I/O error occurs during get the ColumnIndex | ||
*/ | ||
ColumnIndex getColumnIndex(ColumnChunkMetaData chunk) throws IOException; | ||
|
||
/** | ||
* Get the OffsetIndex for the given column in the set row group. | ||
* | ||
* @param chunk the given column chunk | ||
* @return the OffsetIndex for the given column | ||
* @throws IOException if any I/O error occurs during get the OffsetIndex | ||
*/ | ||
OffsetIndex getOffsetIndex(ColumnChunkMetaData chunk) throws IOException; | ||
|
||
/** | ||
* Get the BloomFilter for the given column in the set row group. | ||
* | ||
* @param chunk the given column chunk | ||
* @return the BloomFilter for the given column | ||
* @throws IOException if any I/O error occurs during get the BloomFilter | ||
*/ | ||
BloomFilter getBloomFilter(ColumnChunkMetaData chunk) throws IOException; | ||
|
||
/** | ||
* Clean the cache | ||
*/ | ||
void clean(); | ||
} |
63 changes: 63 additions & 0 deletions
63
parquet-hadoop/src/main/java/org/apache/parquet/hadoop/NoneIndexCache.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.parquet.hadoop; | ||
|
||
import org.apache.parquet.column.values.bloomfilter.BloomFilter; | ||
import org.apache.parquet.hadoop.metadata.BlockMetaData; | ||
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; | ||
import org.apache.parquet.internal.column.columnindex.ColumnIndex; | ||
import org.apache.parquet.internal.column.columnindex.OffsetIndex; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* Cache nothing. All the get methods are pushed to ParquetFileReader to read the given index. | ||
*/ | ||
class NoneIndexCache implements IndexCache { | ||
private final ParquetFileReader fileReader; | ||
|
||
NoneIndexCache(ParquetFileReader fileReader) { | ||
this.fileReader = fileReader; | ||
} | ||
|
||
@Override | ||
public void setBlockMetadata(BlockMetaData currentBlockMetadata) throws IOException { | ||
// Do nothing | ||
} | ||
|
||
@Override | ||
public ColumnIndex getColumnIndex(ColumnChunkMetaData chunk) throws IOException { | ||
return fileReader.readColumnIndex(chunk); | ||
} | ||
|
||
@Override | ||
public OffsetIndex getOffsetIndex(ColumnChunkMetaData chunk) throws IOException { | ||
return fileReader.readOffsetIndex(chunk); | ||
} | ||
|
||
@Override | ||
public BloomFilter getBloomFilter(ColumnChunkMetaData chunk) throws IOException { | ||
return fileReader.readBloomFilter(chunk); | ||
} | ||
|
||
@Override | ||
public void clean() { | ||
// Do nothing | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.