public class RewritePositionDeleteFilesSparkAction extends java.lang.Object implements RewritePositionDeleteFiles
RewritePositionDeleteFiles
.RewritePositionDeleteFiles.FileGroupInfo, RewritePositionDeleteFiles.FileGroupRewriteResult, RewritePositionDeleteFiles.Result
Modifier and Type | Field and Description |
---|---|
protected static org.apache.iceberg.relocated.com.google.common.base.Joiner |
COMMA_JOINER |
protected static org.apache.iceberg.relocated.com.google.common.base.Splitter |
COMMA_SPLITTER |
protected static java.lang.String |
FILE_PATH |
protected static java.lang.String |
LAST_MODIFIED |
protected static java.lang.String |
MANIFEST |
protected static java.lang.String |
MANIFEST_LIST |
protected static java.lang.String |
OTHERS |
protected static java.lang.String |
STATISTICS_FILES |
MAX_CONCURRENT_FILE_GROUP_REWRITES, MAX_CONCURRENT_FILE_GROUP_REWRITES_DEFAULT, PARTIAL_PROGRESS_ENABLED, PARTIAL_PROGRESS_ENABLED_DEFAULT, PARTIAL_PROGRESS_MAX_COMMITS, PARTIAL_PROGRESS_MAX_COMMITS_DEFAULT, REWRITE_JOB_ORDER, REWRITE_JOB_ORDER_DEFAULT
Modifier and Type | Method and Description |
---|---|
protected org.apache.spark.sql.Dataset<FileInfo> |
allReachableOtherMetadataFileDS(Table table) |
protected void |
commit(SnapshotUpdate<?> update) |
protected java.util.Map<java.lang.String,java.lang.String> |
commitSummary() |
protected org.apache.spark.sql.Dataset<FileInfo> |
contentFileDS(Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> |
contentFileDS(Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary |
deleteFiles(java.util.concurrent.ExecutorService executorService,
java.util.function.Consumer<java.lang.String> deleteFunc,
java.util.Iterator<FileInfo> files)
Deletes files and keeps track of how many files were removed for each file type.
|
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary |
deleteFiles(SupportsBulkOperations io,
java.util.Iterator<FileInfo> files) |
RewritePositionDeleteFiles.Result |
execute()
Executes this action.
|
RewritePositionDeleteFilesSparkAction |
filter(Expression expression)
A filter for finding deletes to rewrite.
|
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
loadMetadataTable(Table table,
MetadataTableType type) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestDS(Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestDS(Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestListDS(Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestListDS(Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected JobGroupInfo |
newJobGroupInfo(java.lang.String groupId,
java.lang.String desc) |
protected Table |
newStaticTable(TableMetadata metadata,
FileIO io) |
ThisT |
option(java.lang.String name,
java.lang.String value) |
protected java.util.Map<java.lang.String,java.lang.String> |
options() |
ThisT |
options(java.util.Map<java.lang.String,java.lang.String> newOptions) |
protected org.apache.spark.sql.Dataset<FileInfo> |
otherMetadataFileDS(Table table) |
protected RewritePositionDeleteFilesSparkAction |
self() |
ThisT |
snapshotProperty(java.lang.String property,
java.lang.String value) |
protected org.apache.spark.sql.SparkSession |
spark() |
protected org.apache.spark.api.java.JavaSparkContext |
sparkContext() |
protected org.apache.spark.sql.Dataset<FileInfo> |
statisticsFileDS(Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected <T> T |
withJobGroupInfo(JobGroupInfo info,
java.util.function.Supplier<T> supplier) |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
snapshotProperty
protected static final java.lang.String MANIFEST
protected static final java.lang.String MANIFEST_LIST
protected static final java.lang.String STATISTICS_FILES
protected static final java.lang.String OTHERS
protected static final java.lang.String FILE_PATH
protected static final java.lang.String LAST_MODIFIED
protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
protected RewritePositionDeleteFilesSparkAction self()
public RewritePositionDeleteFilesSparkAction filter(Expression expression)
RewritePositionDeleteFiles
The filter will be converted to a partition filter with an inclusive projection. Any file that may contain rows matching this filter will be used by the action. The matching delete files will be rewritten.
filter
in interface RewritePositionDeleteFiles
expression
- An iceberg expression used to find deletes.public RewritePositionDeleteFiles.Result execute()
Action
execute
in interface Action<RewritePositionDeleteFiles,RewritePositionDeleteFiles.Result>
public ThisT snapshotProperty(java.lang.String property, java.lang.String value)
protected void commit(SnapshotUpdate<?> update)
protected java.util.Map<java.lang.String,java.lang.String> commitSummary()
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
public ThisT option(java.lang.String name, java.lang.String value)
public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
protected java.util.Map<java.lang.String,java.lang.String> options()
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table newStaticTable(TableMetadata metadata, FileIO io)
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(Table table)
protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(java.util.concurrent.ExecutorService executorService, java.util.function.Consumer<java.lang.String> deleteFunc, java.util.Iterator<FileInfo> files)
executorService
- an executor service to use for parallel deletesdeleteFunc
- a delete funcfiles
- an iterator of Spark rows of the structure (path: String, type: String)protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(SupportsBulkOperations io, java.util.Iterator<FileInfo> files)