public class RewriteDataFilesSparkAction extends java.lang.Object implements RewriteDataFiles
Nested classes/interfaces inherited from interface RewriteDataFiles: RewriteDataFiles.FileGroupFailureResult, RewriteDataFiles.FileGroupInfo, RewriteDataFiles.FileGroupRewriteResult, RewriteDataFiles.Result
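For orientation, a minimal sketch of obtaining and running this action. The `SparkActions` entry point and `Spark3Util.loadIcebergTable` are part of the same Iceberg Spark module; the table identifier `db.tbl` and the surrounding class are illustrative.

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteDataFiles;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

public class RewriteExample {
  public static void main(String[] args) throws Exception {
    SparkSession spark = SparkSession.active();

    // "db.tbl" is a placeholder table identifier, not part of this API.
    Table table = Spark3Util.loadIcebergTable(spark, "db.tbl");

    // rewriteDataFiles(table) returns a RewriteDataFilesSparkAction;
    // binPack() selects the BINPACK strategy, execute() runs the rewrite.
    RewriteDataFiles.Result result =
        SparkActions.get(spark)
            .rewriteDataFiles(table)
            .binPack()
            .execute();
  }
}
```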
Modifier and Type | Field and Description |
---|---|
protected static org.apache.iceberg.relocated.com.google.common.base.Joiner | COMMA_JOINER |
protected static org.apache.iceberg.relocated.com.google.common.base.Splitter | COMMA_SPLITTER |
protected static java.lang.String | FILE_PATH |
protected static java.lang.String | LAST_MODIFIED |
protected static java.lang.String | MANIFEST |
protected static java.lang.String | MANIFEST_LIST |
protected static java.lang.String | OTHERS |
protected static java.lang.String | STATISTICS_FILES |
Fields inherited from interface RewriteDataFiles: MAX_CONCURRENT_FILE_GROUP_REWRITES, MAX_CONCURRENT_FILE_GROUP_REWRITES_DEFAULT, MAX_FILE_GROUP_SIZE_BYTES, MAX_FILE_GROUP_SIZE_BYTES_DEFAULT, PARTIAL_PROGRESS_ENABLED, PARTIAL_PROGRESS_ENABLED_DEFAULT, PARTIAL_PROGRESS_MAX_COMMITS, PARTIAL_PROGRESS_MAX_COMMITS_DEFAULT, REWRITE_JOB_ORDER, REWRITE_JOB_ORDER_DEFAULT, TARGET_FILE_SIZE_BYTES, USE_STARTING_SEQUENCE_NUMBER, USE_STARTING_SEQUENCE_NUMBER_DEFAULT
Modifier and Type | Method and Description |
---|---|
protected org.apache.spark.sql.Dataset<FileInfo> | allReachableOtherMetadataFileDS(Table table) |
RewriteDataFilesSparkAction | binPack() - Choose BINPACK as a strategy for this rewrite operation. |
protected void | commit(SnapshotUpdate<?> update) |
protected java.util.Map<java.lang.String,java.lang.String> | commitSummary() |
protected org.apache.spark.sql.Dataset<FileInfo> | contentFileDS(Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> | contentFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds) |
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary | deleteFiles(java.util.concurrent.ExecutorService executorService, java.util.function.Consumer<java.lang.String> deleteFunc, java.util.Iterator<FileInfo> files) - Deletes files and keeps track of how many files were removed for each file type. |
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary | deleteFiles(SupportsBulkOperations io, java.util.Iterator<FileInfo> files) |
RewriteDataFiles.Result | execute() - Executes this action. |
RewriteDataFilesSparkAction | filter(Expression expression) - A user-provided filter for determining which files will be considered by the rewrite strategy. |
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> | loadMetadataTable(Table table, MetadataTableType type) |
protected org.apache.spark.sql.Dataset<FileInfo> | manifestDS(Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> | manifestDS(Table table, java.util.Set<java.lang.Long> snapshotIds) |
protected org.apache.spark.sql.Dataset<FileInfo> | manifestListDS(Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> | manifestListDS(Table table, java.util.Set<java.lang.Long> snapshotIds) |
protected JobGroupInfo | newJobGroupInfo(java.lang.String groupId, java.lang.String desc) |
protected Table | newStaticTable(TableMetadata metadata, FileIO io) |
ThisT | option(java.lang.String name, java.lang.String value) |
protected java.util.Map<java.lang.String,java.lang.String> | options() |
ThisT | options(java.util.Map<java.lang.String,java.lang.String> newOptions) |
protected org.apache.spark.sql.Dataset<FileInfo> | otherMetadataFileDS(Table table) |
protected RewriteDataFilesSparkAction | self() |
ThisT | snapshotProperty(java.lang.String property, java.lang.String value) |
RewriteDataFilesSparkAction | sort() - Choose SORT as a strategy for this rewrite operation using the table's sortOrder. |
RewriteDataFilesSparkAction | sort(SortOrder sortOrder) - Choose SORT as a strategy for this rewrite operation and manually specify the sortOrder to use. |
protected org.apache.spark.sql.SparkSession | spark() |
protected org.apache.spark.api.java.JavaSparkContext | sparkContext() |
protected org.apache.spark.sql.Dataset<FileInfo> | statisticsFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds) |
protected <T> T | withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier) |
RewriteDataFilesSparkAction | zOrder(java.lang.String... columnNames) - Choose Z-ORDER as a strategy for this rewrite operation with a specified list of columns to use. |
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.iceberg.actions.SnapshotUpdate: snapshotProperty
protected static final java.lang.String MANIFEST
protected static final java.lang.String MANIFEST_LIST
protected static final java.lang.String STATISTICS_FILES
protected static final java.lang.String OTHERS
protected static final java.lang.String FILE_PATH
protected static final java.lang.String LAST_MODIFIED
protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
protected RewriteDataFilesSparkAction self()
public RewriteDataFilesSparkAction binPack()
Choose BINPACK as a strategy for this rewrite operation.
Specified by: binPack in interface RewriteDataFiles
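A sketch of combining binPack() with the rewrite options inherited from RewriteDataFiles (listed above); `table` and imports are as in the earlier sketch. Partial progress lets finished file groups be committed before the entire rewrite completes.

```java
// Tune a BINPACK rewrite via option(); the option-name constants are the
// RewriteDataFiles fields listed in the summary above. All values are strings.
RewriteDataFiles.Result result =
    SparkActions.get()
        .rewriteDataFiles(table)
        .binPack()
        .option(RewriteDataFiles.PARTIAL_PROGRESS_ENABLED, "true")
        .option(RewriteDataFiles.PARTIAL_PROGRESS_MAX_COMMITS, "10")
        .option(RewriteDataFiles.MAX_CONCURRENT_FILE_GROUP_REWRITES, "4")
        .execute();
```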
public RewriteDataFilesSparkAction sort(SortOrder sortOrder)
Choose SORT as a strategy for this rewrite operation and manually specify the sortOrder to use.
Specified by: sort in interface RewriteDataFiles
Parameters: sortOrder - user defined sortOrder
public RewriteDataFilesSparkAction sort()
Choose SORT as a strategy for this rewrite operation using the table's sortOrder.
Specified by: sort in interface RewriteDataFiles
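A sketch of both sort variants; the `event_time` column used to build the explicit SortOrder is hypothetical.

```java
import org.apache.iceberg.SortOrder;

// Variant 1: rely on the table's existing sort order.
SparkActions.get().rewriteDataFiles(table).sort().execute();

// Variant 2: build a SortOrder against the table schema;
// "event_time" is a hypothetical column name.
SortOrder order = SortOrder.builderFor(table.schema()).asc("event_time").build();
SparkActions.get().rewriteDataFiles(table).sort(order).execute();
```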
public RewriteDataFilesSparkAction zOrder(java.lang.String... columnNames)
Choose Z-ORDER as a strategy for this rewrite operation with a specified list of columns to use.
Specified by: zOrder in interface RewriteDataFiles
Parameters: columnNames - Columns to be used to generate Z-Values
public RewriteDataFilesSparkAction filter(Expression expression)
A user-provided filter for determining which files will be considered by the rewrite strategy.
Specified by: filter in interface RewriteDataFiles
Parameters: expression - an Iceberg expression used to determine which files will be considered for rewriting
public RewriteDataFiles.Result execute()
Executes this action.
Specified by: execute in interface Action<RewriteDataFiles,RewriteDataFiles.Result>
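Putting filter, zOrder, and execute together; the `event_date` filter column, its value, and the Z-order columns `x` and `y` are all hypothetical.

```java
import org.apache.iceberg.expressions.Expressions;

// Only files matching the filter are considered for rewriting; the rewritten
// files are clustered by Z-order on the given columns.
RewriteDataFiles.Result result =
    SparkActions.get()
        .rewriteDataFiles(table)
        .filter(Expressions.equal("event_date", "2024-01-01"))
        .zOrder("x", "y")
        .execute();

// RewriteDataFiles.Result exposes aggregate counts for the rewrite.
System.out.printf("rewrote %d files into %d new files%n",
    result.rewrittenDataFilesCount(), result.addedDataFilesCount());
```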
public ThisT snapshotProperty(java.lang.String property, java.lang.String value)
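snapshotProperty sets a summary property on the snapshot produced by this action (inherited via the SnapshotUpdate interface); the key and value below are illustrative, not predefined.

```java
// "compaction.trigger" / "nightly" are illustrative, not predefined keys.
SparkActions.get()
    .rewriteDataFiles(table)
    .binPack()
    .snapshotProperty("compaction.trigger", "nightly")
    .execute();
```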
protected void commit(SnapshotUpdate<?> update)
protected java.util.Map<java.lang.String,java.lang.String> commitSummary()
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
public ThisT option(java.lang.String name, java.lang.String value)
public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
protected java.util.Map<java.lang.String,java.lang.String> options()
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table newStaticTable(TableMetadata metadata, FileIO io)
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(Table table)
protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(java.util.concurrent.ExecutorService executorService, java.util.function.Consumer<java.lang.String> deleteFunc, java.util.Iterator<FileInfo> files)
Deletes files and keeps track of how many files were removed for each file type.
Parameters:
executorService - an executor service to use for parallel deletes
deleteFunc - a delete function
files - an iterator of Spark rows of the structure (path: String, type: String)
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(SupportsBulkOperations io, java.util.Iterator<FileInfo> files)