Package org.apache.iceberg.spark.actions
Class RewriteTablePathSparkAction
java.lang.Object
org.apache.iceberg.spark.actions.RewriteTablePathSparkAction
All Implemented Interfaces:
  Action<RewriteTablePath, RewriteTablePath.Result>, RewriteTablePath
-
Nested Class Summary
Nested classes/interfaces inherited from interface org.apache.iceberg.actions.RewriteTablePath:
  RewriteTablePath.Result
Field Summary
Fields:
  protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
  protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
  protected static final String FILE_PATH
  protected static final String LAST_MODIFIED
  protected static final String MANIFEST
  protected static final String MANIFEST_LIST
  protected static final String OTHERS
  protected static final String STATISTICS_FILES
Method Summary
Modifier and TypeMethodDescriptionprotected org.apache.spark.sql.Dataset<FileInfo>protected org.apache.spark.sql.Dataset<FileInfo>contentFileDS(Table table) protected org.apache.spark.sql.Dataset<FileInfo>contentFileDS(Table table, Set<Long> snapshotIds) protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummarydeleteFiles(ExecutorService executorService, Consumer<String> deleteFunc, Iterator<FileInfo> files) Deletes files and keeps track of how many files were removed for each file type.protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummarydeleteFiles(SupportsBulkOperations io, Iterator<FileInfo> files) endVersion(String eVersion) Last metadata version to rewrite, identified by name of a metadata.json file in the table's metadata log.execute()Executes this action.protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row>loadMetadataTable(Table table, MetadataTableType type) protected org.apache.spark.sql.Dataset<FileInfo>manifestDS(Table table) protected org.apache.spark.sql.Dataset<FileInfo>manifestDS(Table table, Set<Long> snapshotIds) protected org.apache.spark.sql.Dataset<FileInfo>manifestListDS(Table table) protected org.apache.spark.sql.Dataset<FileInfo>manifestListDS(Table table, Set<Long> snapshotIds) protected JobGroupInfonewJobGroupInfo(String groupId, String desc) protected TablenewStaticTable(String metadataFileLocation, FileIO io) protected TablenewStaticTable(TableMetadata metadata, FileIO io) options()protected org.apache.spark.sql.Dataset<FileInfo>otherMetadataFileDS(Table table) rewriteLocationPrefix(String sPrefix, String tPrefix) Configure a source prefix that will be replaced by the specified target prefix in all pathsprotected RewriteTablePathself()protected org.apache.spark.sql.SparkSessionspark()protected org.apache.spark.api.java.JavaSparkContextstagingLocation(String stagingLocation) Custom staging location.startVersion(String sVersion) First metadata version to rewrite, identified by name of a 
metadata.json file in the table's metadata log.protected org.apache.spark.sql.Dataset<FileInfo>statisticsFileDS(Table table, Set<Long> snapshotIds) protected <T> TwithJobGroupInfo(JobGroupInfo info, Supplier<T> supplier)
-
Field Details
MANIFEST
  protected static final String MANIFEST
MANIFEST_LIST
  protected static final String MANIFEST_LIST
STATISTICS_FILES
  protected static final String STATISTICS_FILES
OTHERS
  protected static final String OTHERS
FILE_PATH
  protected static final String FILE_PATH
LAST_MODIFIED
  protected static final String LAST_MODIFIED
COMMA_SPLITTER
  protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
COMMA_JOINER
  protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
Method Details
-
self
  protected RewriteTablePath self()
rewriteLocationPrefix
Description copied from interface: RewriteTablePath
Configure a source prefix that will be replaced by the specified target prefix in all paths.
Specified by:
  rewriteLocationPrefix in interface RewriteTablePath
Parameters:
  sPrefix - the source prefix to be replaced
  tPrefix - the target prefix
Returns:
  this for method chaining
-
startVersion
Description copied from interface: RewriteTablePath
First metadata version to rewrite, identified by the name of a metadata.json file in the table's metadata log. It is optional; if provided, this action will only rewrite metadata files added after this version.
Specified by:
  startVersion in interface RewriteTablePath
Parameters:
  sVersion - name of a metadata.json file. For example, "00001-8893aa9e-f92e-4443-80e7-cfa42238a654.metadata.json".
Returns:
  this for method chaining
-
endVersion
Description copied from interface: RewriteTablePath
Last metadata version to rewrite, identified by the name of a metadata.json file in the table's metadata log. It is optional; if provided, this action will only rewrite metadata files added before this file, including the file itself.
Specified by:
  endVersion in interface RewriteTablePath
Parameters:
  eVersion - name of a metadata.json file. For example, "00001-8893aa9e-f92e-4443-80e7-cfa42238a654.metadata.json".
Returns:
  this for method chaining
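Together, startVersion and endVersion bound the range of metadata files that are rewritten, which is useful for incremental copies. A minimal sketch in Java, assuming an Iceberg release where SparkActions exposes rewriteTablePath, a running SparkSession, and an already-loaded Table; the bucket paths and the second metadata.json name are illustrative placeholders:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteTablePath;
import org.apache.iceberg.spark.actions.SparkActions;

// Incremental rewrite: only metadata added after the start version,
// up to and including the end version, is rewritten.
RewriteTablePath.Result result =
    SparkActions.get()
        .rewriteTablePath(table)
        .rewriteLocationPrefix("s3://source-bucket/warehouse/db/tbl",
                               "s3://target-bucket/warehouse/db/tbl")
        // Start version name is the example from the parameter docs above;
        // the end version name is a made-up placeholder.
        .startVersion("00001-8893aa9e-f92e-4443-80e7-cfa42238a654.metadata.json")
        .endVersion("00002-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.metadata.json")
        .execute();
```

A first full copy would omit both calls; subsequent runs can pass the previous end version as the new start version.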
-
stagingLocation
Description copied from interface: RewriteTablePath
Custom staging location. It is optional; by default, the staging location is a subdirectory under the table's metadata directory.
Specified by:
  stagingLocation in interface RewriteTablePath
Parameters:
  stagingLocation - the staging location
Returns:
  this for method chaining
-
execute
Description copied from interface: Action
Executes this action.
Specified by:
  execute in interface Action<RewriteTablePath, RewriteTablePath.Result>
Returns:
  the result of this action
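The action is typically obtained from SparkActions and configured through the fluent setters above before calling execute(). A hedged end-to-end sketch, assuming a running SparkSession and a Table loaded from a catalog; the cluster and bucket paths are purely illustrative:

```java
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteTablePath;
import org.apache.iceberg.spark.actions.SparkActions;

public class RewriteTablePathExample {
  // 'table' must be an Iceberg table loaded from a catalog; catalog wiring is omitted.
  static RewriteTablePath.Result rewrite(Table table) {
    return SparkActions.get()
        .rewriteTablePath(table)
        // Every occurrence of the source prefix in table metadata is
        // replaced with the target prefix.
        .rewriteLocationPrefix("hdfs://old-cluster/warehouse/db/tbl",
                               "s3://new-bucket/warehouse/db/tbl")
        // Optional: where rewritten metadata is staged; defaults to a
        // subdirectory under the table's metadata directory.
        .stagingLocation("hdfs://old-cluster/staging/db/tbl")
        .execute();
  }
}
```

Note that the action rewrites paths in metadata; physically copying the staged files to the target prefix is a separate step performed by the caller.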
-
spark
  protected org.apache.spark.sql.SparkSession spark()
sparkContext
  protected org.apache.spark.api.java.JavaSparkContext sparkContext()
option
-
options
-
options
-
withJobGroupInfo
  protected <T> T withJobGroupInfo(JobGroupInfo info, Supplier<T> supplier)
newJobGroupInfo
  protected JobGroupInfo newJobGroupInfo(String groupId, String desc)
newStaticTable
  protected Table newStaticTable(String metadataFileLocation, FileIO io)
newStaticTable
  protected Table newStaticTable(TableMetadata metadata, FileIO io)
contentFileDS
  protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table)
contentFileDS
  protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table, Set<Long> snapshotIds)
manifestDS
  protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table)
manifestDS
  protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table, Set<Long> snapshotIds)
manifestListDS
  protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table)
manifestListDS
  protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table, Set<Long> snapshotIds)
statisticsFileDS
  protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(Table table, Set<Long> snapshotIds)
otherMetadataFileDS
  protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(Table table)
allReachableOtherMetadataFileDS
  protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(Table table)
loadMetadataTable
  protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
deleteFiles
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(ExecutorService executorService, Consumer<String> deleteFunc, Iterator<FileInfo> files)
Deletes files and keeps track of how many files were removed for each file type.
Parameters:
  executorService - an executor service to use for parallel deletes
  deleteFunc - a delete function
  files - an iterator of Spark rows of the structure (path: String, type: String)
Returns:
  stats on which files were deleted
-
deleteFiles
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(SupportsBulkOperations io, Iterator<FileInfo> files)
-