Package org.apache.iceberg.spark.actions
Class SnapshotTableSparkAction
java.lang.Object
  org.apache.iceberg.spark.actions.SnapshotTableSparkAction

All Implemented Interfaces:
Action<SnapshotTable,SnapshotTable.Result>, SnapshotTable
public class SnapshotTableSparkAction extends java.lang.Object implements SnapshotTable
Creates a new Iceberg table based on a source Spark table. The new Iceberg table will have a different data and metadata directory, allowing it to exist independently of the source table.
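For orientation, a minimal usage sketch follows. It assumes a running SparkSession, the SparkActions entry point from this package, and that SnapshotTable.Result exposes importedDataFilesCount(); the table identifiers and property are illustrative, not prescribed by this class.

import org.apache.iceberg.actions.SnapshotTable;
import org.apache.iceberg.spark.actions.SparkActions;
import org.apache.spark.sql.SparkSession;

public class SnapshotExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().getOrCreate();

    // Snapshot the source Spark table into a new, independent Iceberg table.
    SnapshotTable.Result result =
        SparkActions.get(spark)
            .snapshotTable("spark_catalog.db.source_table") // source table (illustrative identifier)
            .as("db.source_table_snapshot")                  // destination Iceberg table (illustrative identifier)
            .tableProperty("format-version", "2")            // optional table property
            .execute();

    System.out.println("Imported data files: " + result.importedDataFilesCount());
  }
}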
Nested Class Summary

Nested classes/interfaces inherited from interface org.apache.iceberg.actions.SnapshotTable:
SnapshotTable.Result

Field Summary
Fields

protected static org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
protected static org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
protected static java.util.List<java.lang.String> EXCLUDED_PROPERTIES
protected static java.lang.String FILE_PATH
protected static java.lang.String ICEBERG_METADATA_FOLDER
protected static java.lang.String LAST_MODIFIED
protected static java.lang.String LOCATION
protected static java.lang.String MANIFEST
protected static java.lang.String MANIFEST_LIST
protected static java.lang.String OTHERS
protected static java.lang.String STATISTICS_FILES
Method Summary

protected java.util.Map<java.lang.String,java.lang.String> additionalProperties()
protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(Table table)
SnapshotTableSparkAction as(java.lang.String ident)
    Sets the table identifier for the newly created Iceberg table.
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog checkDestinationCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
protected org.apache.spark.sql.connector.catalog.TableCatalog checkSourceCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table)
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(java.util.concurrent.ExecutorService executorService, java.util.function.Consumer<java.lang.String> deleteFunc, java.util.Iterator<FileInfo> files)
    Deletes files and keeps track of how many files were removed for each file type.
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(SupportsBulkOperations io, java.util.Iterator<FileInfo> files)
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog destCatalog()
protected org.apache.spark.sql.connector.catalog.Identifier destTableIdent()
protected java.util.Map<java.lang.String,java.lang.String> destTableProps()
protected void ensureNameMappingPresent(Table table)
SnapshotTable.Result execute()
    Executes this action.
SnapshotTableSparkAction executeWith(java.util.concurrent.ExecutorService service)
    Sets the executor service to use for parallel file reading.
protected java.lang.String getMetadataLocation(Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table)
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table)
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected Table newStaticTable(TableMetadata metadata, FileIO io)
ThisT option(java.lang.String name, java.lang.String value)
protected java.util.Map<java.lang.String,java.lang.String> options()
ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(Table table)
protected SnapshotTableSparkAction self()
protected void setProperties(java.util.Map<java.lang.String,java.lang.String> properties)
protected void setProperty(java.lang.String key, java.lang.String value)
protected org.apache.spark.sql.connector.catalog.TableCatalog sourceCatalog()
protected org.apache.spark.sql.connector.catalog.Identifier sourceTableIdent()
protected java.lang.String sourceTableLocation()
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
protected StagedSparkTable stageDestTable()
protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
SnapshotTableSparkAction tableLocation(java.lang.String location)
    Sets the table location for the newly created Iceberg table.
SnapshotTableSparkAction tableProperties(java.util.Map<java.lang.String,java.lang.String> properties)
    Sets table properties in the newly created Iceberg table.
SnapshotTableSparkAction tableProperty(java.lang.String property, java.lang.String value)
    Sets a table property in the newly created Iceberg table.
protected org.apache.spark.sql.catalyst.catalog.CatalogTable v1SourceTable()
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
Field Detail
-
LOCATION
protected static final java.lang.String LOCATION
- See Also:
- Constant Field Values
-
ICEBERG_METADATA_FOLDER
protected static final java.lang.String ICEBERG_METADATA_FOLDER
- See Also:
- Constant Field Values
-
EXCLUDED_PROPERTIES
protected static final java.util.List<java.lang.String> EXCLUDED_PROPERTIES
-
MANIFEST
protected static final java.lang.String MANIFEST
- See Also:
- Constant Field Values
-
MANIFEST_LIST
protected static final java.lang.String MANIFEST_LIST
- See Also:
- Constant Field Values
-
STATISTICS_FILES
protected static final java.lang.String STATISTICS_FILES
- See Also:
- Constant Field Values
-
OTHERS
protected static final java.lang.String OTHERS
- See Also:
- Constant Field Values
-
FILE_PATH
protected static final java.lang.String FILE_PATH
- See Also:
- Constant Field Values
-
LAST_MODIFIED
protected static final java.lang.String LAST_MODIFIED
- See Also:
- Constant Field Values
-
COMMA_SPLITTER
protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
-
COMMA_JOINER
protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
-
-
Method Detail
-
self
protected SnapshotTableSparkAction self()
-
destCatalog
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog destCatalog()
-
destTableIdent
protected org.apache.spark.sql.connector.catalog.Identifier destTableIdent()
-
as
public SnapshotTableSparkAction as(java.lang.String ident)
Description copied from interface: SnapshotTable
Sets the table identifier for the newly created Iceberg table.
- Specified by:
  as in interface SnapshotTable
- Parameters:
  ident - the destination table identifier
- Returns:
  this for method chaining
-
tableProperties
public SnapshotTableSparkAction tableProperties(java.util.Map<java.lang.String,java.lang.String> properties)
Description copied from interface: SnapshotTable
Sets table properties in the newly created Iceberg table. Any properties with the same key name will be overwritten.
- Specified by:
  tableProperties in interface SnapshotTable
- Parameters:
  properties - a map of properties to be included
- Returns:
  this for method chaining
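Continuing the class-level sketch (spark as defined there; property keys and identifiers are illustrative), several properties can be supplied in one call:

import java.util.HashMap;
import java.util.Map;

Map<String, String> props = new HashMap<>();
props.put("format-version", "2");
props.put("write.format.default", "parquet");

SparkActions.get(spark)
    .snapshotTable("spark_catalog.db.source_table")
    .as("db.source_table_snapshot")
    .tableProperties(props)  // entries whose keys were already set overwrite the earlier values
    .execute();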
-
tableProperty
public SnapshotTableSparkAction tableProperty(java.lang.String property, java.lang.String value)
Description copied from interface: SnapshotTable
Sets a table property in the newly created Iceberg table. Any properties with the same key name will be overwritten.
- Specified by:
  tableProperty in interface SnapshotTable
- Parameters:
  property - the key of the property to add
  value - the value of the property to add
- Returns:
  this for method chaining
-
executeWith
public SnapshotTableSparkAction executeWith(java.util.concurrent.ExecutorService service)
Description copied from interface: SnapshotTable
Sets the executor service to use for parallel file reading. By default, no executor service is used.
- Specified by:
  executeWith in interface SnapshotTable
- Parameters:
  service - executor service
- Returns:
  this for method chaining
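Continuing the class-level sketch, a bounded thread pool can be supplied for parallel file reading (pool size and identifiers are illustrative):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

ExecutorService pool = Executors.newFixedThreadPool(8);
try {
  SparkActions.get(spark)
      .snapshotTable("spark_catalog.db.source_table")
      .as("db.source_table_snapshot")
      .executeWith(pool)   // source files are read in parallel on this pool
      .execute();
} finally {
  pool.shutdown();
}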
-
execute
public SnapshotTable.Result execute()
Description copied from interface: Action
Executes this action.
- Specified by:
  execute in interface Action<SnapshotTable,SnapshotTable.Result>
- Returns:
  the result of this action
-
destTableProps
protected java.util.Map<java.lang.String,java.lang.String> destTableProps()
-
checkSourceCatalog
protected org.apache.spark.sql.connector.catalog.TableCatalog checkSourceCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
-
tableLocation
public SnapshotTableSparkAction tableLocation(java.lang.String location)
Description copied from interface: SnapshotTable
Sets the table location for the newly created Iceberg table.
- Specified by:
  tableLocation in interface SnapshotTable
- Parameters:
  location - a table location
- Returns:
  this for method chaining
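Continuing the class-level sketch, the new table's location can be set explicitly (the path is illustrative). Per the class description, the snapshot keeps its own data and metadata directories, so the chosen location should be distinct from the source table's location:

SparkActions.get(spark)
    .snapshotTable("spark_catalog.db.source_table")
    .as("db.source_table_snapshot")
    .tableLocation("s3://bucket/warehouse/source_table_snapshot")  // illustrative; separate from the source location
    .execute();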
-
sourceTableLocation
protected java.lang.String sourceTableLocation()
-
v1SourceTable
protected org.apache.spark.sql.catalyst.catalog.CatalogTable v1SourceTable()
-
sourceCatalog
protected org.apache.spark.sql.connector.catalog.TableCatalog sourceCatalog()
-
sourceTableIdent
protected org.apache.spark.sql.connector.catalog.Identifier sourceTableIdent()
-
setProperties
protected void setProperties(java.util.Map<java.lang.String,java.lang.String> properties)
-
setProperty
protected void setProperty(java.lang.String key, java.lang.String value)
-
additionalProperties
protected java.util.Map<java.lang.String,java.lang.String> additionalProperties()
-
checkDestinationCatalog
protected org.apache.spark.sql.connector.catalog.StagingTableCatalog checkDestinationCatalog(org.apache.spark.sql.connector.catalog.CatalogPlugin catalog)
-
stageDestTable
protected StagedSparkTable stageDestTable()
-
ensureNameMappingPresent
protected void ensureNameMappingPresent(Table table)
-
getMetadataLocation
protected java.lang.String getMetadataLocation(Table table)
-
spark
protected org.apache.spark.sql.SparkSession spark()
-
sparkContext
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
-
option
public ThisT option(java.lang.String name, java.lang.String value)
-
options
public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
-
options
protected java.util.Map<java.lang.String,java.lang.String> options()
-
withJobGroupInfo
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
-
newJobGroupInfo
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
-
newStaticTable
protected Table newStaticTable(TableMetadata metadata, FileIO io)
-
contentFileDS
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
-
manifestDS
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
-
manifestListDS
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
-
statisticsFileDS
protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(Table table, java.util.Set<java.lang.Long> snapshotIds)
-
otherMetadataFileDS
protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(Table table)
-
allReachableOtherMetadataFileDS
protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(Table table)
-
loadMetadataTable
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(Table table, MetadataTableType type)
-
deleteFiles
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(java.util.concurrent.ExecutorService executorService, java.util.function.Consumer<java.lang.String> deleteFunc, java.util.Iterator<FileInfo> files)
Deletes files and keeps track of how many files were removed for each file type.
- Parameters:
  executorService - an executor service to use for parallel deletes
  deleteFunc - a delete function
  files - an iterator of Spark rows of the structure (path: String, type: String)
- Returns:
  stats on which files were deleted
-
deleteFiles
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(SupportsBulkOperations io, java.util.Iterator<FileInfo> files)