Package org.apache.iceberg.spark
Class SparkReadConf
java.lang.Object
org.apache.iceberg.spark.SparkReadConf
A class for common Iceberg configs for Spark reads.
If a config is set at multiple levels, the following order of precedence is used (highest precedence first):
- Read options
- Session configuration
- Table metadata
Note that this class is NOT meant to be serialized and sent to executors.
-
Constructor Details
-
SparkReadConf
-
SparkReadConf
-
-
Method Details
-
caseSensitive
public boolean caseSensitive() -
localityEnabled
public boolean localityEnabled() -
snapshotId
-
asOfTimestamp
-
startSnapshotId
-
endSnapshotId
-
branch
-
tag
-
scanTaskSetId
-
streamingSkipDeleteSnapshots
public boolean streamingSkipDeleteSnapshots() -
streamingSkipOverwriteSnapshots
public boolean streamingSkipOverwriteSnapshots() -
parquetVectorizationEnabled
public boolean parquetVectorizationEnabled() -
parquetBatchSize
public int parquetBatchSize() -
orcVectorizationEnabled
public boolean orcVectorizationEnabled() -
orcBatchSize
public int orcBatchSize() -
splitSizeOption
-
splitSize
public long splitSize() -
splitLookbackOption
-
splitLookback
public int splitLookback() -
splitOpenFileCostOption
-
splitOpenFileCost
public long splitOpenFileCost() -
streamFromTimestamp
public long streamFromTimestamp() -
startTimestamp
-
endTimestamp
-
maxFilesPerMicroBatch
public int maxFilesPerMicroBatch() -
maxRecordsPerMicroBatch
public int maxRecordsPerMicroBatch() -
preserveDataGrouping
public boolean preserveDataGrouping() -
aggregatePushDownEnabled
public boolean aggregatePushDownEnabled() -
adaptiveSplitSizeEnabled
public boolean adaptiveSplitSizeEnabled() -
parallelism
public int parallelism() -
distributedPlanningEnabled
public boolean distributedPlanningEnabled() -
dataPlanningMode
-
deletePlanningMode
-
executorCacheLocalityEnabled
public boolean executorCacheLocalityEnabled() -
reportColumnStats
public boolean reportColumnStats()
-