package org.apache.impala.planner;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.impala.analysis.Expr;
import org.apache.impala.catalog.FeFsTable;
import org.apache.impala.catalog.FeIcebergTable;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.common.Pair;
import org.apache.impala.compat.HiveMetadataFormatUtils;
import org.apache.impala.planner.TableSink;
import org.apache.impala.thrift.TDataSink;
import org.apache.impala.thrift.TDataSinkType;
import org.apache.impala.thrift.TExplainLevel;
import org.apache.impala.thrift.THdfsTableSink;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.thrift.TSortingOrder;
import org.apache.impala.thrift.TTableSink;
import org.apache.impala.thrift.TTableSinkType;
import org.apache.impala.util.BitUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/impala/planner/HdfsTableSink.class */
public class HdfsTableSink extends TableSink {
    // Class logger; assigned in the static initializer of this class.
    private static final Logger LOG;
    // Key of the table property that toThriftImpl() looks up in the metastore table
    // parameters and passes to parseParquetBloomFilterWritingTblProp().
    public static final String PARQUET_BLOOM_FILTER_WRITING_TBL_PROPERTY = "parquet.bloom.filter.columns";
    // 128 MiB (2^27). Used by parseParquetBloomFilterWritingTblProp() both as the size for
    // a column that specifies none and as the upper clamp for a requested size.
    public static final long PARQUET_BLOOM_FILTER_MAX_BYTES = 134217728;
    // Lower bound for a bloom filter size; the parser clamps requested sizes to at least 64.
    public static final long PARQUET_BLOOM_FILTER_MIN_BYTES = 64;
    // Same value that computeResourceProfile() falls back to when the per-instance NDV of
    // the partition key expressions is reported as -1.
    protected final long DEFAULT_NUM_PARTITIONS = 10;
    // Partition key expressions (first list argument of the constructor). Serialized into
    // the Thrift sink and printed as PARTITION-KEYS in the explain output.
    protected final List<Expr> partitionKeyExprs_;
    // Printed as OVERWRITE=true/false in the explain output and forwarded to the Thrift sink.
    protected final boolean overwrite_;
    // When true, computeResourceProfile() uses 1 as the per-instance partition count.
    protected final boolean inputIsClustered_;
    // When true, collectExprs() collects every output expression instead of only the
    // leading targetTable_.getNonClusteringColumns().size() ones.
    protected final boolean isResultSink_;
    // File formats listed for this sink; populated in the static initializer.
    public static final Set<HdfsFileFormat> SUPPORTED_FILE_FORMATS;
    // Taken from the 'first' member of the constructor's Pair argument; sent via setSort_columns().
    private List<Integer> sortColumns_;
    // Taken from the 'second' member of the constructor's Pair argument; sent via setSorting_order().
    private TSortingOrder sortingOrder_;
    // Forwarded to the Thrift sink only when it differs from -1.
    private long writeId_;
    // When positive, caps the values returned by getNumInstances() and getNumNodes().
    private int maxHdfsSinks_;
    // Optional; toThriftImpl() forwards it (with the partition depth below) only when non-null.
    protected String externalOutputDir_;
    // Forwarded to the Thrift sink only together with a non-null externalOutputDir_.
    private int externalOutputPartitionDepth_;
    // Assertion-status flag, set in the static initializer from desiredAssertionStatus().
    static final /* synthetic */ boolean $assertionsDisabled;

    /**
     * Creates an INSERT sink that writes into the given file-system table.
     *
     * @param feTable target table; must be an {@link FeFsTable}, otherwise
     *     {@code Preconditions.checkState} fails
     * @param list partition key expressions, stored as {@code partitionKeyExprs_}
     * @param list2 output expressions, handed to the {@link TableSink} constructor
     * @param z stored as {@code overwrite_}
     * @param z2 stored as {@code inputIsClustered_}
     * @param pair sort columns ({@code first}) and sorting order ({@code second})
     * @param j stored as {@code writeId_}; -1 means no write id is sent to the backend
     * @param i stored as {@code maxHdfsSinks_}; a positive value caps the instance count
     * @param z3 stored as {@code isResultSink_}
     */
    public HdfsTableSink(FeTable feTable, List<Expr> list, List<Expr> list2, boolean z, boolean z2, Pair<List<Integer>, TSortingOrder> pair, long j, int i, boolean z3) {
        super(feTable, TableSink.Op.INSERT, list2);
        // DEFAULT_NUM_PARTITIONS is a final field initialized at its declaration, so it must
        // not be assigned again here. The previous placeholder list for sortColumns_ was
        // always overwritten below and is therefore not created any more.
        Preconditions.checkState(feTable instanceof FeFsTable);
        this.partitionKeyExprs_ = list;
        this.overwrite_ = z;
        this.inputIsClustered_ = z2;
        this.sortColumns_ = pair.first;
        this.sortingOrder_ = pair.second;
        this.writeId_ = j;
        this.maxHdfsSinks_ = i;
        this.isResultSink_ = z3;
    }

    /**
     * Sets the external output directory. When it is non-null, toThriftImpl() forwards it
     * to the Thrift sink together with the external output partition depth.
     *
     * @param str the directory to store; may be null
     */
    public void setExternalOutputDir(String str) {
        this.externalOutputDir_ = str;
    }

    /**
     * Sets the external output partition depth. toThriftImpl() forwards it to the Thrift
     * sink only when an external output directory has been set as well.
     *
     * @param i the depth value to store
     */
    public void setExternalOutputPartitionDepth(int i) {
        this.externalOutputPartitionDepth_ = i;
    }

    /**
     * Estimates the memory one instance of this sink needs and stores the result in
     * {@code resourceProfile_} as a profile without reservation.
     *
     * <p>The partition-based bound is (partitions per instance) x (memory per partition),
     * where the partition count is 1 for clustered input, otherwise the per-instance NDV of
     * the partition key expressions, or 10 when that NDV is reported as -1. If the plan
     * root's cardinality and average row size are both known, the estimate is additionally
     * capped by the per-instance input size in bytes.
     *
     * @param tQueryOptions query options; only {@code mt_dop} is read here
     */
    @Override // org.apache.impala.planner.DataSink
    public void computeResourceProfile(TQueryOptions tQueryOptions) {
        PlanNode planRoot = this.fragment_.getPlanRoot();
        int numInstances = this.fragment_.getNumInstances();

        long perInstanceNdv;
        if (this.inputIsClustered_) {
            perInstanceNdv = 1;
        } else {
            perInstanceNdv = this.fragment_.getPerInstanceNdv(tQueryOptions.getMt_dop(), this.partitionKeyExprs_);
            if (perInstanceNdv == -1) {
                // NDV unknown: same value as DEFAULT_NUM_PARTITIONS.
                perInstanceNdv = 10;
            }
        }

        long perPartitionMemReq = getPerPartitionMemReq(((FeFsTable) this.targetTable_).getFileFormats());
        // Use the checked helper for both branches. A plain long multiplication wraps
        // around silently for a very large NDV, which previously could happen when the
        // cardinality or row size was unknown.
        long partitionBound = PlanNode.checkedMultiply(perInstanceNdv, perPartitionMemReq);

        long perInstanceMemEstimate;
        if (planRoot.getCardinality() == -1 || planRoot.getAvgRowSize() == -1.0f) {
            perInstanceMemEstimate = partitionBound;
        } else {
            // Every instance is assumed to receive at least one row.
            long perInstanceRows = Math.max(1L, planRoot.getCardinality() / numInstances);
            long perInstanceInputBytes = (long) Math.ceil(((float) perInstanceRows) * planRoot.getAvgRowSize());
            perInstanceMemEstimate = Math.min(perInstanceInputBytes, partitionBound);
        }
        this.resourceProfile_ = ResourceProfile.noReservation(perInstanceMemEstimate);
    }

    /**
     * Returns the memory, in bytes, assumed per written partition.
     *
     * @param set the file formats of the target table
     * @return 1 GiB if the set contains PARQUET, otherwise 100 KiB
     */
    private long getPerPartitionMemReq(Set<HdfsFileFormat> set) {
        if (set.contains(HdfsFileFormat.PARQUET)) {
            return 1024L * 1024L * 1024L;
        }
        return 100L * 1024L;
    }

    /**
     * Appends the explain text of this sink to {@code sb}.
     *
     * <p>The header line always contains the target table name and the OVERWRITE flag and,
     * if there are partition key expressions, their SQL as a comma-separated list. Above
     * MINIMAL level a "partitions=" line follows; at EXTENDED level or higher the output
     * expressions are listed as well.
     *
     * @param str prefix of the header line
     * @param str2 prefix of every detail line
     * @param tQueryOptions not used by this implementation
     * @param tExplainLevel requested level of detail
     * @param sb builder that receives the text
     */
    @Override // org.apache.impala.planner.DataSink
    public void appendSinkExplainString(String str, String str2, TQueryOptions tQueryOptions, TExplainLevel tExplainLevel, StringBuilder sb) {
        final String overwriteClause = ", OVERWRITE=" + overwrite_;

        String partitionClause = "";
        if (!partitionKeyExprs_.isEmpty()) {
            StringBuilder keys = new StringBuilder(", PARTITION-KEYS=(");
            boolean firstKey = true;
            for (Expr keyExpr : partitionKeyExprs_) {
                if (!firstKey) {
                    keys.append(",");
                }
                keys.append(keyExpr.toSql());
                firstKey = false;
            }
            partitionClause = keys.append(")").toString();
        }
        sb.append(String.format("%sWRITE TO HDFS [%s%s%s]\n", str, targetTable_.getFullName(), overwriteClause, partitionClause));

        final int level = tExplainLevel.ordinal();
        if (level > TExplainLevel.MINIMAL.ordinal()) {
            final long partitionCount = Expr.getNumDistinctValues(partitionKeyExprs_);
            sb.append(str2);
            if (partitionCount == -1) {
                sb.append("partitions=unavailable");
            } else {
                // A count of zero is reported as a single partition.
                sb.append("partitions=").append(partitionCount == 0 ? 1L : partitionCount);
            }
            sb.append(HiveMetadataFormatUtils.LINE_DELIM);
        }
        if (level >= TExplainLevel.EXTENDED.ordinal()) {
            sb.append(str2).append("output exprs: ");
            sb.append(Expr.getExplainString(outputExprs_, tExplainLevel)).append(HiveMetadataFormatUtils.LINE_DELIM);
        }
    }

    /**
     * Returns the fixed label of this sink.
     *
     * @return the string "HDFS WRITER"
     */
    @Override // org.apache.impala.planner.DataSink
    protected String getLabel() {
        return "HDFS WRITER";
    }

    /**
     * Parses the value of the "parquet.bloom.filter.columns" table property.
     *
     * <p>The value is a comma-separated list of entries, each either {@code column} or
     * {@code column:size}. A column without a size gets
     * {@link #PARQUET_BLOOM_FILTER_MAX_BYTES}. An explicit size is clamped to the range
     * [64, {@link #PARQUET_BLOOM_FILTER_MAX_BYTES}] and then rounded up to a power of two.
     * Column names and sizes are trimmed.
     *
     * @param str the raw property value; must not be null
     * @return a map from column name to bloom filter size in bytes, or {@code null} if an
     *     entry has more than one colon, consists only of colons, or carries a size that
     *     is not a valid long (a warning is logged in each case)
     */
    @VisibleForTesting
    static Map<String, Long> parseParquetBloomFilterWritingTblProp(String str) {
        Map<String, Long> columnToBytes = new HashMap<>();
        for (String entry : str.split(",")) {
            String[] tokens = entry.split(":");
            if (tokens.length == 0 || tokens.length > 2) {
                // The separator is a colon; the message used to show a semicolon.
                LOG.warn("Invalid token in table property parquet.bloom.filter.columns: {}. Expected either a column name or a column name and a size separated by a colon (':').", entry.trim());
                return null;
            }
            long bytes = PARQUET_BLOOM_FILTER_MAX_BYTES;
            // The check above leaves only lengths 1 and 2, so a second token is the size.
            if (tokens.length == 2) {
                String requestedSize = tokens[1].trim();
                try {
                    // Clamp first, then round, so the result stays within the bounds.
                    bytes = BitUtil.roundUpToPowerOf2(Long.min(PARQUET_BLOOM_FILTER_MAX_BYTES, Long.max(64L, Long.parseLong(requestedSize))));
                } catch (NumberFormatException e) {
                    LOG.warn("Invalid bitset size in table property parquet.bloom.filter.columns: {}", requestedSize);
                    return null;
                }
            }
            columnToBytes.put(tokens[0].trim(), bytes);
        }
        return columnToBytes;
    }

    /**
     * Fills {@code tDataSink} with the Thrift form of this sink: a {@link TTableSink} of
     * type HDFS wrapping a {@link THdfsTableSink}, plus the serialized output expressions.
     *
     * <p>Optional settings are sent only when present: the skip-header line count when it
     * is positive, the bloom filter column info when the table property exists, the
     * external output directory and partition depth when the directory is non-null, and
     * the write id when it differs from -1.
     *
     * @param tDataSink the Thrift structure to populate
     */
    @Override // org.apache.impala.planner.DataSink
    protected void toThriftImpl(TDataSink tDataSink) {
        final THdfsTableSink hdfsSink = new THdfsTableSink(Expr.treesToThrift(partitionKeyExprs_), overwrite_, inputIsClustered_, sortingOrder_);
        final FeFsTable fsTable = (FeFsTable) targetTable_;

        // The parse call must leave the builder it is given empty.
        final StringBuilder parseOutput = new StringBuilder();
        final int headerLines = fsTable.parseSkipHeaderLineCount(parseOutput);
        Preconditions.checkState(parseOutput.length() == 0);
        if (headerLines > 0) {
            hdfsSink.setSkip_header_line_count(headerLines);
        }

        final String bloomFilterProp = (String) fsTable.getMetaStoreTable().getParameters().get(PARQUET_BLOOM_FILTER_WRITING_TBL_PROPERTY);
        if (bloomFilterProp != null) {
            hdfsSink.setParquet_bloom_filter_col_info(parseParquetBloomFilterWritingTblProp(bloomFilterProp));
        }

        hdfsSink.setSort_columns(sortColumns_);
        hdfsSink.setSorting_order(sortingOrder_);
        hdfsSink.setIs_result_sink(isResultSink_);
        if (externalOutputDir_ != null) {
            hdfsSink.setExternal_output_dir(externalOutputDir_);
            hdfsSink.setExternal_output_partition_depth(externalOutputPartitionDepth_);
        }
        if (writeId_ != -1) {
            hdfsSink.setWrite_id(writeId_);
        }

        final TTableSink tableSink = new TTableSink(0, TTableSinkType.HDFS, sinkOp_.toThrift());
        tableSink.hdfs_table_sink = hdfsSink;
        tDataSink.table_sink = tableSink;
        tDataSink.output_exprs = Expr.treesToThrift(outputExprs_);
    }

    /**
     * Returns the Thrift sink type of this sink.
     *
     * @return always {@link TDataSinkType#TABLE_SINK}
     */
    @Override // org.apache.impala.planner.DataSink
    protected TDataSinkType getSinkType() {
        return TDataSinkType.TABLE_SINK;
    }

    /**
     * Adds the expressions of this sink to {@code list}.
     *
     * <p>The partition key expressions come first, unless the target is an
     * {@link FeIcebergTable}. They are followed by the output expressions: all of them for
     * a result sink, otherwise only the leading ones, one per non-clustering column of the
     * target table.
     *
     * @param list the list that receives the expressions
     */
    @Override // org.apache.impala.planner.DataSink
    public void collectExprs(List<Expr> list) {
        final boolean targetIsIceberg = targetTable_ instanceof FeIcebergTable;
        if (!targetIsIceberg) {
            list.addAll(partitionKeyExprs_);
        }
        if (!isResultSink_) {
            final int nonClusteringCount = targetTable_.getNonClusteringColumns().size();
            list.addAll(outputExprs_.subList(0, nonClusteringCount));
            return;
        }
        list.addAll(outputExprs_);
    }

    /**
     * Returns the number of nodes of this sink.
     *
     * @return the node count of the fragment's plan root; if {@code maxHdfsSinks_} is
     *     positive, the smaller of that count and {@link #getNumInstances()}
     */
    public int getNumNodes() {
        final int plannedNodes = getFragment().getPlanRoot().getNumNodes();
        if (maxHdfsSinks_ <= 0) {
            return plannedNodes;
        }
        return Math.min(plannedNodes, getNumInstances());
    }

    /**
     * Returns the number of instances of this sink.
     *
     * @return the instance count of the fragment's plan root; if {@code maxHdfsSinks_} is
     *     positive, the smaller of that count and {@code maxHdfsSinks_}
     */
    public int getNumInstances() {
        final int plannedInstances = getFragment().getPlanRoot().getNumInstances();
        if (maxHdfsSinks_ <= 0) {
            return plannedInstances;
        }
        return Math.min(plannedInstances, maxHdfsSinks_);
    }

    // Initializes the static final fields that are declared without an initializer.
    static {
        $assertionsDisabled = !HdfsTableSink.class.desiredAssertionStatus();
        LOG = LoggerFactory.getLogger(HdfsTableSink.class);
        // Six elements select the (e1..e6, E... others) overload with no extra elements.
        SUPPORTED_FILE_FORMATS = ImmutableSet.of(
                HdfsFileFormat.PARQUET,
                HdfsFileFormat.TEXT,
                HdfsFileFormat.RC_FILE,
                HdfsFileFormat.SEQUENCE_FILE,
                HdfsFileFormat.AVRO,
                HdfsFileFormat.ICEBERG);
    }
}
