/*
 * Decompiled with CFR 0.152.
 */
package org.apache.impala.catalog;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.theta.Sketches;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.BlobMetadata;
import org.apache.iceberg.GenericBlobMetadata;
import org.apache.iceberg.StatisticsFile;
import org.apache.iceberg.Table;
import org.apache.iceberg.exceptions.NotFoundException;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.puffin.FileMetadata;
import org.apache.iceberg.puffin.Puffin;
import org.apache.iceberg.puffin.PuffinReader;
import org.apache.iceberg.util.Pair;
import org.apache.impala.common.FileSystemUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Loads NDV (number of distinct values) column statistics for an Iceberg table from
 * its Puffin statistics files.
 *
 * <p>For every field id the loader picks the blob that belongs to the most recent
 * snapshot (by snapshot timestamp), unless the caller-supplied HMS statistics for that
 * field come from an even more recent snapshot. The NDV is taken from the "ndv"
 * property of the blob metadata stored in the table metadata if it is present and
 * parseable; otherwise the theta sketch is read from the Puffin file itself.
 *
 * <p>Instances are single-use: create one through {@link #loadPuffinStats}.
 */
public class PuffinStatsLoader {
    private static final Logger LOG = LoggerFactory.getLogger(PuffinStatsLoader.class);

    /** The only blob type this loader reads. */
    private static final String THETA_SKETCH_BLOB_TYPE = "apache-datasketches-theta-v1";

    private final Table iceApiTable_;
    private final String tblName_;
    /** Field id -> snapshot id of the statistics already available in HMS. */
    private final Map<Integer, Long> fieldIdsWithHmsStats_;
    /**
     * Statistics file -> (field id -> snapshot id) of the blobs that still have to be
     * read. Entries of the inner maps are removed as soon as a value has been loaded.
     */
    private Map<StatisticsFile, Map<Integer, Long>> blobsToRead_;
    /** Field id -> loaded statistics record. */
    private final Map<Integer, PuffinStatsRecord> result_ = new HashMap<>();

    private PuffinStatsLoader(Table iceApiTable, String tblName,
            Map<Integer, Long> fieldIdsWithHmsStats) {
        this.iceApiTable_ = iceApiTable;
        this.tblName_ = tblName;
        this.fieldIdsWithHmsStats_ = fieldIdsWithHmsStats;
    }

    /**
     * Loads the Puffin NDV statistics of the given table.
     *
     * @param iceApiTable the Iceberg table whose statistics files are read
     * @param tblName table name, used only in log messages
     * @param fieldIdsWithHmsStats field id -> snapshot id for the columns that already
     *     have statistics in HMS; Puffin blobs older than these are ignored
     * @return field id -> statistics record; empty if the table has no current snapshot
     */
    public static Map<Integer, PuffinStatsRecord> loadPuffinStats(Table iceApiTable,
            String tblName, Map<Integer, Long> fieldIdsWithHmsStats) {
        PuffinStatsLoader loader =
                new PuffinStatsLoader(iceApiTable, tblName, fieldIdsWithHmsStats);
        return loader.loadPuffinStatsImpl();
    }

    /**
     * Runs the two loading passes: first the NDV values available in the metadata are
     * collected for all files, then the remaining blobs are read from the Puffin files.
     */
    private Map<Integer, PuffinStatsRecord> loadPuffinStatsImpl() {
        if (iceApiTable_.currentSnapshot() == null) {
            return new HashMap<>();
        }
        initBlobsToRead();
        for (StatisticsFile statsFile : blobsToRead_.keySet()) {
            loadStatsFromMetadata(statsFile);
        }
        for (StatisticsFile statsFile : blobsToRead_.keySet()) {
            loadStatsFromFile(statsFile);
        }
        return result_;
    }

    /** Computes which blobs need to be read and stores them grouped by file. */
    private void initBlobsToRead() {
        Map<Integer, org.apache.impala.common.Pair<Long, StatisticsFile>> blobsToRead =
                calculateBlobsToRead();
        blobsToRead_ = groupBlobsToReadByFile(blobsToRead);
    }

    /**
     * Selects, for each field id, the snapshot id and statistics file of the blob to
     * use. Blobs for which HMS has more recent statistics are skipped; among the rest
     * the one with the most recent snapshot wins. If two blobs refer to the same
     * snapshot, the first encountered one is kept and a warning is logged.
     */
    private Map<Integer, org.apache.impala.common.Pair<Long, StatisticsFile>>
            calculateBlobsToRead() {
        Map<Integer, org.apache.impala.common.Pair<Long, StatisticsFile>>
                fieldIdToSnapshotId = new HashMap<>();
        List<StatisticsFile> statsFiles = iceApiTable_.statisticsFiles();
        for (StatisticsFile statsFile : statsFiles) {
            for (BlobMetadata mBlob : getBlobsFromMetadataJsonSection(statsFile)) {
                // Guaranteed by blobFilterPredicate().
                Preconditions.checkState(mBlob.fields().size() == 1);
                int fieldId = mBlob.fields().get(0);
                long snapshotId = mBlob.sourceSnapshotId();
                if (hmsHasMoreRecentStats(fieldId, snapshotId)) {
                    continue;
                }
                org.apache.impala.common.Pair<Long, StatisticsFile> oldValue =
                        fieldIdToSnapshotId.get(fieldId);
                if (oldValue == null || isMoreRecentSnapshot(snapshotId, oldValue.first)) {
                    fieldIdToSnapshotId.put(fieldId,
                            new org.apache.impala.common.Pair<Long, StatisticsFile>(
                                    snapshotId, statsFile));
                } else if (oldValue.first.longValue() == snapshotId) {
                    logDuplicateStat(fieldId, snapshotId, oldValue.second.path(),
                            statsFile.path(), null);
                }
            }
        }
        return fieldIdToSnapshotId;
    }

    /**
     * Regroups a "field id -> (snapshot id, file)" map into
     * "file -> (field id -> snapshot id)".
     */
    private Map<StatisticsFile, Map<Integer, Long>> groupBlobsToReadByFile(
            Map<Integer, org.apache.impala.common.Pair<Long, StatisticsFile>> blobsToRead) {
        Map<StatisticsFile, Map<Integer, Long>> res = new HashMap<>();
        for (Map.Entry<Integer, org.apache.impala.common.Pair<Long, StatisticsFile>> entry
                : blobsToRead.entrySet()) {
            int fieldId = entry.getKey();
            long snapshotId = entry.getValue().first;
            StatisticsFile file = entry.getValue().second;
            res.computeIfAbsent(file, k -> new HashMap<Integer, Long>())
                    .put(fieldId, snapshotId);
        }
        return res;
    }

    /**
     * Returns true if the timestamp of snapshot 'snapshotIdToCheck' is strictly greater
     * than the timestamp of snapshot 'baseSnapshotId'.
     *
     * <p>NOTE(review): both ids must refer to snapshots present in the table, otherwise
     * this throws a NullPointerException. Blob snapshot ids are verified by
     * blobFilterPredicate(), but the ids in 'fieldIdsWithHmsStats_' are not checked in
     * this class - confirm the caller guarantees they exist.
     */
    private boolean isMoreRecentSnapshot(long snapshotIdToCheck, long baseSnapshotId) {
        long baseTs = iceApiTable_.snapshot(baseSnapshotId).timestampMillis();
        long toCheckTs = iceApiTable_.snapshot(snapshotIdToCheck).timestampMillis();
        return toCheckTs > baseTs;
    }

    /**
     * Returns true if HMS has statistics for 'fieldId' that belong to a snapshot more
     * recent than 'snapshotId'.
     */
    private boolean hmsHasMoreRecentStats(int fieldId, long snapshotId) {
        Long hmsSnapshot = fieldIdsWithHmsStats_.get(fieldId);
        if (hmsSnapshot == null) {
            return false;
        }
        return isMoreRecentSnapshot(hmsSnapshot, snapshotId);
    }

    /**
     * Loads the NDV values that are available as an "ndv" property in the blob metadata
     * of 'statsFile', without opening the Puffin file. Blobs for which a value was
     * loaded are removed from 'blobsToRead_'.
     */
    private void loadStatsFromMetadata(StatisticsFile statsFile) {
        for (BlobMetadata mBlob : getBlobsFromMetadataJsonSection(statsFile)) {
            Preconditions.checkState(mBlob.fields().size() == 1);
            int fieldId = mBlob.fields().get(0);
            long snapshotId = mBlob.sourceSnapshotId();
            if (!shouldUseBlob(statsFile, fieldId, snapshotId)) {
                continue;
            }
            Preconditions.checkState(result_.get(fieldId) == null);
            long ndv = getNdvFromMetadata(mBlob);
            if (ndv == -1L) {
                // No usable property; the value will be read from the Puffin file.
                continue;
            }
            PuffinStatsRecord record =
                    new PuffinStatsRecord(statsFile, mBlob.sourceSnapshotId(), true, ndv);
            addStatsRecordToResult(fieldId, record, null);
            blobsToRead_.get(statsFile).remove(fieldId);
        }
    }

    /**
     * Reads the remaining blobs of 'statsFile' from the Puffin file itself. If the file
     * is missing a warning is logged; on any other error a warning is logged and all
     * values loaded from this file during this call are discarded. The reader is always
     * closed.
     */
    private void loadStatsFromFile(StatisticsFile statsFile) {
        List<Integer> fieldIdsFromFile = new ArrayList<>();
        PuffinReader puffinReader = null;
        try {
            puffinReader = createPuffinReader(statsFile);
            List<org.apache.iceberg.puffin.BlobMetadata> blobs =
                    getBlobsFromPuffinFile(puffinReader, statsFile);
            for (Pair<org.apache.iceberg.puffin.BlobMetadata, ByteBuffer> puffinData
                    : puffinReader.readAll(blobs)) {
                loadStatsFromBlob(puffinData.first(), puffinData.second(), statsFile,
                        fieldIdsFromFile);
            }
        } catch (NotFoundException e) {
            logWarningWithFile(tblName_, statsFile.path(), true, e);
        } catch (Exception e) {
            logWarningWithFile(tblName_, statsFile.path(), false, e);
            result_.keySet().removeAll(fieldIdsFromFile);
        } finally {
            closeReader(puffinReader, statsFile);
        }
    }

    /**
     * Closes 'reader' if it is not null. A failure to close is only logged so that
     * statistics that were already read successfully are not discarded.
     */
    private void closeReader(PuffinReader reader, StatisticsFile statsFile) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException | RuntimeException e) {
            LOG.warn(String.format(
                    "Could not close Puffin file '%s' of table '%s'. Exception: %s",
                    statsFile.path(), tblName_, e));
        }
    }

    /**
     * Returns the value of the "ndv" property of 'blob', or -1 if the property is
     * missing or is not a valid long (a warning is logged in the latter case).
     */
    private long getNdvFromMetadata(BlobMetadata blob) {
        String ndvProperty = blob.properties().get("ndv");
        if (ndvProperty == null) {
            return -1L;
        }
        try {
            return Long.parseLong(ndvProperty);
        } catch (NumberFormatException e) {
            int fieldId = blob.fields().get(0);
            Preconditions.checkNotNull(iceApiTable_.schema().findField(fieldId));
            String colName = fieldIdToColName(fieldId);
            // The arguments must be passed to String.format(); handing them to
            // LOG.warn() instead makes String.format() throw
            // MissingFormatArgumentException.
            LOG.warn(String.format(
                    "Invalid NDV property in the statistics metadata for column %s: '%s'",
                    colName, ndvProperty));
            return -1L;
        }
    }

    /** Logs a warning about a Puffin file that could not be loaded. */
    private static void logWarningWithFile(String tableName, String statsFilePath,
            boolean fileMissing, Exception e) {
        String missingStr = fileMissing ? "missing " : "";
        LOG.warn(String.format("Could not load Iceberg Puffin column statistics for table '%s' from %sPuffin file '%s'. Exception: %s", tableName, missingStr, statsFilePath, e));
    }

    /**
     * Logs a warning about two NDV values for the same column and snapshot. The message
     * differs depending on whether both values come from the same file.
     *
     * @param existingRecordNdv the NDV that is kept, or null if it is not known yet
     */
    private void logDuplicateStat(int fieldId, long snapshotId,
            String existingRecordFilePath, String newRecordFilePath,
            Long existingRecordNdv) {
        String colName = fieldIdToColName(fieldId);
        String existingNdvStr =
                existingRecordNdv == null ? "" : String.format(" (%s)", existingRecordNdv);
        if (existingRecordFilePath.equals(newRecordFilePath)) {
            LOG.warn(String.format("Multiple NDV values from Puffin statistics file %s for column '%s' of table '%s' for snapshot %s. Only using the first encountered one%s, ignoring the rest.", existingRecordFilePath, colName, tblName_, snapshotId, existingNdvStr));
        } else {
            LOG.warn(String.format("Multiple NDV values from Puffin statistics for column '%s' of table '%s' for snapshot %s. Ignoring new value from file %s, using old value%s from file %s.", colName, tblName_, snapshotId, newRecordFilePath, existingNdvStr, existingRecordFilePath));
        }
    }

    /**
     * Creates a reader for 'statsFile' using the file and footer sizes recorded in the
     * table metadata. The caller is responsible for closing the returned reader.
     */
    private static PuffinReader createPuffinReader(StatisticsFile statsFile) {
        Configuration conf = FileSystemUtil.getConfiguration();
        InputFile puffinFile = HadoopInputFile.fromLocation(statsFile.path(), conf);
        return Puffin.read(puffinFile)
                .withFileSize(statsFile.fileSizeInBytes())
                .withFooterSize(statsFile.fileFooterSizeInBytes())
                .build();
    }

    /**
     * Returns the blobs listed for 'statsFile' in the table metadata that pass
     * blobFilterPredicate().
     */
    private List<BlobMetadata> getBlobsFromMetadataJsonSection(StatisticsFile statsFile) {
        return statsFile.blobMetadata().stream()
                .filter(blob -> blobFilterPredicate(blob, statsFile.path()))
                .collect(Collectors.toList());
    }

    /**
     * Returns the blobs listed in the footer of the Puffin file that pass
     * blobFilterPredicate() and are still scheduled to be read from 'statsFile'. At most
     * one blob is returned per field id: the first one encountered.
     */
    private List<org.apache.iceberg.puffin.BlobMetadata> getBlobsFromPuffinFile(
            PuffinReader puffinReader, StatisticsFile statsFile) throws IOException {
        FileMetadata fileMetadata = puffinReader.fileMetadata();
        List<org.apache.iceberg.puffin.BlobMetadata> res = new ArrayList<>();
        HashSet<Integer> fieldIdsAdded = new HashSet<>();
        for (org.apache.iceberg.puffin.BlobMetadata blob : fileMetadata.blobs()) {
            // A null file name suppresses the invalid-field-id warning of the predicate.
            if (!blobFilterPredicate(GenericBlobMetadata.from(blob), null)) {
                continue;
            }
            int fieldId = blob.inputFields().get(0);
            long snapshotId = blob.snapshotId();
            if (!shouldUseBlob(statsFile, fieldId, snapshotId)) {
                continue;
            }
            Preconditions.checkState(!result_.containsKey(fieldId));
            if (fieldIdsAdded.add(fieldId)) {
                res.add(blob);
            }
        }
        return res;
    }

    /**
     * Returns true if the blob of 'statsFile' for the given field and snapshot is still
     * scheduled to be read according to 'blobsToRead_'.
     */
    private boolean shouldUseBlob(StatisticsFile statsFile, int fieldId, long snapshotId) {
        Map<Integer, Long> fieldIdsAndSnapshots = blobsToRead_.get(statsFile);
        if (fieldIdsAndSnapshots == null) {
            return false;
        }
        Long snapshotToRead = fieldIdsAndSnapshots.get(fieldId);
        return snapshotToRead != null && snapshotId == snapshotToRead.longValue();
    }

    /**
     * Returns true if the blob can be used: it refers to exactly one field, its source
     * snapshot exists in the table, the field id exists in the table schema and the blob
     * is a theta sketch.
     *
     * @param fileName name of the file, used in the warning about an invalid field id;
     *     if null, no warning is logged
     */
    private boolean blobFilterPredicate(BlobMetadata blobMetadata, String fileName) {
        if (blobMetadata.fields().size() != 1) {
            return false;
        }
        if (iceApiTable_.snapshot(blobMetadata.sourceSnapshotId()) == null) {
            return false;
        }
        int fieldId = blobMetadata.fields().get(0);
        if (iceApiTable_.schema().findField(fieldId) == null) {
            if (fileName != null) {
                LOG.warn(String.format("Invalid field id %s for table '%s' found in Puffin stats file '%s'. Ignoring blob.", fieldId, tblName_, fileName));
            }
            return false;
        }
        return THETA_SKETCH_BLOB_TYPE.equals(blobMetadata.type());
    }

    /**
     * Computes the NDV estimate from the theta sketch in 'blobData' and adds it to the
     * result. If the sketch cannot be parsed a warning is logged and the blob is
     * skipped.
     *
     * @param fieldIdsFromFile receives the field id if a value was added to the result
     */
    private void loadStatsFromBlob(org.apache.iceberg.puffin.BlobMetadata blobMetadata,
            ByteBuffer blobData, StatisticsFile statsFile, List<Integer> fieldIdsFromFile) {
        Preconditions.checkState(blobMetadata.inputFields().size() == 1);
        int fieldId = blobMetadata.inputFields().get(0);
        Preconditions.checkNotNull(iceApiTable_.schema().findField(fieldId));
        double ndv = -1.0;
        try {
            ndv = Sketches.getEstimate(Memory.wrap(getBytes(blobData)));
        } catch (SketchesArgumentException e) {
            String colName = fieldIdToColName(fieldId);
            LOG.warn(String.format("Error reading datasketch for column '%s' of table '%s' from Puffin stats file %s: %s", colName, tblName_, statsFile.path(), e));
            return;
        }
        Preconditions.checkState(ndv != -1.0);
        long ndvRounded = Math.round(ndv);
        PuffinStatsRecord record =
                new PuffinStatsRecord(statsFile, blobMetadata.snapshotId(), false, ndvRounded);
        addStatsRecordToResult(fieldId, record, fieldIdsFromFile);
        blobsToRead_.get(statsFile).remove(fieldId);
    }

    /**
     * Adds 'record' to the result; there must be no record for 'fieldId' yet.
     *
     * @param fieldIdsFromFile if the record was read from a Puffin file, the field id is
     *     appended to this list; may be null for records that come from the metadata
     */
    private void addStatsRecordToResult(int fieldId, PuffinStatsRecord record,
            List<Integer> fieldIdsFromFile) {
        PuffinStatsRecord prevRecord = result_.putIfAbsent(fieldId, record);
        Preconditions.checkState(prevRecord == null);
        if (!record.isFromMetadataJson) {
            fieldIdsFromFile.add(fieldId);
        }
    }

    /** Returns the column name of 'fieldId'; the field must exist in the table schema. */
    private String fieldIdToColName(int fieldId) {
        String colName = iceApiTable_.schema().idToName().get(fieldId);
        Preconditions.checkNotNull(colName);
        return colName;
    }

    /**
     * Returns the remaining bytes of 'byteBuffer' without changing its position. The
     * backing array is returned directly when the buffer covers it entirely, otherwise
     * the bytes are copied.
     */
    private static byte[] getBytes(ByteBuffer byteBuffer) {
        if (byteBuffer.hasArray() && byteBuffer.arrayOffset() == 0
                && byteBuffer.position() == 0) {
            byte[] array = byteBuffer.array();
            if (byteBuffer.remaining() == array.length) {
                return array;
            }
        }
        byte[] bytes = new byte[byteBuffer.remaining()];
        // Read through a duplicate so that the position of the original is untouched.
        byteBuffer.asReadOnlyBuffer().get(bytes);
        return bytes;
    }

    /** An NDV value loaded for one column, together with its origin. */
    public static class PuffinStatsRecord {
        /** The statistics file the value belongs to. */
        public final StatisticsFile file;
        /** The snapshot id recorded in the blob the value was taken from. */
        public final long snapshotId;
        /**
         * True if the value was taken from the "ndv" property in the metadata, false if
         * it was computed from the sketch stored in the Puffin file.
         */
        public final boolean isFromMetadataJson;
        /** The number of distinct values. */
        public final long ndv;

        public PuffinStatsRecord(StatisticsFile file, long snapshotId,
                boolean isFromMetadataJson, long ndv) {
            this.file = file;
            this.snapshotId = snapshotId;
            this.isFromMetadataJson = isFromMetadataJson;
            this.ndv = ndv;
        }
    }
}

