/*
 * Decompiled with CFR 0.152.
 */
package org.apache.iceberg.mr.hive;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerTestUtils;
import org.apache.iceberg.mr.hive.HiveIcebergStorageHandlerWithEngineBase;
import org.apache.iceberg.mr.hive.HiveIcebergTestUtils;
import org.apache.iceberg.mr.hive.IcebergTableUtil;
import org.apache.iceberg.mr.hive.TestTables;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.thrift.TException;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
import org.junit.runners.Parameterized;

/**
 * Verifies basic and column statistics of Iceberg tables accessed through Hive.
 *
 * <p>Every parameter combination supplied by {@link HiveIcebergStorageHandlerWithEngineBase#parameters()}
 * is run once per stats source ({@code "iceberg"} and {@code "metastore"}); the chosen source is written
 * into the Hive configuration before each test by {@link #setStatsSource()}.
 */
public class TestHiveIcebergStatistics
extends HiveIcebergStorageHandlerWithEngineBase {
    /** Stats source value under which column stats are expected to be kept with the Iceberg table. */
    private static final String ICEBERG_STATS_SOURCE = "iceberg";

    /** Stats source value for the alternative (metastore) stats source. */
    private static final String METASTORE_STATS_SOURCE = "metastore";

    /** Key shown by DESCRIBE and stored in the HMS table parameters for stats accuracy. */
    private static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE";

    /** Matches an empty JSON object, i.e. a COLUMN_STATS_ACCURATE value carrying no stats. */
    private static final String EMPTY_STATS_REGEX = "\\{\\}\\s*";

    /** Explanation attached to the assumption guarding the CTAS tests. */
    private static final String CTAS_REQUIRES_HIVE_CATALOG = "CTAS target table must be a HiveCatalog table. For other catalog types, the target Iceberg table would be created successfully but the table will not be registered in HMS. This means that even though the CTAS query succeeds, the new table wouldn't be immediately queryable from Hive, since HMS does not know about it.";

    /** Fifth test parameter: the stats source to configure for the run. */
    @Parameterized.Parameter(4)
    public String statsSource;

    /**
     * Builds the parameter matrix: each base parameter array extended with one stats source value.
     *
     * @return all base parameter arrays, once with {@code "iceberg"} and once with {@code "metastore"} appended
     */
    @Parameterized.Parameters(name="fileFormat={0}, catalog={1}, isVectorized={2}, formatVersion={3}, statsSource={4}")
    public static Collection<Object[]> parameters() {
        Collection<Object[]> baseParams = HiveIcebergStorageHandlerWithEngineBase.parameters();
        List<Object[]> testParams = Lists.newArrayList();
        for (String source : new String[]{ICEBERG_STATS_SOURCE, METASTORE_STATS_SOURCE}) {
            for (Object[] params : baseParams) {
                testParams.add(ArrayUtils.add(params, source));
            }
        }
        return testParams;
    }

    /** Applies the parameterized stats source to the shell's Hive configuration before each test. */
    @Before
    public void setStatsSource() {
        HiveConf.setVar(shell.getHiveConf(), HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE, this.statsSource);
    }

    /** ANALYZE ... COMPUTE STATISTICS on a populated table must yield valid basic stats. */
    @Test
    public void testAnalyzeTableComputeStatistics() throws IOException, TException, InterruptedException {
        // Runs for the iceberg stats source on every catalog; for other sources only on HiveCatalog.
        Assume.assumeTrue(ICEBERG_STATS_SOURCE.equals(this.statsSource) || this.isHiveCatalog());
        String dbName = "default";
        String tableName = "customers";
        Table table = this.testTables.createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, this.fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        shell.executeStatement("ANALYZE TABLE " + dbName + "." + tableName + " COMPUTE STATISTICS");
        this.validateBasicStats(table, dbName, tableName);
    }

    /** ANALYZE ... COMPUTE STATISTICS FOR COLUMNS must also leave valid basic stats behind. */
    @Test
    public void testAnalyzeTableComputeStatisticsForColumns() throws IOException, TException, InterruptedException {
        String dbName = "default";
        String tableName = "orders";
        Table table = this.testTables.createTable(shell, tableName, ORDER_SCHEMA, this.fileFormat, ORDER_RECORDS);
        shell.executeStatement("ANALYZE TABLE " + dbName + "." + tableName + " COMPUTE STATISTICS FOR COLUMNS");
        this.validateBasicStats(table, dbName, tableName);
    }

    /** ANALYZE ... COMPUTE STATISTICS must work on a table that holds no records. */
    @Test
    public void testAnalyzeTableComputeStatisticsEmptyTable() throws IOException, TException, InterruptedException {
        String dbName = "default";
        String tableName = "customers";
        Table table = this.testTables.createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, this.fileFormat, Lists.newArrayList());
        shell.executeStatement("ANALYZE TABLE " + dbName + "." + tableName + " COMPUTE STATISTICS");
        this.validateBasicStats(table, dbName, tableName);
    }

    /** With autogather enabled, consecutive inserts must keep column stats accurate and widen min/max. */
    @Test
    public void testStatsWithInsert() {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        this.createEmptyCustomersTable(identifier, PartitionSpec.unpartitioned());
        this.analyzeColumnsUnlessHiveCatalog(identifier);

        shell.executeStatement(this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false));
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2);

        shell.executeStatement(this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, identifier, false));
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 5);
    }

    /** Insert with external locking on and 'engine.hive.lock-enabled=false' must still gather stats. */
    @Test
    public void testStatsWithPessimisticLockInsert() {
        Assume.assumeTrue(this.isHiveCatalog());
        TableIdentifier identifier = this.getTableIdentifierWithPessimisticLock("false");
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
        shell.executeStatement(insert);
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2);
    }

    /** Insert with external locking on and 'engine.hive.lock-enabled=true' is expected to fail. */
    @Test
    public void testStatsWithPessimisticLockInsertWhenHiveLockEnabled() {
        Assume.assumeTrue(this.isHiveCatalog());
        TableIdentifier identifier = this.getTableIdentifierWithPessimisticLock("true");
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
        AssertHelpers.assertThrows("Should throw RuntimeException when Hive locking is on with 'engine.hive.lock-enabled=true'", RuntimeException.class, () -> shell.executeStatement(insert));
    }

    /**
     * Enables stats autogather and external locking for the session, then creates an empty, unpartitioned
     * {@code default.customers} table carrying the given 'engine.hive.lock-enabled' table property.
     *
     * @param hiveLockEnabled value for the 'engine.hive.lock-enabled' table property
     * @return identifier of the created table
     */
    private TableIdentifier getTableIdentifierWithPessimisticLock(String hiveLockEnabled) {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        shell.setHiveSessionValue(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER.varname, true);
        shell.setHiveSessionValue(HiveConf.ConfVars.HIVE_TXN_EXT_LOCKING_ENABLED.varname, true);
        this.testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, PartitionSpec.unpartitioned(), this.fileFormat, ImmutableList.of(), this.formatVersion, ImmutableMap.of("engine.hive.lock-enabled", hiveLockEnabled));
        return identifier;
    }

    /** INSERT OVERWRITE with autogather enabled must produce accurate column stats for the new data. */
    @Test
    public void testStatsWithInsertOverwrite() {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        this.createEmptyCustomersTable(identifier, PartitionSpec.unpartitioned());
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, identifier, true);
        shell.executeStatement(insert);
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxValue(identifier.name(), "customer_id", 3, 5);
    }

    /** Inserting into a table partitioned by last_name must keep column stats accurate. */
    @Test
    public void testStatsWithPartitionedInsert() {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name").build();
        this.createEmptyCustomersTable(identifier, spec);
        this.analyzeColumnsUnlessHiveCatalog(identifier);
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
        shell.executeStatement(insert);
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStat(identifier.name(), "first_name", true);
        this.checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2);
    }

    /** CTAS into an Iceberg table with autogather enabled must produce accurate column stats. */
    @Test
    public void testStatsWithCTAS() {
        Assume.assumeTrue(CTAS_REQUIRES_HIVE_CATALOG, this.isHiveCatalog());
        this.createAndFillCtasSource();
        shell.setHiveSessionValue(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER.varname, true);
        shell.executeStatement(String.format("CREATE TABLE target STORED BY ICEBERG %s %s AS SELECT * FROM source", this.testTables.locationForCreateTableSQL(TableIdentifier.of("default", "target")), this.testTables.propertiesForCreateTableSQL(ImmutableMap.of("write.format.default", this.fileFormat.toString()))));
        this.checkColStat("target", "id", true);
        this.checkColStatMinMaxValue("target", "id", 0, 2);
    }

    /** CTAS into a partitioned Iceberg table must produce accurate column stats. */
    @Test
    public void testStatsWithPartitionedCTAS() {
        Assume.assumeTrue(CTAS_REQUIRES_HIVE_CATALOG, this.isHiveCatalog());
        this.createAndFillCtasSource();
        shell.setHiveSessionValue(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER.varname, true);
        shell.executeStatement(String.format("CREATE TABLE target PARTITIONED BY (dept, name) STORED BY ICEBERG %s AS SELECT * FROM source s", this.testTables.propertiesForCreateTableSQL(ImmutableMap.of("write.format.default", this.fileFormat.toString()))));
        // For the iceberg stats source the column stats are computed explicitly before being checked.
        if (ICEBERG_STATS_SOURCE.equals(this.statsSource)) {
            shell.executeStatement("ANALYZE TABLE target COMPUTE STATISTICS FOR COLUMNS");
        }
        this.checkColStat("target", "id", true);
        this.checkColStat("target", "dept", true);
        this.checkColStatMinMaxValue("target", "id", 0, 2);
        this.checkColStatMaxLengthDistinctValue("target", "dept", 5, 3);
        this.checkColStatMaxLengthDistinctValue("target", "name", 5, 3);
    }

    /** Appending through the Iceberg API with 'iceberg.hive.keep.stats=false' must invalidate column stats. */
    @Test
    public void testStatsRemoved() throws IOException {
        Assume.assumeTrue("Only HiveCatalog can remove stats which become obsolete", this.isHiveCatalog());
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        this.createEmptyCustomersTable(identifier, PartitionSpec.unpartitioned());
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, true);
        shell.executeStatement(insert);
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2);

        shell.metastore().hiveConf().set("iceberg.hive.keep.stats", "false");
        // Append via a separately obtained TestTables instance instead of a Hive INSERT statement.
        TestTables nonHiveTestTables = HiveIcebergStorageHandlerTestUtils.testTables(shell, this.testTableType, this.temp);
        Table nonHiveTable = nonHiveTestTables.loadTable(identifier);
        nonHiveTestTables.appendIcebergTable(shell.getHiveConf(), nonHiveTable, this.fileFormat, null, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        this.checkColStat(identifier.name(), "customer_id", false);
    }

    /** After an insert, the HMS table parameters must flag basic and per-column stats as up to date. */
    @Test
    public void testColumnStatsAccurate() throws Exception {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        this.createEmptyCustomersTable(identifier, PartitionSpec.unpartitioned());
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, true);
        shell.executeStatement(insert);
        org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", identifier.name());
        Assert.assertTrue(hmsTable.getParameters().containsKey(COLUMN_STATS_ACCURATE));
        Assert.assertTrue(StatsSetupConst.areBasicStatsUptoDate(hmsTable.getParameters()));
        for (Types.NestedField column : HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA.columns()) {
            Assert.assertTrue(StatsSetupConst.areColumnStatsUptoDate(hmsTable.getParameters(), column.name()));
        }
    }

    /** Column stats of successive inserts must be merged: min/max/distinct/nulls reflect all data so far. */
    @Test
    public void testMergeStatsWithInsert() {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        this.createEmptyCustomersTable(identifier, PartitionSpec.unpartitioned());
        this.analyzeColumnsUnlessHiveCatalog(identifier);

        shell.executeStatement(this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false));
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxDistinctValue(identifier.name(), "customer_id", 0, 2, 3, 0);

        shell.executeStatement(this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_1, identifier, false));
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxDistinctValue(identifier.name(), "customer_id", 0, 5, 6, 0);

        // The expected values after the third batch are the same as after the second one.
        shell.executeStatement(this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.OTHER_CUSTOMER_RECORDS_2, identifier, false));
        this.checkColStat(identifier.name(), "customer_id", true);
        this.checkColStatMinMaxDistinctValue(identifier.name(), "customer_id", 0, 5, 6, 0);
    }

    /** For the iceberg stats source, an insert must leave an existing column stats file for the table. */
    @Test
    public void testIcebergColStatsPath() throws IOException {
        Assume.assumeTrue(ICEBERG_STATS_SOURCE.equals(this.statsSource));
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        Table table = this.createEmptyCustomersTable(identifier, PartitionSpec.unpartitioned());
        String insert = this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
        shell.executeStatement(insert);
        table.refresh();

        Path tblColPath = IcebergTableUtil.getColStatsPath(table).orElse(null);
        Assert.assertNotNull(tblColPath);
        Assert.assertTrue(tblColPath.getFileSystem(shell.getHiveConf()).exists(tblColPath));

        // The table data must still be readable after the stats were written.
        List<Object[]> result = shell.executeStatement("SELECT * FROM customers");
        HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, result));
    }

    /** @return whether the current run uses the HiveCatalog table type */
    private boolean isHiveCatalog() {
        return this.testTableType == TestTables.TestTableType.HIVE_CATALOG;
    }

    /**
     * Enables stats autogather for the session and creates an empty customers table.
     *
     * @param identifier identifier whose name is used as the table name
     * @param spec partition spec of the new table
     * @return the created table
     */
    private Table createEmptyCustomersTable(TableIdentifier identifier, PartitionSpec spec) {
        shell.setHiveSessionValue(HiveConf.ConfVars.HIVE_STATS_AUTOGATHER.varname, true);
        return this.testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, this.fileFormat, ImmutableList.of());
    }

    /**
     * Runs ANALYZE ... COMPUTE STATISTICS FOR COLUMNS on the table for every catalog type except HiveCatalog.
     *
     * @param identifier table to analyze
     */
    private void analyzeColumnsUnlessHiveCatalog(TableIdentifier identifier) {
        if (!this.isHiveCatalog()) {
            shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
        }
    }

    /** Creates the partitioned ORC table {@code source} used by the CTAS tests and fills it with the customer records. */
    private void createAndFillCtasSource() {
        shell.executeStatement("CREATE TABLE source (id bigint, name string) PARTITIONED BY (dept string) STORED AS ORC");
        shell.executeStatement(this.testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, TableIdentifier.of("default", "source"), false));
    }

    /**
     * Checks the COLUMN_STATS_ACCURATE row reported by {@code DESCRIBE <table> <column>}.
     *
     * @param tableName table to describe
     * @param colName column to describe
     * @param accurate when {@code true} the row must exist with a non-empty value; when {@code false} the row
     *     must either be missing (only one row returned) or hold an empty JSON object
     */
    private void checkColStat(String tableName, String colName, boolean accurate) {
        List<Object[]> rows = shell.executeStatement("DESCRIBE " + tableName + " " + colName);
        if (!accurate && rows.size() == 1) {
            // No COLUMN_STATS_ACCURATE row at all is accepted as "not accurate".
            return;
        }
        Assert.assertEquals(2, rows.size());
        Assert.assertEquals(COLUMN_STATS_ACCURATE, rows.get(1)[0]);
        boolean statsEmpty = rows.get(1)[1].toString().matches(EMPTY_STATS_REGEX);
        if (accurate) {
            Assert.assertFalse(statsEmpty);
        } else {
            Assert.assertTrue(statsEmpty);
        }
    }

    /**
     * Asserts the min and max values reported by {@code DESCRIBE FORMATTED <table> <column>}.
     *
     * @param tableName table to describe
     * @param colName column to describe
     * @param minValue expected minimum
     * @param maxValue expected maximum
     */
    private void checkColStatMinMaxValue(String tableName, String colName, int minValue, int maxValue) {
        List<Object[]> rows = this.describeFormatted(tableName, colName);
        assertStatRow(rows, 2, "min", minValue);
        assertStatRow(rows, 3, "max", maxValue);
    }

    /**
     * Asserts the maximum column length and distinct count reported by {@code DESCRIBE FORMATTED}.
     *
     * @param tableName table to describe
     * @param colName column to describe
     * @param maxLength expected max_col_len
     * @param distinct expected distinct_count
     */
    private void checkColStatMaxLengthDistinctValue(String tableName, String colName, int maxLength, int distinct) {
        List<Object[]> rows = this.describeFormatted(tableName, colName);
        assertStatRow(rows, 7, "max_col_len", maxLength);
        assertStatRow(rows, 5, "distinct_count", distinct);
    }

    /**
     * Asserts min, max, null count and distinct count reported by {@code DESCRIBE FORMATTED}.
     *
     * @param tableName table to describe
     * @param colName column to describe
     * @param minValue expected minimum
     * @param maxValue expected maximum
     * @param distinct expected distinct_count
     * @param nulls expected num_nulls
     */
    private void checkColStatMinMaxDistinctValue(String tableName, String colName, int minValue, int maxValue, int distinct, int nulls) {
        List<Object[]> rows = this.describeFormatted(tableName, colName);
        assertStatRow(rows, 2, "min", minValue);
        assertStatRow(rows, 3, "max", maxValue);
        assertStatRow(rows, 4, "num_nulls", nulls);
        assertStatRow(rows, 5, "distinct_count", distinct);
    }

    /**
     * Runs {@code DESCRIBE FORMATTED <table> <column>} and returns the resulting rows.
     *
     * @param tableName table to describe
     * @param colName column to describe
     * @return rows as returned by the shell
     */
    private List<Object[]> describeFormatted(String tableName, String colName) {
        return shell.executeStatement("DESCRIBE FORMATTED " + tableName + " " + colName);
    }

    /**
     * Asserts that the row at the given position carries the expected statistic name and value.
     *
     * @param rows DESCRIBE FORMATTED output
     * @param rowIndex zero-based position of the statistic row
     * @param statName expected content of the first cell
     * @param expected expected value; compared in its decimal string form against the second cell
     */
    private static void assertStatRow(List<Object[]> rows, int rowIndex, String statName, int expected) {
        Object[] row = rows.get(rowIndex);
        Assert.assertEquals(statName, row[0]);
        Assert.assertEquals(String.valueOf(expected), row[1]);
    }
}

