/*
 * Decompiled with CFR 0.152.
 */
package org.apache.impala.util;

import com.google.common.base.Preconditions;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.impala.catalog.iceberg.GroupedContentFiles;
import org.apache.impala.thrift.TIcebergOptimizationMode;
import org.apache.impala.util.IcebergOptimizeFileFilter;
import org.junit.Assert;
import org.junit.Test;

public class IcebergFileFilterTest {
    private static final List<Integer> fileSizes = Arrays.asList(2, 10, 11, 100, 101, 200, 222, 250);
    private static PartitionSpec partitionSpec = IcebergFileFilterTest.buildPartitionSpec();

    private static PartitionSpec buildPartitionSpec() {
        Schema schema = new Schema(new Types.NestedField[]{Types.NestedField.required((int)1, (String)"a", (Type)Types.IntegerType.get()), Types.NestedField.required((int)2, (String)"b", (Type)Types.StringType.get()), Types.NestedField.required((int)3, (String)"data", (Type)Types.IntegerType.get())});
        return PartitionSpec.builderFor((Schema)schema).identity("a").build();
    }

    private static DataFile buildDataFile(long fileSizeInBytes, String path, String filename) {
        DataFile df = DataFiles.builder((PartitionSpec)partitionSpec).withPath(path + "/" + filename + ".parquet").withFileSizeInBytes(fileSizeInBytes).withPartitionPath(path).withRecordCount(1L).build();
        return df;
    }

    private void checkFiltering(GroupedContentFiles contentFiles, int fileSizeThreshold, TIcebergOptimizationMode expectedMode, Set<String> expectedPaths) {
        IcebergOptimizeFileFilter.FileFilteringResult result = IcebergOptimizeFileFilter.filterFilesBySize((IcebergOptimizeFileFilter.FilterArgs)new IcebergOptimizeFileFilter.FilterArgs(contentFiles, (long)fileSizeThreshold));
        Assert.assertEquals((long)result.getSelectedFilesWithoutDeletes().size(), (long)(expectedPaths != null ? (long)expectedPaths.size() : 0L));
        Assert.assertEquals((Object)result.getOptimizationMode(), (Object)expectedMode);
        if (expectedMode == TIcebergOptimizationMode.PARTIAL) {
            Preconditions.checkState((expectedPaths != null ? 1 : 0) != 0);
            for (DataFile df : result.getSelectedFilesWithoutDeletes()) {
                Assert.assertTrue((boolean)expectedPaths.contains(df.path()));
            }
        } else {
            Preconditions.checkState((expectedPaths == null ? 1 : 0) != 0);
            Assert.assertTrue((boolean)result.getSelectedFilesWithoutDeletes().isEmpty());
        }
    }

    @Test
    public void testUnpartitioned() {
        GroupedContentFiles contentFiles = new GroupedContentFiles();
        Iterator<Integer> iterator = fileSizes.iterator();
        while (iterator.hasNext()) {
            long fileSize = iterator.next().intValue();
            DataFile df = IcebergFileFilterTest.buildDataFile(fileSize, "a=1", "size_" + fileSize);
            contentFiles.dataFilesWithoutDeletes.add(df);
        }
        this.checkFiltering(contentFiles, 0, TIcebergOptimizationMode.NOOP, null);
        this.checkFiltering(contentFiles, 2, TIcebergOptimizationMode.NOOP, null);
        HashSet<String> filePaths = new HashSet<String>();
        Collections.addAll(filePaths, "a=1/size_2.parquet", "a=1/size_10.parquet", "a=1/size_11.parquet");
        this.checkFiltering(contentFiles, 100, TIcebergOptimizationMode.PARTIAL, filePaths);
        this.checkFiltering(contentFiles, 500, TIcebergOptimizationMode.REWRITE_ALL, null);
    }

    @Test
    public void testPartitioned() {
        GroupedContentFiles contentFiles = new GroupedContentFiles();
        for (int i = 0; i < fileSizes.size(); ++i) {
            int size = fileSizes.get(i);
            DataFile df = IcebergFileFilterTest.buildDataFile(size, "a=" + i % 3, "size_" + size);
            contentFiles.dataFilesWithoutDeletes.add(df);
        }
        contentFiles.dataFilesWithoutDeletes.add(IcebergFileFilterTest.buildDataFile(100L, "a=3", "size_100"));
        contentFiles.dataFilesWithoutDeletes.add(IcebergFileFilterTest.buildDataFile(120L, "a=4", "size_120"));
        this.checkFiltering(contentFiles, 5, TIcebergOptimizationMode.NOOP, null);
        contentFiles.dataFilesWithDeletes.add(IcebergFileFilterTest.buildDataFile(10L, "a=1", "d10"));
        contentFiles.dataFilesWithDeletes.add(IcebergFileFilterTest.buildDataFile(100L, "a=4", "d100"));
        HashSet<String> filePaths = new HashSet<String>();
        this.checkFiltering(contentFiles, 0, TIcebergOptimizationMode.PARTIAL, filePaths);
        filePaths.add("a=1/size_10.parquet");
        this.checkFiltering(contentFiles, 12, TIcebergOptimizationMode.PARTIAL, filePaths);
        Collections.addAll(filePaths, "a=0/size_2.parquet", "a=0/size_100.parquet", "a=1/size_101.parquet", "a=4/size_120.parquet");
        this.checkFiltering(contentFiles, 200, TIcebergOptimizationMode.PARTIAL, filePaths);
        Collections.addAll(filePaths, "a=0/size_222.parquet", "a=1/size_250.parquet", "a=2/size_11.parquet", "a=2/size_200.parquet");
        this.checkFiltering(contentFiles, 500, TIcebergOptimizationMode.PARTIAL, filePaths);
    }
}

