package org.apache.hadoop.mapreduce.lib.input;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.mapred.LocatedFileStatusFetcher;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.util.Lists;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StopWatch;
import org.apache.hadoop.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@InterfaceStability.Stable
@InterfaceAudience.Public
/* loaded from: input_file:org/apache/hadoop/mapreduce/lib/input/FileInputFormat.class */
public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
    /** Configuration key holding the comma-separated, escaped input paths; written by setInputPaths/addInputPath and read by getInputPaths. */
    public static final String INPUT_DIR = "mapreduce.input.fileinputformat.inputdir";
    /** Configuration key for the maximum split size; getMaxSplitSize falls back to Long.MAX_VALUE when it is unset. */
    public static final String SPLIT_MAXSIZE = "mapreduce.input.fileinputformat.split.maxsize";
    /** Configuration key for the minimum split size; getMinSplitSize falls back to 1 when it is unset. */
    public static final String SPLIT_MINSIZE = "mapreduce.input.fileinputformat.split.minsize";
    /** Configuration key naming the user PathFilter class; written by setInputPathFilter and read by getInputPathFilter. */
    public static final String PATHFILTER_CLASS = "mapreduce.input.pathFilter.class";
    /** Configuration key under which getSplits stores the number of listed input files. */
    public static final String NUM_INPUT_FILES = "mapreduce.input.fileinputformat.numinputfiles";
    /** Configuration key for the recursive-listing flag; see setInputDirRecursive/getInputDirRecursive (default false). */
    public static final String INPUT_DIR_RECURSIVE = "mapreduce.input.fileinputformat.input.dir.recursive";
    /** Configuration key read by getSplits: when true and recursion is off, directory entries in the listing are skipped. */
    public static final String INPUT_DIR_NONRECURSIVE_IGNORE_SUBDIRS = "mapreduce.input.fileinputformat.input.dir.nonrecursive.ignore.subdirs";
    /** Configuration key read by listStatus: a value of 1 selects the single-threaded listing, any other value uses LocatedFileStatusFetcher. */
    public static final String LIST_STATUS_NUM_THREADS = "mapreduce.input.fileinputformat.list-status.num-threads";
    /** Default for LIST_STATUS_NUM_THREADS (per its name); listStatus uses the same value, 1, as its fallback. */
    public static final int DEFAULT_LIST_STATUS_NUM_THREADS = 1;
    /** Threshold that getSplits compares the remaining-bytes / split-size ratio against when deciding whether to cut another split. */
    private static final double SPLIT_SLOP = 1.1d;
    /** Class-wide SLF4J logger. */
    private static final Logger LOG = LoggerFactory.getLogger(FileInputFormat.class);
    /**
     * Filter that rejects "hidden" entries: any path whose final name component
     * starts with an underscore or a dot. Everything else is accepted.
     */
    private static final PathFilter hiddenFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String fileName = path.getName();
            boolean hidden = fileName.startsWith("_") || fileName.startsWith(".");
            return !hidden;
        }
    };

    /**
     * Counter enum declaring a single constant, {@code BYTES_READ}. The enum is
     * marked deprecated and is not referenced anywhere else in this file.
     */
    @Deprecated
    /* loaded from: input_file:org/apache/hadoop/mapreduce/lib/input/FileInputFormat$Counter.class */
    public enum Counter {
        BYTES_READ
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hadoop/mapreduce/lib/input/FileInputFormat$MultiPathFilter.class */
    /**
     * Composite {@link PathFilter} that accepts a path only when every one of
     * its delegate filters accepts it (logical AND). An empty delegate list
     * accepts everything.
     */
    public static class MultiPathFilter implements PathFilter {
        /** Delegates consulted in list order; the list is stored as given, not copied. */
        private final List<PathFilter> filters;

        /**
         * @param list the filters that must all accept a path
         */
        public MultiPathFilter(List<PathFilter> list) {
            this.filters = list;
        }

        /**
         * @param path the path to test
         * @return {@code false} as soon as one delegate rejects the path, {@code true} otherwise
         */
        @Override
        public boolean accept(Path path) {
            for (PathFilter delegate : this.filters) {
                if (!delegate.accept(path)) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Stores the recursive-listing flag in the job configuration under
     * "mapreduce.input.fileinputformat.input.dir.recursive".
     *
     * @param job the job whose configuration is modified
     * @param z   the flag value to store
     */
    public static void setInputDirRecursive(Job job, boolean z) {
        job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", z);
    }

    /**
     * Reads the recursive-listing flag from the job configuration.
     *
     * @param jobContext the job context whose configuration is consulted
     * @return the stored flag, or {@code false} when the key is not set
     */
    public static boolean getInputDirRecursive(JobContext jobContext) {
        return jobContext.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.recursive", false);
    }

    /**
     * Lower bound on the split size imposed by the format itself. getSplits
     * takes the larger of this value and the configured minimum split size.
     *
     * @return 1 in this base implementation
     */
    protected long getFormatMinSplitSize() {
        return 1L;
    }

    /**
     * Tells getSplits whether the given file may be cut into several splits.
     * This base implementation ignores both arguments.
     *
     * @param jobContext the job context (unused here)
     * @param path       the file being considered (unused here)
     * @return always {@code true} in this base implementation
     */
    protected boolean isSplitable(JobContext jobContext, Path path) {
        return true;
    }

    /**
     * Registers a {@link PathFilter} class in the job configuration under
     * {@link #PATHFILTER_CLASS}; getInputPathFilter later instantiates it.
     *
     * @param job the job whose configuration is modified
     * @param cls the filter class to record
     */
    public static void setInputPathFilter(Job job, Class<? extends PathFilter> cls) {
        job.getConfiguration().setClass(PATHFILTER_CLASS, cls, PathFilter.class);
    }

    /**
     * Stores the minimum split size in the job configuration under
     * {@link #SPLIT_MINSIZE}.
     *
     * @param job the job whose configuration is modified
     * @param j   the minimum split size to store
     */
    public static void setMinInputSplitSize(Job job, long j) {
        job.getConfiguration().setLong(SPLIT_MINSIZE, j);
    }

    /**
     * Reads the minimum split size from the job configuration.
     *
     * @param jobContext the job context whose configuration is consulted
     * @return the value stored under {@link #SPLIT_MINSIZE}, or 1 when unset
     */
    public static long getMinSplitSize(JobContext jobContext) {
        return jobContext.getConfiguration().getLong(SPLIT_MINSIZE, 1L);
    }

    /**
     * Stores the maximum split size in the job configuration under
     * {@link #SPLIT_MAXSIZE}.
     *
     * @param job the job whose configuration is modified
     * @param j   the maximum split size to store
     */
    public static void setMaxInputSplitSize(Job job, long j) {
        job.getConfiguration().setLong(SPLIT_MAXSIZE, j);
    }

    /**
     * Reads the maximum split size from the job configuration.
     *
     * @param jobContext the job context whose configuration is consulted
     * @return the value stored under {@link #SPLIT_MAXSIZE}, or
     *         {@code Long.MAX_VALUE} when unset
     */
    public static long getMaxSplitSize(JobContext jobContext) {
        return jobContext.getConfiguration().getLong(SPLIT_MAXSIZE, Long.MAX_VALUE);
    }

    /**
     * Instantiates the user-supplied path filter recorded under
     * {@link #PATHFILTER_CLASS}, if any.
     *
     * @param jobContext the job context whose configuration is consulted
     * @return a new filter instance created through {@code ReflectionUtils},
     *         or {@code null} when no filter class is configured
     */
    public static PathFilter getInputPathFilter(JobContext jobContext) {
        Configuration configuration = jobContext.getConfiguration();
        Class<?> filterClass = configuration.getClass(PATHFILTER_CLASS, null, PathFilter.class);
        if (filterClass == null) {
            return null;
        }
        return (PathFilter) ReflectionUtils.newInstance(filterClass, configuration);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Lists the input files of the job.
     *
     * <p>The configured input paths are expanded and filtered with the built-in
     * hidden-file filter combined with the optional user filter from
     * {@link #getInputPathFilter}. Depending on
     * {@link #LIST_STATUS_NUM_THREADS} the listing is done either in the
     * calling thread or through a {@code LocatedFileStatusFetcher}.
     *
     * @param jobContext the job to list input files for
     * @return the file statuses found for the input paths
     * @throws IOException if no input paths are configured, if listing fails,
     *         or (as an {@link InterruptedIOException}) if the fetcher is
     *         interrupted
     */
    public List<FileStatus> listStatus(JobContext jobContext) throws IOException {
        Path[] inputPaths = getInputPaths(jobContext);
        if (inputPaths.length == 0) {
            throw new IOException("No input paths specified in job");
        }
        // Obtain delegation tokens for the input paths before touching them.
        TokenCache.obtainTokensForNamenodes(jobContext.getCredentials(), inputPaths, jobContext.getConfiguration());
        boolean inputDirRecursive = getInputDirRecursive(jobContext);

        // The hidden-file filter always applies; the user filter is optional.
        List<PathFilter> filters = new ArrayList<PathFilter>();
        filters.add(hiddenFileFilter);
        PathFilter inputPathFilter = getInputPathFilter(jobContext);
        if (inputPathFilter != null) {
            filters.add(inputPathFilter);
        }
        MultiPathFilter combinedFilter = new MultiPathFilter(filters);

        // Fallback of 1 matches DEFAULT_LIST_STATUS_NUM_THREADS.
        int numThreads = jobContext.getConfiguration().getInt(LIST_STATUS_NUM_THREADS, 1);
        StopWatch stopWatch = new StopWatch().start();
        List<FileStatus> result;
        if (numThreads == 1) {
            result = singleThreadedListStatus(jobContext, inputPaths, combinedFilter, inputDirRecursive);
        } else {
            try {
                LocatedFileStatusFetcher fetcher = new LocatedFileStatusFetcher(
                        jobContext.getConfiguration(), inputPaths, inputDirRecursive, combinedFilter, true);
                result = Lists.newArrayList(fetcher.getFileStatuses());
            } catch (InterruptedException e) {
                // Surface the interruption as an IOException subtype, keeping the cause.
                throw ((IOException) new InterruptedIOException("Interrupted while getting file statuses").initCause(e));
            }
        }
        stopWatch.stop();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Time taken to get FileStatuses: " + stopWatch.now(TimeUnit.MILLISECONDS));
        }
        LOG.info("Total input files to process : {}", result.size());
        return result;
    }

    /**
     * Lists the input files in the calling thread.
     *
     * <p>Each input path is glob-expanded with the given filter. Plain files
     * among the matches are added as-is; matched directories are listed one
     * level deep, and their sub-directories are descended into only when
     * {@code z} is set. Problems with individual input paths are collected and
     * reported together once all paths have been processed.
     *
     * @param jobContext the job whose configuration selects the file systems
     * @param pathArr    the input paths (may contain glob patterns)
     * @param pathFilter filter applied to glob matches and directory children
     * @param z          whether to recurse into sub-directories
     * @return the file statuses found
     * @throws IOException an {@code InvalidInputException} listing every input
     *         path that does not exist or matches no files, or any I/O error
     *         raised by the file system
     */
    private List<FileStatus> singleThreadedListStatus(JobContext jobContext, Path[] pathArr, PathFilter pathFilter, boolean z) throws IOException {
        List<FileStatus> result = new ArrayList<FileStatus>();
        List<IOException> errors = new ArrayList<IOException>();
        for (Path inputPath : pathArr) {
            FileSystem fs = inputPath.getFileSystem(jobContext.getConfiguration());
            FileStatus[] matches = fs.globStatus(inputPath, pathFilter);
            if (matches == null) {
                errors.add(new IOException("Input path does not exist: " + inputPath));
                continue;
            }
            if (matches.length == 0) {
                errors.add(new IOException("Input Pattern " + inputPath + " matches 0 files"));
                continue;
            }
            for (FileStatus match : matches) {
                if (!match.isDirectory()) {
                    result.add(match);
                    continue;
                }
                RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(match.getPath());
                while (children.hasNext()) {
                    LocatedFileStatus child = children.next();
                    if (!pathFilter.accept(child.getPath())) {
                        continue;
                    }
                    if (z && child.isDirectory()) {
                        addInputPathRecursively(result, fs, child.getPath(), pathFilter);
                    } else {
                        // Without recursion a sub-directory is kept as an entry of its own.
                        result.add(shrinkStatus(child));
                    }
                }
            }
        }
        if (!errors.isEmpty()) {
            throw new InvalidInputException(errors);
        }
        return result;
    }

    /**
     * Walks the directory tree rooted at {@code path} and appends every
     * accepted non-directory entry to {@code list}. Entries rejected by the
     * filter are skipped, including whole sub-directories.
     *
     * <p>A {@link FileNotFoundException} raised while listing is handed to
     * {@code FileUtil.maybeIgnoreMissingDirectory}, which decides whether it is
     * ignored or propagated.
     *
     * @param list       receives the statuses of the files found
     * @param fileSystem the file system to list
     * @param path       the directory to descend into
     * @param pathFilter filter applied to every child path
     * @throws IOException if listing fails
     */
    protected void addInputPathRecursively(List<FileStatus> list, FileSystem fileSystem, Path path, PathFilter pathFilter) throws IOException {
        try {
            RemoteIterator<LocatedFileStatus> children = fileSystem.listLocatedStatus(path);
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!pathFilter.accept(child.getPath())) {
                    continue;
                }
                if (child.isDirectory()) {
                    addInputPathRecursively(list, fileSystem, child.getPath(), pathFilter);
                    continue;
                }
                list.add(shrinkStatus(child));
            }
        } catch (FileNotFoundException missing) {
            FileUtil.maybeIgnoreMissingDirectory(fileSystem, path, missing);
        }
    }

    /**
     * Returns a {@link LocatedFileStatus} whose block locations are fresh
     * copies made with {@code BlockLocation}'s copy constructor.
     *
     * <p>Directories, zero-length files and statuses that are not
     * {@code LocatedFileStatus} instances are returned unchanged.
     * NOTE(review): the copy presumably drops heavier subclass state of the
     * original locations - confirm against BlockLocation.
     *
     * @param fileStatus the status to shrink
     * @return the same object, or a new located status with copied block locations
     */
    public static FileStatus shrinkStatus(FileStatus fileStatus) {
        boolean copyable = !fileStatus.isDirectory()
                && fileStatus.getLen() != 0
                && fileStatus instanceof LocatedFileStatus;
        if (!copyable) {
            return fileStatus;
        }
        BlockLocation[] source = ((LocatedFileStatus) fileStatus).getBlockLocations();
        BlockLocation[] copies = new BlockLocation[source.length];
        for (int idx = 0; idx < source.length; idx++) {
            copies[idx] = new BlockLocation(source[idx]);
        }
        return new LocatedFileStatus(fileStatus, copies);
    }

    /**
     * Factory hook for the splits produced by getSplits (variant without
     * cached hosts; getSplits uses it for zero-length files).
     *
     * @param path   the file the split belongs to
     * @param j      second FileSplit constructor argument; getSplits passes the start offset
     * @param j2     third FileSplit constructor argument; getSplits passes the split length
     * @param strArr hosts passed on to the FileSplit
     * @return a new {@code FileSplit} built from the arguments
     */
    protected FileSplit makeSplit(Path path, long j, long j2, String[] strArr) {
        return new FileSplit(path, j, j2, strArr);
    }

    /**
     * Factory hook for the splits produced by getSplits (variant that also
     * carries cached hosts).
     *
     * @param path    the file the split belongs to
     * @param j       second FileSplit constructor argument; getSplits passes the start offset
     * @param j2      third FileSplit constructor argument; getSplits passes the split length
     * @param strArr  hosts of the block; getSplits passes {@code getHosts()}
     * @param strArr2 cached hosts of the block; getSplits passes {@code getCachedHosts()}
     * @return a new {@code FileSplit} built from the arguments
     */
    protected FileSplit makeSplit(Path path, long j, long j2, String[] strArr, String[] strArr2) {
        return new FileSplit(path, j, j2, strArr, strArr2);
    }

    /**
     * Generates the list of input splits for the job.
     *
     * <p>For every listed file:
     * <ul>
     *   <li>directories are skipped when recursion is off and the
     *       "ignore subdirs" option is set;</li>
     *   <li>a zero-length file yields one empty split without hosts;</li>
     *   <li>a non-splittable file yields one split covering the whole file,
     *       placed on the hosts of its first block;</li>
     *   <li>a splittable file is cut into splits of
     *       {@link #computeSplitSize} bytes for as long as the remaining bytes
     *       exceed {@code SPLIT_SLOP} times the split size; whatever is left
     *       becomes the last split, so that last split is at most
     *       {@code SPLIT_SLOP} times the split size.</li>
     * </ul>
     * The number of listed files is finally stored in the job configuration.
     *
     * @param jobContext the job to compute splits for
     * @return the generated splits
     * @throws IOException if listing files or fetching block locations fails
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        StopWatch stopWatch = new StopWatch().start();
        long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(jobContext));
        long maxSize = getMaxSplitSize(jobContext);

        List<InputSplit> splits = new ArrayList<InputSplit>();
        List<FileStatus> files = listStatus(jobContext);
        boolean ignoreDirs = !getInputDirRecursive(jobContext)
                && jobContext.getConfiguration().getBoolean("mapreduce.input.fileinputformat.input.dir.nonrecursive.ignore.subdirs", false);

        for (FileStatus file : files) {
            if (ignoreDirs && file.isDirectory()) {
                continue;
            }
            Path path = file.getPath();
            long length = file.getLen();
            if (length == 0) {
                // Empty files still get a split, just without any host hints.
                splits.add(makeSplit(path, 0L, length, new String[0]));
                continue;
            }

            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                blkLocations = path.getFileSystem(jobContext.getConfiguration()).getFileBlockLocations(file, 0L, length);
            }

            if (!isSplitable(jobContext, path)) {
                if (LOG.isDebugEnabled() && length > Math.min(file.getBlockSize(), minSize)) {
                    LOG.debug("File is not splittable so no parallelization is possible: " + file.getPath());
                }
                splits.add(makeSplit(path, 0L, length, blkLocations[0].getHosts(), blkLocations[0].getCachedHosts()));
                continue;
            }

            long splitSize = computeSplitSize(file.getBlockSize(), minSize, maxSize);
            long bytesRemaining = length;
            // The ratio must be computed in floating point: with long division
            // it is truncated, and the 1.1 slop would effectively become 2.0.
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                long offset = length - bytesRemaining;
                int blkIndex = getBlockIndex(blkLocations, offset);
                splits.add(makeSplit(path, offset, splitSize,
                        blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                long offset = length - bytesRemaining;
                int blkIndex = getBlockIndex(blkLocations, offset);
                splits.add(makeSplit(path, offset, bytesRemaining,
                        blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
            }
        }

        // Record the number of input files (same key as NUM_INPUT_FILES).
        jobContext.getConfiguration().setLong("mapreduce.input.fileinputformat.numinputfiles", files.size());
        stopWatch.stop();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: " + stopWatch.now(TimeUnit.MILLISECONDS));
        }
        return splits;
    }

    /**
     * Computes the split size: the first argument capped by the third, then
     * raised to at least the second. getSplits passes the block size, the
     * minimum split size and the maximum split size, in that order.
     *
     * @param j  first value; getSplits passes the file's block size
     * @param j2 lower bound; getSplits passes the minimum split size
     * @param j3 upper bound applied before the lower bound; getSplits passes the maximum split size
     * @return {@code max(j2, min(j3, j))}
     */
    protected long computeSplitSize(long j, long j2, long j3) {
        long capped = (j3 <= j) ? j3 : j;
        return (j2 >= capped) ? j2 : capped;
    }

    /**
     * Finds the block that contains the given file offset.
     *
     * @param blockLocationArr the block locations of the file
     * @param j                the offset to look up
     * @return the index of the first block whose range
     *         {@code [offset, offset + length)} contains {@code j}
     * @throws IllegalArgumentException if no block contains the offset; the
     *         message reports the last valid offset, derived from the final
     *         block in the array
     */
    protected int getBlockIndex(BlockLocation[] blockLocationArr, long j) {
        for (int i = 0; i < blockLocationArr.length; i++) {
            BlockLocation candidate = blockLocationArr[i];
            long blockStart = candidate.getOffset();
            if (blockStart <= j && j < blockStart + candidate.getLength()) {
                return i;
            }
        }
        BlockLocation last = blockLocationArr[blockLocationArr.length - 1];
        // Last valid byte offset of the file, assuming the final array entry is the final block.
        long lastOffset = (last.getOffset() + last.getLength()) - 1;
        throw new IllegalArgumentException("Offset " + j + " is outside of file (0.." + lastOffset + ")");
    }

    /**
     * Replaces the job's input paths with those listed in a comma-separated
     * string. The string is split by getPathStrings, so commas inside
     * {@code {...}} glob groups do not separate paths.
     *
     * @param job the job whose configuration is modified
     * @param str comma-separated input paths
     * @throws IOException propagated from {@code setInputPaths(Job, Path...)}
     */
    public static void setInputPaths(Job job, String str) throws IOException {
        setInputPaths(job, StringUtils.stringToPath(getPathStrings(str)));
    }

    /**
     * Appends every path listed in a comma-separated string to the job's
     * input paths. Commas inside {@code {...}} glob groups do not separate
     * paths (see getPathStrings).
     *
     * @param job the job whose configuration is modified
     * @param str comma-separated input paths
     * @throws IOException propagated from {@code addInputPath}
     */
    public static void addInputPaths(Job job, String str) throws IOException {
        String[] entries = getPathStrings(str);
        for (int idx = 0; idx < entries.length; idx++) {
            addInputPath(job, new Path(entries[idx]));
        }
    }

    /**
     * Replaces the job's input paths with the given ones. Each path is
     * qualified against its own file system, escaped, and the results are
     * joined with commas under {@link #INPUT_DIR}.
     *
     * <p>At least one path is required: the first element is accessed
     * unconditionally.
     *
     * @param job     the job whose configuration is modified
     * @param pathArr the new input paths (must not be empty)
     * @throws IOException if a path's file system cannot be obtained
     */
    public static void setInputPaths(Job job, Path... pathArr) throws IOException {
        Configuration configuration = job.getConfiguration();
        Path first = pathArr[0];
        String firstQualified = first.getFileSystem(configuration).makeQualified(first).toString();
        StringBuilder joined = new StringBuilder(StringUtils.escapeString(firstQualified));
        for (int idx = 1; idx < pathArr.length; idx++) {
            Path current = pathArr[idx];
            String qualified = current.getFileSystem(configuration).makeQualified(current).toString();
            joined.append(",").append(StringUtils.escapeString(qualified));
        }
        configuration.set(INPUT_DIR, joined.toString());
    }

    /**
     * Appends one path to the job's input paths. The path is qualified
     * against its file system and escaped before it is added to the
     * comma-separated value stored under {@link #INPUT_DIR}.
     *
     * @param job  the job whose configuration is modified
     * @param path the path to add
     * @throws IOException if the path's file system cannot be obtained
     */
    public static void addInputPath(Job job, Path path) throws IOException {
        Configuration configuration = job.getConfiguration();
        String qualified = path.getFileSystem(configuration).makeQualified(path).toString();
        String escaped = StringUtils.escapeString(qualified);
        String existing = configuration.get(INPUT_DIR);
        if (existing == null) {
            configuration.set(INPUT_DIR, escaped);
        } else {
            configuration.set(INPUT_DIR, existing + "," + escaped);
        }
    }

    /**
     * Splits a comma-separated list of paths, treating commas inside
     * {@code {...}} glob groups as part of the path rather than as separators.
     *
     * <p>Brace nesting is tracked with a depth counter; the "inside a glob"
     * state is entered on any opening brace and left only when a closing
     * brace brings the depth back to exactly zero. The result always contains
     * at least one element (the empty string for empty input).
     *
     * @param str the comma-separated paths
     * @return the individual path strings, in input order
     */
    private static String[] getPathStrings(String str) {
        List<String> pieces = new ArrayList<String>();
        final int end = str.length();
        int depth = 0;
        int segmentStart = 0;
        boolean insideGlob = false;
        for (int pos = 0; pos < end; pos++) {
            char c = str.charAt(pos);
            if (c == '{') {
                depth++;
                insideGlob = true;
            } else if (c == '}') {
                depth--;
                if (depth == 0) {
                    insideGlob = false;
                }
            } else if (c == ',' && !insideGlob) {
                pieces.add(str.substring(segmentStart, pos));
                segmentStart = pos + 1;
            }
        }
        pieces.add(str.substring(segmentStart));
        return pieces.toArray(new String[0]);
    }

    /**
     * Returns the job's input paths as stored under {@link #INPUT_DIR}. The
     * stored value is split with {@code StringUtils.split} and every piece is
     * un-escaped before being turned into a {@link Path}.
     *
     * @param jobContext the job context whose configuration is consulted
     * @return the configured input paths; the empty string is used as the
     *         stored value when the key is unset
     */
    public static Path[] getInputPaths(JobContext jobContext) {
        String stored = jobContext.getConfiguration().get(INPUT_DIR, "");
        String[] escapedEntries = StringUtils.split(stored);
        Path[] result = new Path[escapedEntries.length];
        int idx = 0;
        for (String entry : escapedEntries) {
            result[idx++] = new Path(StringUtils.unEscapeString(entry));
        }
        return result;
    }
}
