/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.fetcher;

import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.fetcher.FetchItemQueues;
import org.apache.nutch.fetcher.FetcherOutputFormat;
import org.apache.nutch.fetcher.FetcherThread;
import org.apache.nutch.fetcher.QueueFeeder;
import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.NutchTool;
import org.apache.nutch.util.TimingUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Fetcher
extends NutchTool
implements Tool {
    // NOTE(review): not referenced anywhere in this file; its unit and purpose cannot
    // be determined from here - check the classes that read it.
    public static final int PERM_REFRESH_TIME = 5;
    // Redirect-type labels. NOTE(review): not referenced in this file; presumably they
    // distinguish redirects found in page content from protocol-level ones - confirm.
    public static final String CONTENT_REDIR = "content";
    public static final String PROTOCOL_REDIR = "protocol";
    // Logger for this class, resolved through the MethodHandles lookup class.
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /**
     * Creates a fetcher without a configuration by passing {@code null} to the
     * superclass constructor.
     * NOTE(review): presumably the configuration is injected afterwards (e.g. by
     * ToolRunner, as done in {@code main}) - confirm against NutchTool.
     */
    public Fetcher() {
        super(null);
    }

    /**
     * Creates a fetcher that hands the given configuration to the superclass.
     *
     * @param conf configuration passed unchanged to the superclass constructor
     */
    public Fetcher(Configuration conf) {
        super(conf);
    }

    /**
     * Reads the {@code fetcher.parse} flag.
     *
     * @param conf configuration to read from
     * @return the value of {@code fetcher.parse}, or {@code true} when it is not set
     */
    public static boolean isParsing(Configuration conf) {
        final boolean parseEnabled = conf.getBoolean("fetcher.parse", true);
        return parseEnabled;
    }

    /**
     * Reads the {@code fetcher.store.content} flag.
     *
     * @param conf configuration to read from
     * @return the value of {@code fetcher.store.content}, or {@code true} when it is not set
     */
    public static boolean isStoringContent(Configuration conf) {
        final boolean storeEnabled = conf.getBoolean("fetcher.store.content", true);
        return storeEnabled;
    }

    /**
     * Configures and runs the fetcher MapReduce job for one segment and blocks
     * until the job has completed.
     *
     * <p>Before the job is created, two relative settings in the tool
     * configuration are turned into absolute timestamps (milliseconds since the
     * epoch): {@code fetcher.timelimit.mins} is written to
     * {@code fetcher.timelimit} (only when it is not -1), and
     * {@code fetcher.throughput.threshold.check.after} is overwritten in place.
     *
     * @param segment segment directory; {@code crawl_generate} below it is the
     *                job input and the segment itself is the output path
     * @param threads value stored in {@code fetcher.threads.fetch} of the job configuration
     * @throws IOException            if setting up or running the job fails with an I/O error
     * @throws InterruptedException   if waiting for job completion is interrupted
     * @throws ClassNotFoundException if the job cannot load a required class
     * @throws IllegalArgumentException if {@code http.agent.name} is not configured
     * @throws RuntimeException       if the job completes unsuccessfully
     */
    public void fetch(Path segment, int threads) throws IOException, InterruptedException, ClassNotFoundException {
        checkConfiguration();

        StopWatch timer = new StopWatch();
        timer.start();
        LOG.info("Fetcher: starting");
        LOG.info("Fetcher: segment: {}", segment);

        Configuration toolConf = getConf();

        // Optional overall time limit: minutes -> absolute deadline in epoch millis.
        long timeLimitMins = toolConf.getLong("fetcher.timelimit.mins", -1L);
        if (timeLimitMins != -1L) {
            long deadline = System.currentTimeMillis() + timeLimitMins * 60L * 1000L;
            LOG.info("Fetcher Timelimit set for : {}  ({})", deadline, TimingUtil.logDateMillis(deadline));
            toolConf.setLong("fetcher.timelimit", deadline);
        }

        // The throughput check delay is always converted, using 10 minutes as default.
        long checkAfterMins = toolConf.getLong("fetcher.throughput.threshold.check.after", 10L);
        long checkAfterTime = System.currentTimeMillis() + checkAfterMins * 60L * 1000L;
        toolConf.setLong("fetcher.throughput.threshold.check.after", checkAfterTime);

        int maxOutlinkDepth = toolConf.getInt("fetcher.follow.outlinks.depth", -1);
        if (maxOutlinkDepth > 0) {
            LOG.info("Fetcher: following outlinks up to depth: {}", maxOutlinkDepth);
            int linksPerLevel = toolConf.getInt("fetcher.follow.outlinks.num.links", 4);
            int depthDivisor = toolConf.getInt("fetcher.follow.outlinks.depth.divisor", 2);
            int outlinkEstimate = 0;
            for (int level = 0; level < maxOutlinkDepth; level++) {
                // Integer arithmetic throughout: the division truncates before the multiplication.
                outlinkEstimate += (int) Math.floor(depthDivisor / (level + 1) * linksPerLevel);
            }
            LOG.info("Fetcher: maximum outlinks to follow: {}", outlinkEstimate);
        }

        Job job = Job.getInstance(toolConf, "Nutch Fetcher: " + segment.getName());
        // This replaces the name passed to getInstance above.
        job.setJobName("FetchData");

        Configuration jobConf = job.getConfiguration();
        jobConf.setInt("fetcher.threads.fetch", threads);
        jobConf.set("nutch.segment.name", segment.getName());
        jobConf.set("mapreduce.map.speculative", "false");

        FileInputFormat.addInputPath(job, new Path(segment, "crawl_generate"));
        job.setInputFormatClass(InputFormat.class);
        job.setJarByClass(Fetcher.class);
        job.setMapperClass(FetcherRun.class);
        FileOutputFormat.setOutputPath(job, segment);
        job.setOutputFormatClass(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        try {
            if (!job.waitForCompletion(true)) {
                String failure = NutchJob.getJobFailureLogMessage("Fetcher", job);
                LOG.error(failure);
                throw new RuntimeException(failure);
            }
        }
        catch (ClassNotFoundException | InterruptedException e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        }

        timer.stop();
        LOG.info("Fetcher: finished, elapsed: {} ms", timer.getTime(TimeUnit.MILLISECONDS));
    }

    /**
     * Command-line entry point: runs a new {@code Fetcher} through
     * {@code ToolRunner} with a freshly created Nutch configuration and exits
     * the JVM with the tool's return code.
     *
     * @param args command-line arguments forwarded to the tool
     * @throws Exception if {@code ToolRunner.run} throws
     */
    public static void main(String[] args) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        int exitCode = ToolRunner.run(nutchConf, new Fetcher(), args);
        System.exit(exitCode);
    }

    /**
     * Command-line driver: {@code Fetcher <segment> [-threads n]}.
     *
     * <p>The thread count defaults to {@code fetcher.threads.fetch} (10 when
     * unset) and can be overridden with {@code -threads n}; the effective value
     * is written back to the configuration before fetching.
     *
     * @param args command-line arguments; {@code args[0]} is the segment path
     * @return 0 on success; -1 when the arguments are missing or malformed, or
     *         when {@link #fetch(Path, int)} throws
     * @throws Exception declared by the tool interface; failures of the fetch
     *                   itself are logged and reported through the return value
     */
    @Override
    public int run(String[] args) throws Exception {
        String usage = "Usage: Fetcher <segment> [-threads n]";
        if (args.length < 1) {
            System.err.println(usage);
            return -1;
        }
        Path segment = new Path(args[0]);
        int threads = this.getConf().getInt("fetcher.threads.fetch", 10);
        for (int i = 1; i < args.length; ++i) {
            if (!"-threads".equals(args[i])) {
                continue;
            }
            // The option needs a value; without this check args[++i] would run past the array.
            if (i + 1 >= args.length) {
                System.err.println("Fetcher: missing value for -threads");
                System.err.println(usage);
                return -1;
            }
            String value = args[++i];
            try {
                threads = Integer.parseInt(value);
            }
            catch (NumberFormatException e) {
                System.err.println("Fetcher: invalid value for -threads: " + value);
                System.err.println(usage);
                return -1;
            }
        }
        this.getConf().setInt("fetcher.threads.fetch", threads);
        try {
            this.fetch(segment, threads);
            return 0;
        }
        catch (Exception e) {
            LOG.error("Fetcher: {}", (Object)StringUtils.stringifyException((Throwable)e));
            return -1;
        }
    }

    /**
     * Verifies that {@code http.agent.name} is set to a non-blank value.
     *
     * @throws IllegalArgumentException if the property is missing or contains
     *                                  only whitespace; the message is also logged as an error
     */
    private void checkConfiguration() {
        String agentName = getConf().get("http.agent.name");
        boolean agentConfigured = agentName != null && !agentName.trim().isEmpty();
        if (agentConfigured) {
            return;
        }
        String message = "Fetcher: No agents listed in 'http.agent.name' property.";
        LOG.error(message);
        throw new IllegalArgumentException(message);
    }

    /**
     * Map-driven entry point for running the fetcher.
     *
     * <p>Recognised keys in {@code args}:
     * <ul>
     *   <li>{@code segment} - a {@code Path} or a {@code String} naming the
     *       segment. A value of any other type leaves the segment {@code null},
     *       which is passed to {@link #fetch(Path, int)} as-is.</li>
     *   <li>{@code threads} - thread count as a {@code Number} or a decimal
     *       string; defaults to {@code fetcher.threads.fetch} (10 when unset).</li>
     * </ul>
     * When {@code segment} is absent, the most recently modified entry of the
     * local directory {@code <crawlId>/segments} is used.
     *
     * @param args    tool arguments as described above
     * @param crawlId base directory used to locate {@code segments} when no segment is given
     * @return a map whose {@code "result"} entry is {@code "0"} on success and
     *         {@code "-1"} when the fetch threw an exception
     * @throws IllegalArgumentException if no segment was given and
     *         {@code <crawlId>/segments} does not exist, is not a directory or is empty
     * @throws NumberFormatException if the {@code threads} value is not a valid integer
     * @throws Exception declared by the superclass contract
     */
    @Override
    public Map<String, Object> run(Map<String, Object> args, String crawlId) throws Exception {
        Map<String, Object> results = new HashMap<>();
        Path segment = null;
        if (args.containsKey("segment")) {
            Object seg = args.get("segment");
            if (seg instanceof Path) {
                segment = (Path)seg;
            } else if (seg instanceof String) {
                segment = new Path(seg.toString());
            }
        } else {
            String segmentDir = crawlId + "/segments";
            // listFiles() returns null when the path is missing or not a directory.
            File[] segmentsList = new File(segmentDir).listFiles();
            if (segmentsList == null || segmentsList.length == 0) {
                String message = "Fetcher: no segments found in " + segmentDir;
                LOG.error(message);
                throw new IllegalArgumentException(message);
            }
            // Newest first. Long.compare gives a consistent three-way ordering,
            // which the sort needs to place the latest segment at index 0.
            Arrays.sort(segmentsList, (f1, f2) -> Long.compare(f2.lastModified(), f1.lastModified()));
            segment = new Path(segmentsList[0].getPath());
        }
        int threads = this.getConf().getInt("fetcher.threads.fetch", 10);
        if (args.containsKey("threads")) {
            Object value = args.get("threads");
            // Accept numeric values as well as their string form.
            threads = value instanceof Number
                ? ((Number)value).intValue()
                : Integer.parseInt(String.valueOf(value));
        }
        this.getConf().setInt("fetcher.threads.fetch", threads);
        try {
            this.fetch(segment, threads);
            results.put("result", Integer.toString(0));
            return results;
        }
        catch (Exception e) {
            LOG.error("Fetcher: {}", (Object)StringUtils.stringifyException((Throwable)e));
            results.put("result", Integer.toString(-1));
            return results;
        }
    }

    public static class FetcherRun
    extends Mapper<Text, CrawlDatum, Text, NutchWritable> {
        // Segment name, read from "nutch.segment.name" in setup().
        private String segmentName;
        // Handed to every FetcherThread; the monitoring loop in run() continues while it is > 0.
        private AtomicInteger activeThreads = new AtomicInteger(0);
        // Handed to every FetcherThread; shown as "waiting" / "spinWaiting" in status and log output.
        private AtomicInteger spinWaiting = new AtomicInteger(0);
        // Wall-clock time (ms) at which this mapper instance was created; base for the average rates.
        private long start = System.currentTimeMillis();
        // Handed to every FetcherThread; run() aborts once it is older than the computed timeout.
        private AtomicLong lastRequestStart = new AtomicLong(this.start);
        // Handed to every FetcherThread; run() derives per-second and average byte rates from it.
        private AtomicLong bytes = new AtomicLong(0L);
        // Handed to every FetcherThread; run() derives per-second and average page rates from it.
        private AtomicInteger pages = new AtomicInteger(0);
        // Handed to every FetcherThread; reported as "errors" in the status line.
        private AtomicInteger errors = new AtomicInteger(0);
        // Values of Fetcher.isStoringContent(conf) and Fetcher.isParsing(conf), set in setup().
        private boolean storingContent;
        private boolean parsing;
        // Job counters in group "nutch_fetcher"; all are looked up in initCounters().
        private Counter bytesDownloadedCounter;
        private Counter hitByThroughputThresholdCounter;
        private Counter hitByTimelimitCounter;
        private Counter hungThreadsCounter;
        private Counter hitByTimeoutCounter;

        /**
         * @return the shared counter object that is passed to each fetcher thread
         *         (the live instance, not a copy)
         */
        private AtomicInteger getActiveThreads() {
            return activeThreads;
        }

        /**
         * Builds a one-line progress summary and publishes it as the task status.
         *
         * <p>The summary contains thread and queue counts, the queued URL total,
         * page and error totals, and average plus last-second rates for pages and
         * bandwidth. Byte counts are divided by 128 to express them in kbits.
         *
         * @param context      task context that receives the status string
         * @param fetchQueues  queues whose sizes and counts are reported
         * @param pagesLastSec pages fetched during the last reporting interval
         * @param bytesLastSec bytes fetched during the last reporting interval
         * @throws IOException declared for callers; nothing in this method throws it directly
         */
        private void reportStatus(Mapper.Context context, FetchItemQueues fetchQueues, int pagesLastSec, int bytesLastSec) throws IOException {
            // Clamp to one second: a call within the first second (or after a clock
            // step backwards) would otherwise divide by zero in the averages below.
            long elapsedSecs = Math.max(1L, (System.currentTimeMillis() - this.start) / 1000L);
            float avgPagesSec = (float)this.pages.get() / (float)elapsedSecs;
            long avgBytesSec = this.bytes.get() / 128L / elapsedSecs;

            StringBuilder status = new StringBuilder();
            status.append(this.activeThreads).append(" threads (").append(this.spinWaiting.get()).append(" waiting), ");
            status.append(fetchQueues.getQueueCount()).append(" queues, ");
            // Only mention queues that hit the exception limit when that limit is enabled.
            if (fetchQueues.maxExceptionsPerQueue != -1 && fetchQueues.getQueueCountMaxExceptions() > 0) {
                status.append(fetchQueues.getQueueCountMaxExceptions()).append(" queues.max.except., ");
            }
            status.append(fetchQueues.getTotalSize()).append(" URLs queued, ");
            status.append(this.pages).append(" pages, ").append(this.errors).append(" errors, ");
            status.append(String.format("%.2f", Float.valueOf(avgPagesSec))).append(" pages/s (");
            status.append(pagesLastSec).append(" last sec), ");
            status.append(avgBytesSec).append(" kbits/s (").append(bytesLastSec / 128).append(" last sec)");
            context.setStatus(status.toString());
        }

        /**
         * Caches the per-task settings from the job configuration: the segment
         * name ({@code nutch.segment.name}) and the store-content and parsing flags.
         *
         * @param context task context supplying the configuration
         */
        public void setup(Mapper.Context context) {
            Configuration jobConf = context.getConfiguration();
            parsing = Fetcher.isParsing(jobConf);
            storingContent = Fetcher.isStoringContent(jobConf);
            segmentName = jobConf.get("nutch.segment.name");
        }

        /**
         * Looks up the five counters of group {@code nutch_fetcher} that
         * {@code run} increments and stores them in fields.
         *
         * @param context task context used to obtain the counters
         */
        private void initCounters(Mapper.Context context) {
            final String group = "nutch_fetcher";
            hitByTimeoutCounter = context.getCounter(group, "hit_by_timeout_total");
            hungThreadsCounter = context.getCounter(group, "hung_threads_total");
            hitByTimelimitCounter = context.getCounter(group, "hit_by_timelimit_total");
            hitByThroughputThresholdCounter = context.getCounter(group, "hit_by_throughput_threshold_total");
            bytesDownloadedCounter = context.getCounter(group, "bytes_downloaded_total");
        }

        /**
         * Runs the whole map task: starts the queue feeder and the fetcher
         * threads, then supervises them from this thread until no fetcher thread
         * is active or the request timeout is hit.
         *
         * <p>About once per second the supervising loop
         * <ol>
         *   <li>computes pages and bytes fetched during the last interval, adds
         *       the bytes to the download counter and publishes a status line;</li>
         *   <li>dumps the queues when the feeder has finished and fewer than five
         *       items remain;</li>
         *   <li>empties the queues after the page rate stayed below
         *       {@code fetcher.throughput.threshold.pages} for the configured
         *       number of checks (only after the check-after time has passed);</li>
         *   <li>when {@code fetcher.bandwidth.target} is positive, starts or halts
         *       fetcher threads to approach the target bandwidth;</li>
         *   <li>applies the time limit to the queues once the feeder has finished;</li>
         *   <li>aborts when no request was started for longer than
         *       {@code mapreduce.task.timeout / fetcher.threads.timeout.divisor}
         *       milliseconds, logging the stack of every live fetcher thread.</li>
         * </ol>
         * {@code cleanup} is always invoked on exit.
         *
         * <p>Note: this source was reconstructed by a decompiler, which reported
         * "Removed try catching itself - possible behaviour change" for this method.
         *
         * @param innerContext task context shared with the feeder and the fetcher threads
         * @throws IOException          if status reporting or cleanup fails
         * @throws InterruptedException if interrupted while staggering thread
         *                              starts or while waiting for the feeder to stop
         */
        public void run(Mapper.Context innerContext) throws IOException, InterruptedException {
            this.setup(innerContext);
            this.initCounters(innerContext);
            try {
                int bandwidthTargetCheckEveryNSecs;
                Configuration conf = innerContext.getConfiguration();
                LinkedList<FetcherThread> fetcherThreads = new LinkedList<FetcherThread>();
                FetchItemQueues fetchQueues = new FetchItemQueues(conf);
                int threadCount = conf.getInt("fetcher.threads.fetch", 10);
                LOG.info("Fetcher: threads: {}", (Object)threadCount);
                MimeUtil.setPoolSize(Math.max(10, threadCount / 2));
                int timeoutDivisor = conf.getInt("fetcher.threads.timeout.divisor", 2);
                LOG.info("Fetcher: time-out divisor: {}", (Object)timeoutDivisor);
                int queueDepthMultiplier = conf.getInt("fetcher.queue.depth.multiplier", 50);
                QueueFeeder feeder = new QueueFeeder(innerContext, fetchQueues, threadCount * queueDepthMultiplier);
                feeder.start();
                // Stagger thread start-up by startDelay milliseconds (no delay before the first one).
                int startDelay = conf.getInt("fetcher.threads.start.delay", 10);
                for (int i = 0; i < threadCount; ++i) {
                    if (startDelay > 0 && i > 0) {
                        Thread.sleep(startDelay);
                    }
                    FetcherThread t = new FetcherThread(conf, this.getActiveThreads(), fetchQueues, feeder, this.spinWaiting, this.lastRequestStart, innerContext, this.errors, this.segmentName, this.parsing, this.storingContent, this.pages, this.bytes);
                    fetcherThreads.add(t);
                    t.start();
                }
                // Milliseconds without a new request after which the task gives up.
                long timeout = conf.getInt("mapreduce.task.timeout", 600000) / timeoutDivisor;
                int throughputThresholdNumRetries = 0;
                int throughputThresholdPages = conf.getInt("fetcher.throughput.threshold.pages", -1);
                LOG.info("Fetcher: throughput threshold: {}", (Object)throughputThresholdPages);
                int throughputThresholdMaxRetries = conf.getInt("fetcher.throughput.threshold.retries", 5);
                LOG.info("Fetcher: throughput threshold retries: {}", (Object)throughputThresholdMaxRetries);
                long throughputThresholdTimeLimit = conf.getLong("fetcher.throughput.threshold.check.after", -1L);
                int targetBandwidth = conf.getInt("fetcher.bandwidth.target", -1) * 1000;
                int maxNumThreads = conf.getInt("fetcher.maxNum.threads", threadCount);
                if (maxNumThreads < threadCount) {
                    LOG.info("fetcher.maxNum.threads can't be < than {} : using {} instead", (Object)threadCount, (Object)threadCount);
                    maxNumThreads = threadCount;
                }
                if ((bandwidthTargetCheckEveryNSecs = conf.getInt("fetcher.bandwidth.target.check.everyNSecs", 30)) < 1) {
                    LOG.info("fetcher.bandwidth.target.check.everyNSecs can't be < to 1 : using 1 instead");
                    bandwidthTargetCheckEveryNSecs = 1;
                }
                int maxThreadsPerQueue = conf.getInt("fetcher.threads.per.queue", 1);
                int bandwidthTargetCheckCounter = 0;
                long bytesAtLastBWTCheck = 0L;
                do {
                    int hitByTimeLimit;
                    // Snapshot the totals, wait, then turn them into per-interval deltas.
                    int pagesLastSec = this.pages.get();
                    int bytesLastSec = (int)this.bytes.get();
                    try {
                        Thread.sleep(1000L);
                    }
                    catch (InterruptedException interruptedException) {
                        // Interrupts are ignored here: the loop simply carries on with a shorter interval.
                        // NOTE(review): the interrupt status is not restored - confirm this is intended.
                    }
                    pagesLastSec = this.pages.get() - pagesLastSec;
                    bytesLastSec = (int)this.bytes.get() - bytesLastSec;
                    this.bytesDownloadedCounter.increment((long)bytesLastSec);
                    this.reportStatus(innerContext, fetchQueues, pagesLastSec, bytesLastSec);
                    LOG.info("-activeThreads={}, spinWaiting={}, fetchQueues.totalSize={}, fetchQueues.getQueueCount={}", new Object[]{this.activeThreads, this.spinWaiting.get(), fetchQueues.getTotalSize(), fetchQueues.getQueueCount()});
                    if (!feeder.isAlive() && fetchQueues.getTotalSize() < 5) {
                        fetchQueues.dump();
                    }
                    // Throughput check: count consecutive-or-not slow intervals and drop the queues at the limit.
                    if (throughputThresholdTimeLimit < System.currentTimeMillis() && throughputThresholdPages != -1 && pagesLastSec < throughputThresholdPages) {
                        LOG.warn("{}: dropping below configured threshold of {} pages per second (current throughput: {} pages/sec.)", new Object[]{++throughputThresholdNumRetries, throughputThresholdPages, pagesLastSec});
                        if (throughputThresholdNumRetries == throughputThresholdMaxRetries) {
                            LOG.warn("Dropped below threshold {} times, dropping fetch queues to shut down", (Object)throughputThresholdNumRetries);
                            // Disable further threshold checks once the queues have been dropped.
                            throughputThresholdPages = -1;
                            int hitByThrougputThreshold = fetchQueues.emptyQueues();
                            if (hitByThrougputThreshold != 0) {
                                this.hitByThroughputThresholdCounter.increment((long)hitByThrougputThreshold);
                            }
                        }
                    }
                    // Bandwidth regulation, evaluated every bandwidthTargetCheckEveryNSecs loop iterations.
                    if (targetBandwidth > 0) {
                        if (bandwidthTargetCheckCounter < bandwidthTargetCheckEveryNSecs) {
                            ++bandwidthTargetCheckCounter;
                        } else if (bandwidthTargetCheckCounter == bandwidthTargetCheckEveryNSecs) {
                            long bpsSinceLastCheck = (this.bytes.get() - bytesAtLastBWTCheck) * 8L / (long)bandwidthTargetCheckEveryNSecs;
                            bytesAtLastBWTCheck = this.bytes.get();
                            bandwidthTargetCheckCounter = 0;
                            int averageBdwPerThread = 0;
                            if (this.activeThreads.get() > 0) {
                                averageBdwPerThread = (int)(bpsSinceLastCheck / (long)this.activeThreads.get());
                            }
                            LOG.info("averageBdwPerThread : {} kbps", (Object)(averageBdwPerThread / 1000));
                            if (bpsSinceLastCheck < (long)targetBandwidth && averageBdwPerThread > 0) {
                                // Below target: add threads, but only if the queues can keep more threads busy.
                                if (fetchQueues.getQueueCount() * maxThreadsPerQueue > this.activeThreads.get()) {
                                    long remainingBdw = (long)targetBandwidth - bpsSinceLastCheck;
                                    // NOTE(review): the division is integral, so Math.round has nothing left to round - confirm intent.
                                    int additionalThreads = Math.round(remainingBdw / (long)averageBdwPerThread);
                                    int availableThreads = maxNumThreads - this.activeThreads.get();
                                    additionalThreads = availableThreads < additionalThreads ? availableThreads : additionalThreads;
                                    LOG.info("Has space for more threads ({} vs {} kbps) \t=> adding {} new threads", new Object[]{bpsSinceLastCheck / 1000L, targetBandwidth / 1000, additionalThreads});
                                    for (int i = 0; i < additionalThreads; ++i) {
                                        FetcherThread thread = new FetcherThread(conf, this.getActiveThreads(), fetchQueues, feeder, this.spinWaiting, this.lastRequestStart, innerContext, this.errors, this.segmentName, this.parsing, this.storingContent, this.pages, this.bytes);
                                        fetcherThreads.add(thread);
                                        thread.start();
                                    }
                                }
                            } else if (bpsSinceLastCheck > (long)targetBandwidth && averageBdwPerThread > 0) {
                                // Above target: halt the most recently added threads.
                                long excessBdw = bpsSinceLastCheck - (long)targetBandwidth;
                                // NOTE(review): same integral division before Math.round as above.
                                int excessThreads = Math.round(excessBdw / (long)averageBdwPerThread);
                                LOG.info("Exceeding target bandwidth ({} vs {} kbps). \t=> excessThreads = {}", new Object[]{bpsSinceLastCheck / 1000L, targetBandwidth / 1000, excessThreads});
                                // Never halt every thread: skip the reduction if it would empty the list.
                                if (excessThreads >= fetcherThreads.size()) {
                                    excessThreads = 0;
                                }
                                for (int i = 0; i < excessThreads; ++i) {
                                    FetcherThread thread = (FetcherThread)fetcherThreads.removeLast();
                                    thread.setHalted(true);
                                }
                            }
                        }
                    }
                    if (!feeder.isAlive() && (hitByTimeLimit = fetchQueues.checkTimelimit()) != 0) {
                        this.hitByTimelimitCounter.increment((long)hitByTimeLimit);
                    }
                    // No timeout yet: go to the loop condition and keep supervising.
                    if (System.currentTimeMillis() - this.lastRequestStart.get() <= timeout) continue;
                    // timeout is held in milliseconds; convert so the value matches the "seconds" wording.
                    LOG.warn("Timeout reached with no new requests since {} seconds.", (Object)(timeout / 1000L));
                    LOG.warn("Aborting with {} hung threads{}.", (Object)this.activeThreads, (Object)(feeder.isAlive() ? " (queue feeder still alive)" : ""));
                    this.hungThreadsCounter.increment((long)this.activeThreads.get());
                    // Log what every still-running fetcher thread is working on, including its stack.
                    for (int i = 0; i < fetcherThreads.size(); ++i) {
                        FetcherThread thread = (FetcherThread)fetcherThreads.get(i);
                        if (!thread.isAlive()) continue;
                        LOG.warn("Thread #{} hung while processing {}", (Object)i, (Object)thread.getReprUrl());
                        StackTraceElement[] stack = thread.getStackTrace();
                        StringBuilder sb = new StringBuilder();
                        sb.append("Stack of thread #").append(i).append(":\n");
                        for (StackTraceElement s : stack) {
                            sb.append(s.toString()).append('\n');
                        }
                        LOG.warn(sb.toString());
                    }
                    fetchQueues.setTimeoutReached();
                    if (feeder.isAlive()) {
                        LOG.info("Signaled QueueFeeder to stop, waiting 1.5 seconds before exiting.");
                        Thread.sleep(1500L);
                    }
                    LOG.warn("Aborting with {} queued fetch items in {} queues{}.", new Object[]{fetchQueues.getTotalSize(), fetchQueues.getQueueCount(), feeder.isAlive() ? " (queue feeder still alive)" : ""});
                    int hitByTimeout = fetchQueues.emptyQueues();
                    this.hitByTimeoutCounter.increment((long)hitByTimeout);
                    return;
                } while (this.activeThreads.get() > 0);
                LOG.info("-activeThreads={}", (Object)this.activeThreads);
            }
            finally {
                this.cleanup(innerContext);
            }
        }
    }

    public static class InputFormat
    extends SequenceFileInputFormat<Text, CrawlDatum> {
        public List<InputSplit> getSplits(JobContext job) throws IOException {
            List files = this.listStatus(job);
            ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
            for (FileStatus cur : files) {
                splits.add((InputSplit)new FileSplit(cur.getPath(), 0L, cur.getLen(), (String[])null));
            }
            return splits;
        }
    }
}

