package org.sonatype.nexus.proxy.maven.routing.internal.scrape;

import ch.qos.logback.classic.spi.CallerData;
import com.sun.xml.fastinfoset.EncodingConstants;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.inject.Named;
import javax.inject.Singleton;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.sonatype.nexus.apachehttpclient.page.Page;
import org.sonatype.nexus.proxy.maven.routing.internal.scrape.AbstractScraper;
import org.sonatype.nexus.proxy.maven.routing.internal.task.CancelableUtil;
import org.sonatype.nexus.util.PathUtils;

/**
 * Scraper that builds a path list by walking the XML bucket listing of a remote
 * that identifies itself as Amazon S3.
 *
 * <p>A remote is recognized by its {@code Server: AmazonS3} header together with the
 * presence of an {@code x-amz-request-id} header. Listings are read page by page
 * ({@code ListBucketResult} documents) for as long as the remote reports
 * {@code IsTruncated = true}.
 */
@Singleton
@Named(AmazonS3IndexScraper.ID)
public class AmazonS3IndexScraper extends AbstractScraper {
    protected static final String ID = "amazons3-index";

    /** Value the {@code xmlns} attribute of a {@code ListBucketResult} element must carry. */
    private static final String LIST_BUCKET_NAMESPACE = "http://s3.amazonaws.com/doc/2006-03-01/";

    /**
     * Registers this scraper under {@link #ID}.
     */
    public AmazonS3IndexScraper() {
        // NOTE(review): 4000 is handed to AbstractScraper as-is; presumably the scraper priority - confirm.
        super(4000, ID);
    }

    /**
     * @return human readable name of the server type this scraper targets, used in result messages.
     */
    protected String getTargetedServer() {
        return "Amazon S3";
    }

    /**
     * Decides from the response headers and status whether the remote is an S3 bucket that can be scraped.
     *
     * @param scrapeContext the current scrape context (not consulted by this implementation).
     * @param page the page fetched from the remote.
     * @return {@code UNRECOGNIZED} when the S3 headers are missing, {@code RECOGNIZED_SHOULD_NOT_BE_SCRAPED}
     *         when the response status is 403, {@code RECOGNIZED_SHOULD_BE_SCRAPED} otherwise.
     */
    @Override
    protected AbstractScraper.RemoteDetectionResult detectRemoteRepository(ScrapeContext scrapeContext, Page page) {
        boolean looksLikeS3 = page.hasHeaderAndEqualsWith("Server", "AmazonS3") && page.hasHeader("x-amz-request-id");
        if (!looksLikeS3) {
            return new AbstractScraper.RemoteDetectionResult(AbstractScraper.RemoteDetectionOutcome.UNRECOGNIZED, getTargetedServer(), "Remote is not " + getTargetedServer());
        }
        if (isAccessDeniedResponse(page)) {
            return new AbstractScraper.RemoteDetectionResult(AbstractScraper.RemoteDetectionOutcome.RECOGNIZED_SHOULD_NOT_BE_SCRAPED, getTargetedServer(), "Bucket is not publicly accessible.");
        }
        return new AbstractScraper.RemoteDetectionResult(AbstractScraper.RemoteDetectionOutcome.RECOGNIZED_SHOULD_BE_SCRAPED, getTargetedServer(), "Should be scraped.");
    }

    /**
     * Entry point of the scrape. When the initial page is not a 200 response it is expected to be a
     * {@code NoSuchKey} error; the key it names is then used as listing prefix and the listing is
     * requested again from the URL obtained by cutting that key off the end of the repository root URL.
     *
     * @param scrapeContext the current scrape context.
     * @param page the page initially fetched from the remote.
     * @return the distinct collected paths, or {@code null} when the scrape context was stopped because
     *         the initial response could not be used.
     * @throws IOException if fetching a listing page fails.
     */
    @Override
    protected List<String> diveIn(ScrapeContext scrapeContext, Page page) throws IOException {
        String prefix = null;
        Page startPage = page;
        String listingUrl = page.getUrl();
        if (startPage.getHttpResponse().getStatusLine().getStatusCode() != 200) {
            prefix = getKeyFromNoSuchKeyResponse(startPage);
            if (prefix == null) {
                this.log.info("Unexpected S3 response from remote of {}, cannot scrape this: {}", scrapeContext.getProxyRepository(), startPage.getDocument().outerHtml());
                scrapeContext.stop("Remote recognized as " + getTargetedServer() + ", but unexpected response code and response body received (see logs).");
                return null;
            }
            String rootUrl = scrapeContext.getRemoteRepositoryRootUrl();
            // Cutting the key off the root URL is only meaningful when the URL really ends with it;
            // otherwise substring() would either throw or produce a bogus bucket URL.
            if (!rootUrl.endsWith(prefix)) {
                this.log.info("S3 key {} reported by remote of {} is not a suffix of its URL {}, cannot scrape this", prefix, scrapeContext.getProxyRepository(), rootUrl);
                scrapeContext.stop("Remote recognized as " + getTargetedServer() + ", but the reported key does not match the remote URL (see logs).");
                return null;
            }
            listingUrl = rootUrl.substring(0, rootUrl.length() - prefix.length());
            this.log.debug("Retrying URL {} to scrape remote of {} on URL {}", listingUrl, scrapeContext.getProxyRepository(), rootUrl);
            startPage = Page.getPageFor(scrapeContext, listingUrl + "?prefix=" + prefix);
        }
        Set<String> collected = new HashSet<String>();
        diveIn(scrapeContext, startPage, listingUrl, prefix, collected);
        return new ArrayList<String>(collected);
    }

    /**
     * Processes the given listing page and every follow-up page while the remote reports the listing
     * as truncated, adding the depth-limited path of each accepted key to {@code set}.
     *
     * <p>Keys that start with {@code "."}, contain {@code "/."} or whose size is not greater than zero
     * are not collected. The scrape context is stopped (and the method returns) when a page has a
     * non-200 status, is not a single {@code ListBucketResult} in the expected namespace, or is
     * truncated without listing any key.
     *
     * @param scrapeContext the current scrape context.
     * @param page the first listing page to process.
     * @param str the URL listing requests are sent to (query parameters are appended to it).
     * @param str2 the key prefix to list and to strip from collected keys, or {@code null} for none.
     * @param set receives the collected paths.
     * @throws IOException if fetching a follow-up page fails.
     */
    protected void diveIn(ScrapeContext scrapeContext, Page page, String str, String str2, Set<String> set) throws IOException {
        Page currentPage = page;
        boolean truncated;
        do {
            truncated = isTruncated(currentPage);
            CancelableUtil.checkInterruption();
            if (currentPage.getHttpResponse().getStatusLine().getStatusCode() != 200) {
                scrapeContext.stop("Remote recognized as " + getTargetedServer() + ", but cannot be scraped (unexpected response status " + currentPage.getHttpResponse().getStatusLine() + ")");
                return;
            }
            Elements listings = currentPage.getDocument().getElementsByTag("ListBucketResult");
            if (listings.size() != 1 || !listings.get(0).attr("xmlns").equals(LIST_BUCKET_NAMESPACE)) {
                scrapeContext.stop("Remote recognized as " + getTargetedServer() + ", but unexpected response was received (not \"ListBucketResult\").");
                return;
            }
            this.log.debug("Processing S3 page response from remote of {} got from URL {}", scrapeContext.getProxyRepository(), currentPage.getUrl());

            // The continuation marker has to follow the last key LISTED on this page, not the last
            // key accepted: otherwise a page made up of skipped entries only would be requested again
            // and again.
            String lastListedKey = null;
            for (Element contents : currentPage.getDocument().getElementsByTag("Contents")) {
                Elements keyElements = contents.getElementsByTag("Key");
                if (keyElements.isEmpty()) {
                    continue;
                }
                String key = keyElements.get(0).text();
                lastListedKey = key;
                Elements sizeElements = contents.getElementsByTag("Size");
                if (sizeElements.isEmpty()) {
                    continue;
                }
                boolean hidden = key.startsWith(".") || key.contains("/.");
                if (hidden || Long.parseLong(sizeElements.get(0).text()) <= 0) {
                    continue;
                }
                String relativeKey = str2 != null ? key.substring(str2.length()) : key;
                set.add(PathUtils.pathFrom(PathUtils.elementsOf(relativeKey), scrapeContext.getScrapeDepth()));
            }

            if (truncated) {
                if (lastListedKey == null) {
                    // Nothing to continue from; asking again would return this very same page.
                    scrapeContext.stop("Remote recognized as " + getTargetedServer() + ", but a truncated listing without any key was received.");
                    return;
                }
                CancelableUtil.checkInterruption();
                // NOTE(review): prefix and marker are appended without URL-encoding - confirm whether
                // Page.getPageFor expects an already encoded URL.
                List<String> parameters = new ArrayList<String>();
                if (str2 != null) {
                    parameters.add("prefix=" + str2);
                }
                parameters.add("marker=" + lastListedKey);
                currentPage = Page.getPageFor(scrapeContext, appendParameters(str, parameters));
            }
        } while (truncated);
    }

    /**
     * Appends the given {@code name=value} pairs to the URL as a query string: the first one is
     * introduced by {@code "?"}, every further one by {@code "&"}.
     *
     * @param str the URL to append to.
     * @param list the already formatted parameters; an empty list leaves the URL unchanged.
     * @return the URL followed by the query string.
     */
    protected String appendParameters(String str, List<String> list) {
        StringBuilder url = new StringBuilder(str);
        String separator = "?";
        for (String parameter : list) {
            url.append(separator).append(parameter);
            separator = "&";
        }
        return url.toString();
    }

    /**
     * @param page the page to inspect.
     * @return {@code true} when the response status code is 403.
     */
    protected boolean isAccessDeniedResponse(Page page) {
        return page.getHttpResponse().getStatusLine().getStatusCode() == 403;
    }

    /**
     * Extracts the key named by a {@code NoSuchKey} error response.
     *
     * @param page the page to inspect.
     * @return the text of the {@code Key} element when the response status is 404 and the document
     *         holds exactly one {@code Error} element with exactly one {@code Code} equal to
     *         {@code NoSuchKey} and exactly one {@code Key}; {@code null} otherwise.
     */
    protected String getKeyFromNoSuchKeyResponse(Page page) {
        if (page.getHttpResponse().getStatusLine().getStatusCode() != 404) {
            return null;
        }
        Elements errors = page.getDocument().getElementsByTag("Error");
        if (errors.size() != 1) {
            return null;
        }
        Element error = errors.get(0);
        Elements codes = error.getElementsByTag("Code");
        Elements keys = error.getElementsByTag("Key");
        if (codes.size() == 1 && "NoSuchKey".equals(codes.get(0).text()) && keys.size() == 1) {
            return keys.get(0).text();
        }
        return null;
    }

    /**
     * @param page the listing page to inspect.
     * @return {@code true} when the document holds exactly one {@code ListBucketResult} element with
     *         exactly one {@code IsTruncated} element whose text is {@code true}.
     */
    protected boolean isTruncated(Page page) {
        Elements listings = page.getDocument().getElementsByTag("ListBucketResult");
        if (listings.size() != 1) {
            return false;
        }
        Elements flags = listings.get(0).getElementsByTag("IsTruncated");
        return flags.size() == 1 && "true".equals(flags.get(0).text());
    }
}
