001package org.jsoup.helper;
002
003import org.jsoup.Connection;
004import org.jsoup.HttpStatusException;
005import org.jsoup.Progress;
006import org.jsoup.UncheckedIOException;
007import org.jsoup.UnsupportedMimeTypeException;
008import org.jsoup.internal.ControllableInputStream;
009import org.jsoup.internal.Functions;
010import org.jsoup.internal.StringUtil;
011import org.jsoup.nodes.Document;
012import org.jsoup.parser.Parser;
013import org.jsoup.parser.StreamParser;
014import org.jsoup.parser.TokenQueue;
015import org.jspecify.annotations.Nullable;
016
017import javax.net.ssl.HttpsURLConnection;
018import javax.net.ssl.SSLSocketFactory;
019import java.io.BufferedInputStream;
020import java.io.BufferedReader;
021import java.io.BufferedWriter;
022import java.io.ByteArrayInputStream;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.InputStreamReader;
026import java.io.OutputStream;
027import java.io.OutputStreamWriter;
028import java.net.CookieManager;
029import java.net.CookieStore;
030import java.net.HttpURLConnection;
031import java.net.InetSocketAddress;
032import java.net.MalformedURLException;
033import java.net.Proxy;
034import java.net.URL;
035import java.net.URLEncoder;
036import java.nio.Buffer;
037import java.nio.ByteBuffer;
038import java.nio.charset.Charset;
039import java.nio.charset.IllegalCharsetNameException;
040import java.util.ArrayList;
041import java.util.Collection;
042import java.util.Collections;
043import java.util.LinkedHashMap;
044import java.util.List;
045import java.util.Map;
046import java.util.regex.Pattern;
047import java.util.zip.GZIPInputStream;
048import java.util.zip.Inflater;
049import java.util.zip.InflaterInputStream;
050
051import static org.jsoup.Connection.Method.HEAD;
052import static org.jsoup.helper.DataUtil.UTF_8;
053import static org.jsoup.internal.Normalizer.lowerCase;
054import static org.jsoup.internal.SharedConstants.DefaultBufferSize;
055
056/**
057 * Implementation of {@link Connection}.
058 * @see org.jsoup.Jsoup#connect(String)
059 */
060@SuppressWarnings("CharsetObjectCanBeUsed")
061public class HttpConnection implements Connection {
    /** Name of the HTTP header that carries the body's content coding; checked for gzip / deflate when reading a response. */
    public static final String CONTENT_ENCODING = "Content-Encoding";
    /**
     * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
     * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
     */
    public static final String DEFAULT_UA =
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36";
    // header name used by userAgent(String) and for the default UA set on a new Request
    private static final String USER_AGENT = "User-Agent";
    /** Name of the HTTP Content-Type header. */
    public static final String CONTENT_TYPE = "Content-Type";
    /** Media type string for multipart form submissions. */
    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
    /** Media type string for URL-encoded form submissions. */
    public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded";
    private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set.
    // NOTE(review): presumably the fallback content type for uploaded parts; its use is outside this view
    private static final String DefaultUploadType = "application/octet-stream";
    // looked up by name rather than via StandardCharsets (see the class-level CharsetObjectCanBeUsed suppression)
    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
076
077    /**
078     Create a new Connection, with the request URL specified.
079     @param url the URL to fetch from
080     @return a new Connection object
081     */
082    public static Connection connect(String url) {
083        Connection con = new HttpConnection();
084        con.url(url);
085        return con;
086    }
087
088    /**
089     Create a new Connection, with the request URL specified.
090     @param url the URL to fetch from
091     @return a new Connection object
092     */
093    public static Connection connect(URL url) {
094        Connection con = new HttpConnection();
095        con.url(url);
096        return con;
097    }
098
099    /**
100     Create a new, empty HttpConnection.
101     */
102    public HttpConnection() {
103        req = new Request();
104    }
105
106    /**
107     Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not
108     copied. All other settings (proxy, parser, cookies, etc) are copied.
109     @param copy the request to copy
110     */
111    HttpConnection(Request copy) {
112        req = new Request(copy);
113    }
114
115    private static String encodeMimeName(String val) {
116        return val.replace("\"", "%22");
117    }
118
    // the request settings that this connection configures and executes; set in every constructor
    private HttpConnection.Request req;
    // the most recent response; null until execute() has run or response(...) has been called
    private Connection.@Nullable Response res;
121
122    @Override
123    public Connection newRequest() {
124        // copy the prototype request for the different settings, cookie manager, etc
125        return new HttpConnection(req);
126    }
127
128    /** Create a new Connection that just wraps the provided Request and Response */
129    private HttpConnection(Request req, Response res) {
130        this.req = req;
131        this.res = res;
132    }
133
134    @Override
135    public Connection url(URL url) {
136        req.url(url);
137        return this;
138    }
139
140    @Override
141    public Connection url(String url) {
142        Validate.notEmptyParam(url, "url");
143        try {
144            req.url(new URL(url));
145        } catch (MalformedURLException e) {
146            throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e);
147        }
148        return this;
149    }
150
151    @Override
152    public Connection proxy(@Nullable Proxy proxy) {
153        req.proxy(proxy);
154        return this;
155    }
156
157    @Override
158    public Connection proxy(String host, int port) {
159        req.proxy(host, port);
160        return this;
161    }
162
163    @Override
164    public Connection userAgent(String userAgent) {
165        Validate.notNullParam(userAgent, "userAgent");
166        req.header(USER_AGENT, userAgent);
167        return this;
168    }
169
170    @Override
171    public Connection timeout(int millis) {
172        req.timeout(millis);
173        return this;
174    }
175
176    @Override
177    public Connection maxBodySize(int bytes) {
178        req.maxBodySize(bytes);
179        return this;
180    }
181
182    @Override
183    public Connection followRedirects(boolean followRedirects) {
184        req.followRedirects(followRedirects);
185        return this;
186    }
187
188    @Override
189    public Connection referrer(String referrer) {
190        Validate.notNullParam(referrer, "referrer");
191        req.header("Referer", referrer);
192        return this;
193    }
194
195    @Override
196    public Connection method(Method method) {
197        req.method(method);
198        return this;
199    }
200
201    @Override
202    public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
203                req.ignoreHttpErrors(ignoreHttpErrors);
204                return this;
205        }
206
207    @Override
208    public Connection ignoreContentType(boolean ignoreContentType) {
209        req.ignoreContentType(ignoreContentType);
210        return this;
211    }
212
213    @Override
214    public Connection data(String key, String value) {
215        req.data(KeyVal.create(key, value));
216        return this;
217    }
218
219    @Override
220    public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
221            req.sslSocketFactory(sslSocketFactory);
222            return this;
223    }
224
225    @Override
226    public Connection data(String key, String filename, InputStream inputStream) {
227        req.data(KeyVal.create(key, filename, inputStream));
228        return this;
229    }
230
231    @Override
232    public Connection data(String key, String filename, InputStream inputStream, String contentType) {
233        req.data(KeyVal.create(key, filename, inputStream).contentType(contentType));
234        return this;
235    }
236
237    @Override
238    public Connection data(Map<String, String> data) {
239        Validate.notNullParam(data, "data");
240        for (Map.Entry<String, String> entry : data.entrySet()) {
241            req.data(KeyVal.create(entry.getKey(), entry.getValue()));
242        }
243        return this;
244    }
245
246    @Override
247    public Connection data(String... keyvals) {
248        Validate.notNullParam(keyvals, "keyvals");
249        Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
250        for (int i = 0; i < keyvals.length; i += 2) {
251            String key = keyvals[i];
252            String value = keyvals[i+1];
253            Validate.notEmpty(key, "Data key must not be empty");
254            Validate.notNull(value, "Data value must not be null");
255            req.data(KeyVal.create(key, value));
256        }
257        return this;
258    }
259
260    @Override
261    public Connection data(Collection<Connection.KeyVal> data) {
262        Validate.notNullParam(data, "data");
263        for (Connection.KeyVal entry: data) {
264            req.data(entry);
265        }
266        return this;
267    }
268
269    @Override
270    public Connection.KeyVal data(String key) {
271        Validate.notEmptyParam(key, "key");
272        for (Connection.KeyVal keyVal : request().data()) {
273            if (keyVal.key().equals(key))
274                return keyVal;
275        }
276        return null;
277    }
278
279    @Override
280    public Connection requestBody(String body) {
281        req.requestBody(body);
282        return this;
283    }
284
285    @Override
286    public Connection header(String name, String value) {
287        req.header(name, value);
288        return this;
289    }
290
291    @Override
292    public Connection headers(Map<String,String> headers) {
293        Validate.notNullParam(headers, "headers");
294        for (Map.Entry<String,String> entry : headers.entrySet()) {
295            req.header(entry.getKey(),entry.getValue());
296        }
297        return this;
298    }
299
300    @Override
301    public Connection cookie(String name, String value) {
302        req.cookie(name, value);
303        return this;
304    }
305
306    @Override
307    public Connection cookies(Map<String, String> cookies) {
308        Validate.notNullParam(cookies, "cookies");
309        for (Map.Entry<String, String> entry : cookies.entrySet()) {
310            req.cookie(entry.getKey(), entry.getValue());
311        }
312        return this;
313    }
314
315    @Override
316    public Connection cookieStore(CookieStore cookieStore) {
317        // create a new cookie manager using the new store
318        req.cookieManager = new CookieManager(cookieStore, null);
319        return this;
320    }
321
322    @Override
323    public CookieStore cookieStore() {
324        return req.cookieManager.getCookieStore();
325    }
326
327    @Override
328    public Connection parser(Parser parser) {
329        req.parser(parser);
330        return this;
331    }
332
333    @Override
334    public Document get() throws IOException {
335        req.method(Method.GET);
336        execute();
337        Validate.notNull(res);
338        return res.parse();
339    }
340
341    @Override
342    public Document post() throws IOException {
343        req.method(Method.POST);
344        execute();
345        Validate.notNull(res);
346        return res.parse();
347    }
348
349    @Override
350    public Connection.Response execute() throws IOException {
351        res = Response.execute(req);
352        return res;
353    }
354
355    @Override
356    public Connection.Request request() {
357        return req;
358    }
359
360    @Override
361    public Connection request(Connection.Request request) {
362        req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
363        return this;
364    }
365
366    @Override
367    public Connection.Response response() {
368        if (res == null) {
369            throw new IllegalArgumentException("You must execute the request before getting a response.");
370        }
371        return res;
372    }
373
374    @Override
375    public Connection response(Connection.Response response) {
376        res = response;
377        return this;
378    }
379
380    @Override
381    public Connection postDataCharset(String charset) {
382        req.postDataCharset(charset);
383        return this;
384    }
385
386    @Override public Connection auth(RequestAuthenticator authenticator) {
387        req.auth(authenticator);
388        return this;
389    }
390
391    @Override public Connection onResponseProgress(Progress<Connection.Response> handler) {
392        req.responseProgress = handler;
393        return this;
394    }
395
396    @SuppressWarnings("unchecked")
397    private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> {
398        private static final URL UnsetUrl; // only used if you created a new Request()
399        static {
400            try {
401                UnsetUrl = new URL("http://undefined/");
402            } catch (MalformedURLException e) {
403                throw new IllegalStateException(e);
404            }
405        }
406
407        URL url = UnsetUrl;
408        Method method = Method.GET;
409        Map<String, List<String>> headers;
410        Map<String, String> cookies;
411
412        private Base() {
413            headers = new LinkedHashMap<>();
414            cookies = new LinkedHashMap<>();
415        }
416
417        private Base(Base<T> copy) {
418            url = copy.url; // unmodifiable object
419            method = copy.method;
420            headers = new LinkedHashMap<>();
421            for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) {
422                headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
423            }
424            cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
425        }
426
427        @Override
428        public URL url() {
429            if (url == UnsetUrl)
430                throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
431            return url;
432        }
433
434        @Override
435        public T url(URL url) {
436            Validate.notNullParam(url, "url");
437            this.url = new UrlBuilder(url).build();
438            return (T) this;
439        }
440
441        @Override
442        public Method method() {
443            return method;
444        }
445
446        @Override
447        public T method(Method method) {
448            Validate.notNullParam(method, "method");
449            this.method = method;
450            return (T) this;
451        }
452
453        @Override
454        public String header(String name) {
455            Validate.notNullParam(name, "name");
456            List<String> vals = getHeadersCaseInsensitive(name);
457            if (vals.size() > 0) {
458                // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
459                return StringUtil.join(vals, ", ");
460            }
461
462            return null;
463        }
464
465        @Override
466        public T addHeader(String name, @Nullable String value) {
467            Validate.notEmptyParam(name, "name");
468            value = value == null ? "" : value;
469
470            List<String> values = headers(name);
471            if (values.isEmpty()) {
472                values = new ArrayList<>();
473                headers.put(name, values);
474            }
475            values.add(value);
476
477            return (T) this;
478        }
479
480        @Override
481        public List<String> headers(String name) {
482            Validate.notEmptyParam(name, "name");
483            return getHeadersCaseInsensitive(name);
484        }
485
486        @Override
487        public T header(String name, String value) {
488            Validate.notEmptyParam(name, "name");
489            removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
490            addHeader(name, value);
491            return (T) this;
492        }
493
494        @Override
495        public boolean hasHeader(String name) {
496            Validate.notEmptyParam(name, "name");
497            return !getHeadersCaseInsensitive(name).isEmpty();
498        }
499
500        /**
501         * Test if the request has a header with this value (case insensitive).
502         */
503        @Override
504        public boolean hasHeaderWithValue(String name, String value) {
505            Validate.notEmpty(name);
506            Validate.notEmpty(value);
507            List<String> values = headers(name);
508            for (String candidate : values) {
509                if (value.equalsIgnoreCase(candidate))
510                    return true;
511            }
512            return false;
513        }
514
515        @Override
516        public T removeHeader(String name) {
517            Validate.notEmptyParam(name, "name");
518            Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too
519            if (entry != null)
520                headers.remove(entry.getKey()); // ensures correct case
521            return (T) this;
522        }
523
524        @Override
525        public Map<String, String> headers() {
526            LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size());
527            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
528                String header = entry.getKey();
529                List<String> values = entry.getValue();
530                if (values.size() > 0)
531                    map.put(header, values.get(0));
532            }
533            return map;
534        }
535
536        @Override
537        public Map<String, List<String>> multiHeaders() {
538            return headers;
539        }
540
541        private List<String> getHeadersCaseInsensitive(String name) {
542            Validate.notNull(name);
543
544            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
545                if (name.equalsIgnoreCase(entry.getKey()))
546                    return entry.getValue();
547            }
548
549            return Collections.emptyList();
550        }
551
552        private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) {
553            String lc = lowerCase(name);
554            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
555                if (lowerCase(entry.getKey()).equals(lc))
556                    return entry;
557            }
558            return null;
559        }
560
561        @Override
562        public String cookie(String name) {
563            Validate.notEmptyParam(name, "name");
564            return cookies.get(name);
565        }
566
567        @Override
568        public T cookie(String name, String value) {
569            Validate.notEmptyParam(name, "name");
570            Validate.notNullParam(value, "value");
571            cookies.put(name, value);
572            return (T) this;
573        }
574
575        @Override
576        public boolean hasCookie(String name) {
577            Validate.notEmptyParam(name, "name");
578            return cookies.containsKey(name);
579        }
580
581        @Override
582        public T removeCookie(String name) {
583            Validate.notEmptyParam(name, "name");
584            cookies.remove(name);
585            return (T) this;
586        }
587
588        @Override
589        public Map<String, String> cookies() {
590            return cookies;
591        }
592    }
593
594    public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request {
595        static {
596            System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
597            // make sure that we can send Sec-Fetch-Site headers etc.
598        }
599
600        private @Nullable Proxy proxy;
601        private int timeoutMilliseconds;
602        private int maxBodySizeBytes;
603        private boolean followRedirects;
604        private final Collection<Connection.KeyVal> data;
605        private @Nullable String body = null;
606        private boolean ignoreHttpErrors = false;
607        private boolean ignoreContentType = false;
608        private Parser parser;
609        private boolean parserDefined = false; // called parser(...) vs initialized in ctor
610        private String postDataCharset = DataUtil.defaultCharsetName;
611        private @Nullable SSLSocketFactory sslSocketFactory;
612        private CookieManager cookieManager;
613        private @Nullable RequestAuthenticator authenticator;
614        private @Nullable Progress<Connection.Response> responseProgress;
615
616        private volatile boolean executing = false;
617
618        Request() {
619            super();
620            timeoutMilliseconds = 30000; // 30 seconds
621            maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
622            followRedirects = true;
623            data = new ArrayList<>();
624            method = Method.GET;
625            addHeader("Accept-Encoding", "gzip");
626            addHeader(USER_AGENT, DEFAULT_UA);
627            parser = Parser.htmlParser();
628            cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
629        }
630
631        Request(Request copy) {
632            super(copy);
633            proxy = copy.proxy;
634            postDataCharset = copy.postDataCharset;
635            timeoutMilliseconds = copy.timeoutMilliseconds;
636            maxBodySizeBytes = copy.maxBodySizeBytes;
637            followRedirects = copy.followRedirects;
638            data = new ArrayList<>(); // data not copied
639            //body not copied
640            ignoreHttpErrors = copy.ignoreHttpErrors;
641            ignoreContentType = copy.ignoreContentType;
642            parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
643            parserDefined = copy.parserDefined;
644            sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
645            cookieManager = copy.cookieManager;
646            authenticator = copy.authenticator;
647            responseProgress = copy.responseProgress;
648            executing = false;
649        }
650
651        @Override
652        public Proxy proxy() {
653            return proxy;
654        }
655
656        @Override
657        public Request proxy(@Nullable Proxy proxy) {
658            this.proxy = proxy;
659            return this;
660        }
661
662        @Override
663        public Request proxy(String host, int port) {
664            this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
665            return this;
666        }
667
668        @Override
669        public int timeout() {
670            return timeoutMilliseconds;
671        }
672
673        @Override
674        public Request timeout(int millis) {
675            Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
676            timeoutMilliseconds = millis;
677            return this;
678        }
679
680        @Override
681        public int maxBodySize() {
682            return maxBodySizeBytes;
683        }
684
685        @Override
686        public Connection.Request maxBodySize(int bytes) {
687            Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
688            maxBodySizeBytes = bytes;
689            return this;
690        }
691
692        @Override
693        public boolean followRedirects() {
694            return followRedirects;
695        }
696
697        @Override
698        public Connection.Request followRedirects(boolean followRedirects) {
699            this.followRedirects = followRedirects;
700            return this;
701        }
702
703        @Override
704        public boolean ignoreHttpErrors() {
705            return ignoreHttpErrors;
706        }
707
708        @Override
709        public SSLSocketFactory sslSocketFactory() {
710            return sslSocketFactory;
711        }
712
713        @Override
714        public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
715            this.sslSocketFactory = sslSocketFactory;
716        }
717
718        @Override
719        public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
720            this.ignoreHttpErrors = ignoreHttpErrors;
721            return this;
722        }
723
724        @Override
725        public boolean ignoreContentType() {
726            return ignoreContentType;
727        }
728
729        @Override
730        public Connection.Request ignoreContentType(boolean ignoreContentType) {
731            this.ignoreContentType = ignoreContentType;
732            return this;
733        }
734
735        @Override
736        public Request data(Connection.KeyVal keyval) {
737            Validate.notNullParam(keyval, "keyval");
738            data.add(keyval);
739            return this;
740        }
741
742        @Override
743        public Collection<Connection.KeyVal> data() {
744            return data;
745        }
746
747        @Override
748        public Connection.Request requestBody(@Nullable String body) {
749            this.body = body;
750            return this;
751        }
752
753        @Override
754        public String requestBody() {
755            return body;
756        }
757
758        @Override
759        public Request parser(Parser parser) {
760            this.parser = parser;
761            parserDefined = true;
762            return this;
763        }
764
765        @Override
766        public Parser parser() {
767            return parser;
768        }
769
770        @Override
771        public Connection.Request postDataCharset(String charset) {
772            Validate.notNullParam(charset, "charset");
773            if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
774            this.postDataCharset = charset;
775            return this;
776        }
777
778        @Override
779        public String postDataCharset() {
780            return postDataCharset;
781        }
782
783        CookieManager cookieManager() {
784            return cookieManager;
785        }
786
787        @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
788            this.authenticator = authenticator;
789            return this;
790        }
791
792        @Override @Nullable public RequestAuthenticator auth() {
793            return authenticator;
794        }
795    }
796
797    public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response {
        // NOTE(review): presumably the cap on chained redirects; the check that uses it is outside this view
        private static final int MAX_REDIRECTS = 20;
        // response header that execute() inspects to decide whether to follow a redirect
        private static final String LOCATION = "Location";
        private final int statusCode;
        private final String statusMessage;
        private @Nullable ByteBuffer byteData;
        // the wrapped body stream; assigned in execute() when there is a body to read
        private @Nullable ControllableInputStream bodyStream;
        private @Nullable HttpURLConnection conn;
        // charset taken from the content type in execute(); may be null
        private @Nullable String charset;
        private @Nullable final String contentType;
        private boolean executed = false;
        private boolean inputStreamRead = false;
        private int numRedirects = 0;
        // the request that this response belongs to
        private final HttpConnection.Request req;

        /*
         * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc).
         * Used in execute() both to accept XML content types and to switch to the XML parser.
         */
        private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*");
816
817        /**
818         <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses
819         are created from the HttpURLConnection and fields defined.
820         */
821        Response() {
822            super();
823            statusCode = 400;
824            statusMessage = "Request not made";
825            req = new Request();
826            contentType = null;
827        }
828
829        static Response execute(HttpConnection.Request req) throws IOException {
830            return execute(req, null);
831        }
832
833        static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException {
834            synchronized (req) {
835                Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads.");
836                req.executing = true;
837            }
838            Validate.notNullParam(req, "req");
839            URL url = req.url();
840            Validate.notNull(url, "URL must be specified to connect");
841            String protocol = url.getProtocol();
842            if (!protocol.equals("http") && !protocol.equals("https"))
843                throw new MalformedURLException("Only http & https protocols supported");
844            final boolean methodHasBody = req.method().hasBody();
845            final boolean hasRequestBody = req.requestBody() != null;
846            if (!methodHasBody)
847                Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method());
848
849            // set up the request for execution
850            String mimeBoundary = null;
851            if (req.data().size() > 0 && (!methodHasBody || hasRequestBody))
852                serialiseRequestUrl(req);
853            else if (methodHasBody)
854                mimeBoundary = setOutputContentType(req);
855
856            long startTime = System.nanoTime();
857            HttpURLConnection conn = createConnection(req);
858            Response res = null;
859            try {
860                conn.connect();
861                if (conn.getDoOutput()) {
862                    OutputStream out = conn.getOutputStream();
863                    try { writePost(req, out, mimeBoundary); }
864                    catch (IOException e) { conn.disconnect(); throw e; }
865                    finally { out.close(); }
866                }
867
868                int status = conn.getResponseCode();
869                res = new Response(conn, req, previousResponse);
870
871                // redirect if there's a location header (from 3xx, or 201 etc)
872                if (res.hasHeader(LOCATION) && req.followRedirects()) {
873                    if (status != HTTP_TEMP_REDIR) {
874                        req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
875                        req.data().clear();
876                        req.requestBody(null);
877                        req.removeHeader(CONTENT_TYPE);
878                    }
879
880                    String location = res.header(LOCATION);
881                    Validate.notNull(location);
882                    if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php
883                        location = location.substring(6);
884                    URL redir = StringUtil.resolve(req.url(), location);
885                    req.url(redir);
886
887                    req.executing = false;
888                    return execute(req, res);
889                }
890                if ((status < 200 || status >= 400) && !req.ignoreHttpErrors())
891                        throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString());
892
893                // check that we can handle the returned content type; if not, abort before fetching it
894                String contentType = res.contentType();
895                if (contentType != null
896                        && !req.ignoreContentType()
897                        && !contentType.startsWith("text/")
898                        && !xmlContentTypeRxp.matcher(contentType).matches()
899                        )
900                    throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml",
901                            contentType, req.url().toString());
902
903                // switch to the XML parser if content type is xml and not parser not explicitly set
904                if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) {
905                    if (!req.parserDefined) req.parser(Parser.xmlParser());
906                }
907
908                res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
909                if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
910                    InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
911                    if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
912                        stream = new GZIPInputStream(stream);
913                    else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate"))
914                        stream = new InflaterInputStream(stream, new Inflater(true));
915                    
916                    res.bodyStream = ControllableInputStream.wrap(
917                        stream, DefaultBufferSize, req.maxBodySize())
918                        .timeout(startTime, req.timeout());
919
920                    if (req.responseProgress != null) // set response progress listener
921                        res.bodyStream.onProgress(conn.getContentLength(), req.responseProgress, res);
922                } else {
923                    res.byteData = DataUtil.emptyByteBuffer();
924                }
925            } catch (IOException e) {
926                if (res != null) res.safeClose(); // will be non-null if got to conn
927                throw e;
928            } finally {
929                req.executing = false;
930
931                // detach any thread local auth delegate
932                if (req.authenticator != null)
933                    AuthenticationHandler.handler.remove();
934            }
935
936            res.executed = true;
937            return res;
938        }
939
940        @Override
941        public int statusCode() {
942            return statusCode;
943        }
944
945        @Override
946        public String statusMessage() {
947            return statusMessage;
948        }
949
950        @Override
951        public String charset() {
952            return charset;
953        }
954
955        @Override
956        public Response charset(String charset) {
957            this.charset = charset;
958            return this;
959        }
960
961        @Override
962        public String contentType() {
963            return contentType;
964        }
965
966        /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */
967        private ControllableInputStream prepareParse() {
968            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
969            ControllableInputStream stream = bodyStream;
970            if (byteData != null) { // bytes have been read in to the buffer, parse that
971                ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit());
972                stream = ControllableInputStream.wrap(bytes, 0); // no max
973                inputStreamRead = false; // ok to reparse if in bytes
974            }
975            Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
976            Validate.notNull(stream);
977            inputStreamRead = true;
978            return stream;
979        }
980
981        @Override public Document parse() throws IOException {
982            ControllableInputStream stream = prepareParse();
983            Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser());
984            doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req?
985            charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
986            safeClose();
987            return doc;
988        }
989
990        @Override public StreamParser streamParser() throws IOException {
991            ControllableInputStream stream = prepareParse();
992            String baseUri = url.toExternalForm();
993            DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser());
994            // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit.
995
996            // set up the stream parser and rig this connection up to the parsed doc:
997            StreamParser streamer = new StreamParser(req.parser());
998            BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset));
999            streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it
1000            streamer.document().connection(new HttpConnection(req, this));
1001            charset = charsetDoc.charset.name();
1002
1003            // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream
1004            return streamer;
1005        }
1006
1007        private void prepareByteData() {
1008            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1009            if (bodyStream != null && byteData == null) {
1010                Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())");
1011                try {
1012                    byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize());
1013                } catch (IOException e) {
1014                    throw new UncheckedIOException(e);
1015                } finally {
1016                    inputStreamRead = true;
1017                    safeClose();
1018                }
1019            }
1020        }
1021
1022        @Override
1023        public String body() {
1024            prepareByteData();
1025            Validate.notNull(byteData);
1026            // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
1027            String body = (charset == null ? UTF_8 : Charset.forName(charset))
1028                .decode(byteData).toString();
1029            ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9
1030            return body;
1031        }
1032
1033        @Override
1034        public byte[] bodyAsBytes() {
1035            prepareByteData();
1036            Validate.notNull(byteData);
1037            Validate.isTrue(byteData.hasArray()); // we made it, so it should
1038
1039            byte[] array = byteData.array();
1040            int offset = byteData.arrayOffset();
1041            int length = byteData.limit();
1042
1043            if (offset == 0 && length == array.length) { // exact, just return it
1044                return array;
1045            } else { // trim to size
1046                byte[] exactArray = new byte[length];
1047                System.arraycopy(array, offset, exactArray, 0, length);
1048                return exactArray;
1049            }
1050        }
1051
1052        @Override
1053        public Connection.Response bufferUp() {
1054            prepareByteData();
1055            return this;
1056        }
1057
1058        @Override
1059        public BufferedInputStream bodyStream() {
1060            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1061
1062            // if we have read to bytes (via buffer up), return those as a stream.
1063            if (byteData != null) {
1064                return new BufferedInputStream(
1065                    new ByteArrayInputStream(byteData.array(), 0, byteData.limit()),
1066                    DefaultBufferSize);
1067            }
1068
1069            Validate.isFalse(inputStreamRead, "Request has already been read");
1070            Validate.notNull(bodyStream);
1071            inputStreamRead = true;
1072            return bodyStream.inputStream();
1073        }
1074
1075        // set up connection defaults, and details from request
1076        private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException {
1077            Proxy proxy = req.proxy();
1078            final HttpURLConnection conn = (HttpURLConnection) (
1079                proxy == null ?
1080                req.url().openConnection() :
1081                req.url().openConnection(proxy)
1082            );
1083
1084            conn.setRequestMethod(req.method().name());
1085            conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
1086            conn.setConnectTimeout(req.timeout());
1087            conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read
1088
1089            if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection)
1090                ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory());
1091            if (req.authenticator != null)
1092                AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally
1093            if (req.method().hasBody())
1094                conn.setDoOutput(true);
1095            CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store
1096            for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) {
1097                for (String value : header.getValue()) {
1098                    conn.addRequestProperty(header.getKey(), value);
1099                }
1100            }
1101            return conn;
1102        }
1103
1104        /**
1105         * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows
1106         * keep-alives to work (as the underlying connection is actually held open, despite the name).
1107         */
1108        private void safeClose() {
1109            if (bodyStream != null) {
1110                try {
1111                    bodyStream.close();
1112                } catch (IOException e) {
1113                    // no-op
1114                } finally {
1115                    bodyStream = null;
1116                }
1117            }
1118            if (conn != null) {
1119                conn.disconnect();
1120                conn = null;
1121            }
1122        }
1123
1124        // set up url, method, header, cookies
1125        private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException {
1126            this.conn = conn;
1127            this.req = request;
1128            method = Method.valueOf(conn.getRequestMethod());
1129            url = conn.getURL();
1130            statusCode = conn.getResponseCode();
1131            statusMessage = conn.getResponseMessage();
1132            contentType = conn.getContentType();
1133
1134            Map<String, List<String>> resHeaders = createHeaderMap(conn);
1135            processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
1136            CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store
1137
1138            if (previousResponse != null) { // was redirected
1139                // map previous response cookies into this response cookies() object
1140                for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
1141                    if (!hasCookie(prevCookie.getKey()))
1142                        cookie(prevCookie.getKey(), prevCookie.getValue());
1143                }
1144                previousResponse.safeClose();
1145
1146                // enforce too many redirects:
1147                numRedirects = previousResponse.numRedirects + 1;
1148                if (numRedirects >= MAX_REDIRECTS)
1149                    throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
1150            }
1151        }
1152
1153        private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) {
1154            // the default sun impl of conn.getHeaderFields() returns header values out of order
1155            final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>();
1156            int i = 0;
1157            while (true) {
1158                final String key = conn.getHeaderFieldKey(i);
1159                final String val = conn.getHeaderField(i);
1160                if (key == null && val == null)
1161                    break;
1162                i++;
1163                if (key == null || val == null)
1164                    continue; // skip http1.1 line
1165
1166                final List<String> vals = headers.computeIfAbsent(key, Functions.listFunction());
1167                vals.add(val);
1168            }
1169            return headers;
1170        }
1171
1172        void processResponseHeaders(Map<String, List<String>> resHeaders) {
1173            for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
1174                String name = entry.getKey();
1175                if (name == null)
1176                    continue; // http/1.1 line
1177
1178                List<String> values = entry.getValue();
1179                for (String value : values) {
1180                    addHeader(name, fixHeaderEncoding(value));
1181                }
1182            }
1183        }
1184
1185        /**
1186         Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that
1187         and re-decode the string as UTF-8.
1188         * @param val a header value string that may have been incorrectly decoded as 8859.
1189         * @return a potentially re-decoded string.
1190         */
1191        @Nullable
1192        private static String fixHeaderEncoding(@Nullable String val) {
1193            if (val == null) return val;
1194            byte[] bytes = val.getBytes(ISO_8859_1);
1195            if (looksLikeUtf8(bytes))
1196                return new String(bytes, UTF_8);
1197            else
1198                return val;
1199        }
1200
1201        private static boolean looksLikeUtf8(byte[] input) {
1202            int i = 0;
1203            // BOM:
1204            if (input.length >= 3
1205                && (input[0] & 0xFF) == 0xEF
1206                && (input[1] & 0xFF) == 0xBB
1207                && (input[2] & 0xFF) == 0xBF) {
1208                i = 3;
1209            }
1210
1211            int end;
1212            boolean foundNonAscii = false;
1213            for (int j = input.length; i < j; ++i) {
1214                int o = input[i];
1215                if ((o & 0x80) == 0) {
1216                    continue; // ASCII
1217                }
1218                foundNonAscii = true;
1219
1220                // UTF-8 leading:
1221                if ((o & 0xE0) == 0xC0) {
1222                    end = i + 1;
1223                } else if ((o & 0xF0) == 0xE0) {
1224                    end = i + 2;
1225                } else if ((o & 0xF8) == 0xF0) {
1226                    end = i + 3;
1227                } else {
1228                    return false;
1229                }
1230
1231                if (end >= input.length)
1232                    return false;
1233
1234                while (i < end) {
1235                    i++;
1236                    o = input[i];
1237                    if ((o & 0xC0) != 0x80) {
1238                        return false;
1239                    }
1240                }
1241            }
1242            return foundNonAscii;
1243        }
1244
1245        private @Nullable static String setOutputContentType(final Connection.Request req) {
1246            final String contentType = req.header(CONTENT_TYPE);
1247            String bound = null;
1248            if (contentType != null) {
1249                // no-op; don't add content type as already set (e.g. for requestBody())
1250                // todo - if content type already set, we could add charset
1251
1252                // if user has set content type to multipart/form-data, auto add boundary.
1253                if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) {
1254                    bound = DataUtil.mimeBoundary();
1255                    req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1256                }
1257
1258            }
1259            else if (needsMultipart(req)) {
1260                bound = DataUtil.mimeBoundary();
1261                req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1262            } else {
1263                req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset());
1264            }
1265            return bound;
1266        }
1267
1268        private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException {
1269            final Collection<Connection.KeyVal> data = req.data();
1270            final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset())));
1271
1272            if (boundary != null) {
1273                // boundary will be set if we're in multipart mode
1274                for (Connection.KeyVal keyVal : data) {
1275                    w.write("--");
1276                    w.write(boundary);
1277                    w.write("\r\n");
1278                    w.write("Content-Disposition: form-data; name=\"");
1279                    w.write(encodeMimeName(keyVal.key())); // encodes " to %22
1280                    w.write("\"");
1281                    final InputStream input = keyVal.inputStream();
1282                    if (input != null) {
1283                        w.write("; filename=\"");
1284                        w.write(encodeMimeName(keyVal.value()));
1285                        w.write("\"\r\nContent-Type: ");
1286                        String contentType = keyVal.contentType();
1287                        w.write(contentType != null ? contentType : DefaultUploadType);
1288                        w.write("\r\n\r\n");
1289                        w.flush(); // flush
1290                        DataUtil.crossStreams(input, outputStream);
1291                        outputStream.flush();
1292                    } else {
1293                        w.write("\r\n\r\n");
1294                        w.write(keyVal.value());
1295                    }
1296                    w.write("\r\n");
1297                }
1298                w.write("--");
1299                w.write(boundary);
1300                w.write("--");
1301            } else {
1302                String body = req.requestBody();
1303                if (body != null) {
1304                    // data will be in query string, we're sending a plaintext body
1305                    w.write(body);
1306                }
1307                else {
1308                    // regular form data (application/x-www-form-urlencoded)
1309                    boolean first = true;
1310                    for (Connection.KeyVal keyVal : data) {
1311                        if (!first)
1312                            w.append('&');
1313                        else
1314                            first = false;
1315
1316                        w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset()));
1317                        w.write('=');
1318                        w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset()));
1319                    }
1320                }
1321            }
1322            w.close();
1323        }
1324
1325        // for get url reqs, serialise the data map into the url
1326        private static void serialiseRequestUrl(Connection.Request req) throws IOException {
1327            UrlBuilder in = new UrlBuilder(req.url());
1328
1329            for (Connection.KeyVal keyVal : req.data()) {
1330                Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string.");
1331                in.appendKeyVal(keyVal);
1332            }
1333            req.url(in.build());
1334            req.data().clear(); // moved into url as get params
1335        }
1336    }
1337
1338    private static boolean needsMultipart(Connection.Request req) {
1339        // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary
1340        for (Connection.KeyVal keyVal : req.data()) {
1341            if (keyVal.hasInputStream())
1342                return true;
1343        }
1344        return false;
1345    }
1346
1347    public static class KeyVal implements Connection.KeyVal {
1348        private String key;
1349        private String value;
1350        private @Nullable InputStream stream;
1351        private @Nullable String contentType;
1352
1353        public static KeyVal create(String key, String value) {
1354            return new KeyVal(key, value);
1355        }
1356
1357        public static KeyVal create(String key, String filename, InputStream stream) {
1358            return new KeyVal(key, filename)
1359                .inputStream(stream);
1360        }
1361
1362        private KeyVal(String key, String value) {
1363            Validate.notEmptyParam(key, "key");
1364            Validate.notNullParam(value, "value");
1365            this.key = key;
1366            this.value = value;
1367        }
1368
1369        @Override
1370        public KeyVal key(String key) {
1371            Validate.notEmptyParam(key, "key");
1372            this.key = key;
1373            return this;
1374        }
1375
1376        @Override
1377        public String key() {
1378            return key;
1379        }
1380
1381        @Override
1382        public KeyVal value(String value) {
1383            Validate.notNullParam(value, "value");
1384            this.value = value;
1385            return this;
1386        }
1387
1388        @Override
1389        public String value() {
1390            return value;
1391        }
1392
1393        public KeyVal inputStream(InputStream inputStream) {
1394            Validate.notNullParam(value, "inputStream");
1395            this.stream = inputStream;
1396            return this;
1397        }
1398
1399        @Override
1400        public InputStream inputStream() {
1401            return stream;
1402        }
1403
1404        @Override
1405        public boolean hasInputStream() {
1406            return stream != null;
1407        }
1408
1409        @Override
1410        public Connection.KeyVal contentType(String contentType) {
1411            Validate.notEmpty(contentType);
1412            this.contentType = contentType;
1413            return this;
1414        }
1415
1416        @Override
1417        public String contentType() {
1418            return contentType;
1419        }
1420
1421        @Override
1422        public String toString() {
1423            return key + "=" + value;
1424        }
1425    }
1426}