001package org.jsoup.helper;
002
003import org.jsoup.Connection;
004import org.jsoup.HttpStatusException;
005import org.jsoup.Progress;
006import org.jsoup.UncheckedIOException;
007import org.jsoup.UnsupportedMimeTypeException;
008import org.jsoup.internal.ControllableInputStream;
009import org.jsoup.internal.Functions;
010import org.jsoup.internal.SharedConstants;
011import org.jsoup.internal.StringUtil;
012import org.jsoup.nodes.Document;
013import org.jsoup.parser.Parser;
014import org.jsoup.parser.StreamParser;
015import org.jsoup.parser.TokenQueue;
016import org.jspecify.annotations.Nullable;
017
018import javax.net.ssl.HttpsURLConnection;
019import javax.net.ssl.SSLSocketFactory;
020import java.io.BufferedInputStream;
021import java.io.BufferedReader;
022import java.io.BufferedWriter;
023import java.io.ByteArrayInputStream;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.InputStreamReader;
027import java.io.OutputStream;
028import java.io.OutputStreamWriter;
029import java.net.CookieManager;
030import java.net.CookieStore;
031import java.net.HttpURLConnection;
032import java.net.InetSocketAddress;
033import java.net.MalformedURLException;
034import java.net.Proxy;
035import java.net.URL;
036import java.net.URLEncoder;
037import java.nio.Buffer;
038import java.nio.ByteBuffer;
039import java.nio.charset.Charset;
040import java.nio.charset.IllegalCharsetNameException;
041import java.util.ArrayList;
042import java.util.Collection;
043import java.util.Collections;
044import java.util.LinkedHashMap;
045import java.util.List;
046import java.util.Map;
047import java.util.regex.Pattern;
048import java.util.zip.GZIPInputStream;
049import java.util.zip.Inflater;
050import java.util.zip.InflaterInputStream;
051
052import static org.jsoup.Connection.Method.HEAD;
053import static org.jsoup.helper.DataUtil.UTF_8;
054import static org.jsoup.internal.Normalizer.lowerCase;
055
056/**
057 * Implementation of {@link Connection}.
058 * @see org.jsoup.Jsoup#connect(String)
059 */
060@SuppressWarnings("CharsetObjectCanBeUsed")
061public class HttpConnection implements Connection {
062    public static final String CONTENT_ENCODING = "Content-Encoding";
063    /**
064     * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
065     * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
066     */
067    public static final String DEFAULT_UA =
068        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36";
069    private static final String USER_AGENT = "User-Agent";
070    public static final String CONTENT_TYPE = "Content-Type";
071    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
072    public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded";
073    private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set.
074    private static final String DefaultUploadType = "application/octet-stream";
075    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
076
077    /**
078     Create a new Connection, with the request URL specified.
079     @param url the URL to fetch from
080     @return a new Connection object
081     */
082    public static Connection connect(String url) {
083        Connection con = new HttpConnection();
084        con.url(url);
085        return con;
086    }
087
088    /**
089     Create a new Connection, with the request URL specified.
090     @param url the URL to fetch from
091     @return a new Connection object
092     */
093    public static Connection connect(URL url) {
094        Connection con = new HttpConnection();
095        con.url(url);
096        return con;
097    }
098
099    /**
100     Create a new, empty HttpConnection.
101     */
102    public HttpConnection() {
103        req = new Request();
104    }
105
106    /**
107     Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not
108     copied. All other settings (proxy, parser, cookies, etc) are copied.
109     @param copy the request to copy
110     */
111    HttpConnection(Request copy) {
112        req = new Request(copy);
113    }
114
115    private static String encodeMimeName(String val) {
116        return val.replace("\"", "%22");
117    }
118
119    private HttpConnection.Request req;
120    private Connection.@Nullable Response res;
121
122    @Override
123    public Connection newRequest() {
124        // copy the prototype request for the different settings, cookie manager, etc
125        return new HttpConnection(req);
126    }
127
128    /** Create a new Connection that just wraps the provided Request and Response */
129    private HttpConnection(Request req, Response res) {
130        this.req = req;
131        this.res = res;
132    }
133
134    @Override
135    public Connection url(URL url) {
136        req.url(url);
137        return this;
138    }
139
140    @Override
141    public Connection url(String url) {
142        Validate.notEmptyParam(url, "url");
143        try {
144            req.url(new URL(url));
145        } catch (MalformedURLException e) {
146            throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e);
147        }
148        return this;
149    }
150
151    @Override
152    public Connection proxy(@Nullable Proxy proxy) {
153        req.proxy(proxy);
154        return this;
155    }
156
157    @Override
158    public Connection proxy(String host, int port) {
159        req.proxy(host, port);
160        return this;
161    }
162
163    @Override
164    public Connection userAgent(String userAgent) {
165        Validate.notNullParam(userAgent, "userAgent");
166        req.header(USER_AGENT, userAgent);
167        return this;
168    }
169
170    @Override
171    public Connection timeout(int millis) {
172        req.timeout(millis);
173        return this;
174    }
175
176    @Override
177    public Connection maxBodySize(int bytes) {
178        req.maxBodySize(bytes);
179        return this;
180    }
181
182    @Override
183    public Connection followRedirects(boolean followRedirects) {
184        req.followRedirects(followRedirects);
185        return this;
186    }
187
188    @Override
189    public Connection referrer(String referrer) {
190        Validate.notNullParam(referrer, "referrer");
191        req.header("Referer", referrer);
192        return this;
193    }
194
195    @Override
196    public Connection method(Method method) {
197        req.method(method);
198        return this;
199    }
200
201    @Override
202    public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
203                req.ignoreHttpErrors(ignoreHttpErrors);
204                return this;
205        }
206
207    @Override
208    public Connection ignoreContentType(boolean ignoreContentType) {
209        req.ignoreContentType(ignoreContentType);
210        return this;
211    }
212
213    @Override
214    public Connection data(String key, String value) {
215        req.data(KeyVal.create(key, value));
216        return this;
217    }
218
219    @Override
220    public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
221            req.sslSocketFactory(sslSocketFactory);
222            return this;
223    }
224
225    @Override
226    public Connection data(String key, String filename, InputStream inputStream) {
227        req.data(KeyVal.create(key, filename, inputStream));
228        return this;
229    }
230
231    @Override
232    public Connection data(String key, String filename, InputStream inputStream, String contentType) {
233        req.data(KeyVal.create(key, filename, inputStream).contentType(contentType));
234        return this;
235    }
236
237    @Override
238    public Connection data(Map<String, String> data) {
239        Validate.notNullParam(data, "data");
240        for (Map.Entry<String, String> entry : data.entrySet()) {
241            req.data(KeyVal.create(entry.getKey(), entry.getValue()));
242        }
243        return this;
244    }
245
246    @Override
247    public Connection data(String... keyvals) {
248        Validate.notNullParam(keyvals, "keyvals");
249        Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
250        for (int i = 0; i < keyvals.length; i += 2) {
251            String key = keyvals[i];
252            String value = keyvals[i+1];
253            Validate.notEmpty(key, "Data key must not be empty");
254            Validate.notNull(value, "Data value must not be null");
255            req.data(KeyVal.create(key, value));
256        }
257        return this;
258    }
259
260    @Override
261    public Connection data(Collection<Connection.KeyVal> data) {
262        Validate.notNullParam(data, "data");
263        for (Connection.KeyVal entry: data) {
264            req.data(entry);
265        }
266        return this;
267    }
268
269    @Override
270    public Connection.KeyVal data(String key) {
271        Validate.notEmptyParam(key, "key");
272        for (Connection.KeyVal keyVal : request().data()) {
273            if (keyVal.key().equals(key))
274                return keyVal;
275        }
276        return null;
277    }
278
279    @Override
280    public Connection requestBody(String body) {
281        req.requestBody(body);
282        return this;
283    }
284
285    @Override
286    public Connection header(String name, String value) {
287        req.header(name, value);
288        return this;
289    }
290
291    @Override
292    public Connection headers(Map<String,String> headers) {
293        Validate.notNullParam(headers, "headers");
294        for (Map.Entry<String,String> entry : headers.entrySet()) {
295            req.header(entry.getKey(),entry.getValue());
296        }
297        return this;
298    }
299
300    @Override
301    public Connection cookie(String name, String value) {
302        req.cookie(name, value);
303        return this;
304    }
305
306    @Override
307    public Connection cookies(Map<String, String> cookies) {
308        Validate.notNullParam(cookies, "cookies");
309        for (Map.Entry<String, String> entry : cookies.entrySet()) {
310            req.cookie(entry.getKey(), entry.getValue());
311        }
312        return this;
313    }
314
315    @Override
316    public Connection cookieStore(CookieStore cookieStore) {
317        // create a new cookie manager using the new store
318        req.cookieManager = new CookieManager(cookieStore, null);
319        return this;
320    }
321
322    @Override
323    public CookieStore cookieStore() {
324        return req.cookieManager.getCookieStore();
325    }
326
327    @Override
328    public Connection parser(Parser parser) {
329        req.parser(parser);
330        return this;
331    }
332
333    @Override
334    public Document get() throws IOException {
335        req.method(Method.GET);
336        execute();
337        Validate.notNull(res);
338        return res.parse();
339    }
340
341    @Override
342    public Document post() throws IOException {
343        req.method(Method.POST);
344        execute();
345        Validate.notNull(res);
346        return res.parse();
347    }
348
349    @Override
350    public Connection.Response execute() throws IOException {
351        res = Response.execute(req);
352        return res;
353    }
354
355    @Override
356    public Connection.Request request() {
357        return req;
358    }
359
360    @Override
361    public Connection request(Connection.Request request) {
362        req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
363        return this;
364    }
365
366    @Override
367    public Connection.Response response() {
368        if (res == null) {
369            throw new IllegalArgumentException("You must execute the request before getting a response.");
370        }
371        return res;
372    }
373
374    @Override
375    public Connection response(Connection.Response response) {
376        res = response;
377        return this;
378    }
379
380    @Override
381    public Connection postDataCharset(String charset) {
382        req.postDataCharset(charset);
383        return this;
384    }
385
386    @Override public Connection auth(RequestAuthenticator authenticator) {
387        req.auth(authenticator);
388        return this;
389    }
390
391    @Override public Connection onResponseProgress(Progress<Connection.Response> handler) {
392        req.responseProgress = handler;
393        return this;
394    }
395
396    @SuppressWarnings("unchecked")
397    private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> {
398        private static final URL UnsetUrl; // only used if you created a new Request()
399        static {
400            try {
401                UnsetUrl = new URL("http://undefined/");
402            } catch (MalformedURLException e) {
403                throw new IllegalStateException(e);
404            }
405        }
406
407        URL url = UnsetUrl;
408        Method method = Method.GET;
409        Map<String, List<String>> headers;
410        Map<String, String> cookies;
411
412        private Base() {
413            headers = new LinkedHashMap<>();
414            cookies = new LinkedHashMap<>();
415        }
416
417        private Base(Base<T> copy) {
418            url = copy.url; // unmodifiable object
419            method = copy.method;
420            headers = new LinkedHashMap<>();
421            for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) {
422                headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
423            }
424            cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
425        }
426
427        @Override
428        public URL url() {
429            if (url == UnsetUrl)
430                throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
431            return url;
432        }
433
434        @Override
435        public T url(URL url) {
436            Validate.notNullParam(url, "url");
437            this.url = new UrlBuilder(url).build();
438            return (T) this;
439        }
440
441        @Override
442        public Method method() {
443            return method;
444        }
445
446        @Override
447        public T method(Method method) {
448            Validate.notNullParam(method, "method");
449            this.method = method;
450            return (T) this;
451        }
452
453        @Override
454        public String header(String name) {
455            Validate.notNullParam(name, "name");
456            List<String> vals = getHeadersCaseInsensitive(name);
457            if (vals.size() > 0) {
458                // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
459                return StringUtil.join(vals, ", ");
460            }
461
462            return null;
463        }
464
465        @Override
466        public T addHeader(String name, @Nullable String value) {
467            Validate.notEmptyParam(name, "name");
468            //noinspection ConstantConditions
469            value = value == null ? "" : value;
470
471            List<String> values = headers(name);
472            if (values.isEmpty()) {
473                values = new ArrayList<>();
474                headers.put(name, values);
475            }
476            values.add(value);
477
478            return (T) this;
479        }
480
481        @Override
482        public List<String> headers(String name) {
483            Validate.notEmptyParam(name, "name");
484            return getHeadersCaseInsensitive(name);
485        }
486
487        @Override
488        public T header(String name, String value) {
489            Validate.notEmptyParam(name, "name");
490            removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
491            addHeader(name, value);
492            return (T) this;
493        }
494
495        @Override
496        public boolean hasHeader(String name) {
497            Validate.notEmptyParam(name, "name");
498            return !getHeadersCaseInsensitive(name).isEmpty();
499        }
500
501        /**
502         * Test if the request has a header with this value (case insensitive).
503         */
504        @Override
505        public boolean hasHeaderWithValue(String name, String value) {
506            Validate.notEmpty(name);
507            Validate.notEmpty(value);
508            List<String> values = headers(name);
509            for (String candidate : values) {
510                if (value.equalsIgnoreCase(candidate))
511                    return true;
512            }
513            return false;
514        }
515
516        @Override
517        public T removeHeader(String name) {
518            Validate.notEmptyParam(name, "name");
519            Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too
520            if (entry != null)
521                headers.remove(entry.getKey()); // ensures correct case
522            return (T) this;
523        }
524
525        @Override
526        public Map<String, String> headers() {
527            LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size());
528            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
529                String header = entry.getKey();
530                List<String> values = entry.getValue();
531                if (values.size() > 0)
532                    map.put(header, values.get(0));
533            }
534            return map;
535        }
536
537        @Override
538        public Map<String, List<String>> multiHeaders() {
539            return headers;
540        }
541
542        private List<String> getHeadersCaseInsensitive(String name) {
543            Validate.notNull(name);
544
545            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
546                if (name.equalsIgnoreCase(entry.getKey()))
547                    return entry.getValue();
548            }
549
550            return Collections.emptyList();
551        }
552
553        private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) {
554            String lc = lowerCase(name);
555            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
556                if (lowerCase(entry.getKey()).equals(lc))
557                    return entry;
558            }
559            return null;
560        }
561
562        @Override
563        public String cookie(String name) {
564            Validate.notEmptyParam(name, "name");
565            return cookies.get(name);
566        }
567
568        @Override
569        public T cookie(String name, String value) {
570            Validate.notEmptyParam(name, "name");
571            Validate.notNullParam(value, "value");
572            cookies.put(name, value);
573            return (T) this;
574        }
575
576        @Override
577        public boolean hasCookie(String name) {
578            Validate.notEmptyParam(name, "name");
579            return cookies.containsKey(name);
580        }
581
582        @Override
583        public T removeCookie(String name) {
584            Validate.notEmptyParam(name, "name");
585            cookies.remove(name);
586            return (T) this;
587        }
588
589        @Override
590        public Map<String, String> cookies() {
591            return cookies;
592        }
593    }
594
595    public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request {
596        static {
597            System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
598            // make sure that we can send Sec-Fetch-Site headers etc.
599        }
600
601        private @Nullable Proxy proxy;
602        private int timeoutMilliseconds;
603        private int maxBodySizeBytes;
604        private boolean followRedirects;
605        private final Collection<Connection.KeyVal> data;
606        private @Nullable String body = null;
607        private boolean ignoreHttpErrors = false;
608        private boolean ignoreContentType = false;
609        private Parser parser;
610        private boolean parserDefined = false; // called parser(...) vs initialized in ctor
611        private String postDataCharset = DataUtil.defaultCharsetName;
612        private @Nullable SSLSocketFactory sslSocketFactory;
613        private CookieManager cookieManager;
614        private @Nullable RequestAuthenticator authenticator;
615        private @Nullable Progress<Connection.Response> responseProgress;
616
617        private volatile boolean executing = false;
618
619        Request() {
620            super();
621            timeoutMilliseconds = 30000; // 30 seconds
622            maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
623            followRedirects = true;
624            data = new ArrayList<>();
625            method = Method.GET;
626            addHeader("Accept-Encoding", "gzip");
627            addHeader(USER_AGENT, DEFAULT_UA);
628            parser = Parser.htmlParser();
629            cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
630        }
631
632        Request(Request copy) {
633            super(copy);
634            proxy = copy.proxy;
635            postDataCharset = copy.postDataCharset;
636            timeoutMilliseconds = copy.timeoutMilliseconds;
637            maxBodySizeBytes = copy.maxBodySizeBytes;
638            followRedirects = copy.followRedirects;
639            data = new ArrayList<>(); // data not copied
640            //body not copied
641            ignoreHttpErrors = copy.ignoreHttpErrors;
642            ignoreContentType = copy.ignoreContentType;
643            parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
644            parserDefined = copy.parserDefined;
645            sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
646            cookieManager = copy.cookieManager;
647            authenticator = copy.authenticator;
648            responseProgress = copy.responseProgress;
649            executing = false;
650        }
651
652        @Override
653        public Proxy proxy() {
654            return proxy;
655        }
656
657        @Override
658        public Request proxy(@Nullable Proxy proxy) {
659            this.proxy = proxy;
660            return this;
661        }
662
663        @Override
664        public Request proxy(String host, int port) {
665            this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
666            return this;
667        }
668
669        @Override
670        public int timeout() {
671            return timeoutMilliseconds;
672        }
673
674        @Override
675        public Request timeout(int millis) {
676            Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
677            timeoutMilliseconds = millis;
678            return this;
679        }
680
681        @Override
682        public int maxBodySize() {
683            return maxBodySizeBytes;
684        }
685
686        @Override
687        public Connection.Request maxBodySize(int bytes) {
688            Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
689            maxBodySizeBytes = bytes;
690            return this;
691        }
692
693        @Override
694        public boolean followRedirects() {
695            return followRedirects;
696        }
697
698        @Override
699        public Connection.Request followRedirects(boolean followRedirects) {
700            this.followRedirects = followRedirects;
701            return this;
702        }
703
704        @Override
705        public boolean ignoreHttpErrors() {
706            return ignoreHttpErrors;
707        }
708
709        @Override
710        public SSLSocketFactory sslSocketFactory() {
711            return sslSocketFactory;
712        }
713
714        @Override
715        public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
716            this.sslSocketFactory = sslSocketFactory;
717        }
718
719        @Override
720        public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
721            this.ignoreHttpErrors = ignoreHttpErrors;
722            return this;
723        }
724
725        @Override
726        public boolean ignoreContentType() {
727            return ignoreContentType;
728        }
729
730        @Override
731        public Connection.Request ignoreContentType(boolean ignoreContentType) {
732            this.ignoreContentType = ignoreContentType;
733            return this;
734        }
735
736        @Override
737        public Request data(Connection.KeyVal keyval) {
738            Validate.notNullParam(keyval, "keyval");
739            data.add(keyval);
740            return this;
741        }
742
743        @Override
744        public Collection<Connection.KeyVal> data() {
745            return data;
746        }
747
748        @Override
749        public Connection.Request requestBody(@Nullable String body) {
750            this.body = body;
751            return this;
752        }
753
754        @Override
755        public String requestBody() {
756            return body;
757        }
758
759        @Override
760        public Request parser(Parser parser) {
761            this.parser = parser;
762            parserDefined = true;
763            return this;
764        }
765
766        @Override
767        public Parser parser() {
768            return parser;
769        }
770
771        @Override
772        public Connection.Request postDataCharset(String charset) {
773            Validate.notNullParam(charset, "charset");
774            if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
775            this.postDataCharset = charset;
776            return this;
777        }
778
779        @Override
780        public String postDataCharset() {
781            return postDataCharset;
782        }
783
784        CookieManager cookieManager() {
785            return cookieManager;
786        }
787
788        @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
789            this.authenticator = authenticator;
790            return this;
791        }
792
793        @Override @Nullable public RequestAuthenticator auth() {
794            return authenticator;
795        }
796    }
797
798    public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response {
799        private static final int MAX_REDIRECTS = 20;
800        private static final String LOCATION = "Location";
801        private final int statusCode;
802        private final String statusMessage;
803        private @Nullable ByteBuffer byteData;
804        private @Nullable ControllableInputStream bodyStream;
805        private @Nullable HttpURLConnection conn;
806        private @Nullable String charset;
807        private @Nullable final String contentType;
808        private boolean executed = false;
809        private boolean inputStreamRead = false;
810        private int numRedirects = 0;
811        private final HttpConnection.Request req;
812
813        /*
814         * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc)
815         */
816        private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*");
817
818        /**
819         <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses
820         are created from the HttpURLConnection and fields defined.
821         */
822        Response() {
823            super();
824            statusCode = 400;
825            statusMessage = "Request not made";
826            req = new Request();
827            contentType = null;
828        }
829
830        static Response execute(HttpConnection.Request req) throws IOException {
831            return execute(req, null);
832        }
833
834        static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException {
835            synchronized (req) {
836                Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads.");
837                req.executing = true;
838            }
839            Validate.notNullParam(req, "req");
840            URL url = req.url();
841            Validate.notNull(url, "URL must be specified to connect");
842            String protocol = url.getProtocol();
843            if (!protocol.equals("http") && !protocol.equals("https"))
844                throw new MalformedURLException("Only http & https protocols supported");
845            final boolean methodHasBody = req.method().hasBody();
846            final boolean hasRequestBody = req.requestBody() != null;
847            if (!methodHasBody)
848                Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method());
849
850            // set up the request for execution
851            String mimeBoundary = null;
852            if (req.data().size() > 0 && (!methodHasBody || hasRequestBody))
853                serialiseRequestUrl(req);
854            else if (methodHasBody)
855                mimeBoundary = setOutputContentType(req);
856
857            long startTime = System.nanoTime();
858            HttpURLConnection conn = createConnection(req);
859            Response res = null;
860            try {
861                conn.connect();
862                if (conn.getDoOutput()) {
863                    OutputStream out = conn.getOutputStream();
864                    try { writePost(req, out, mimeBoundary); }
865                    catch (IOException e) { conn.disconnect(); throw e; }
866                    finally { out.close(); }
867                }
868
869                int status = conn.getResponseCode();
870                res = new Response(conn, req, previousResponse);
871
872                // redirect if there's a location header (from 3xx, or 201 etc)
873                if (res.hasHeader(LOCATION) && req.followRedirects()) {
874                    if (status != HTTP_TEMP_REDIR) {
875                        req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
876                        req.data().clear();
877                        req.requestBody(null);
878                        req.removeHeader(CONTENT_TYPE);
879                    }
880
881                    String location = res.header(LOCATION);
882                    Validate.notNull(location);
883                    if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php
884                        location = location.substring(6);
885                    URL redir = StringUtil.resolve(req.url(), location);
886                    req.url(redir);
887
888                    req.executing = false;
889                    return execute(req, res);
890                }
891                if ((status < 200 || status >= 400) && !req.ignoreHttpErrors())
892                        throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString());
893
894                // check that we can handle the returned content type; if not, abort before fetching it
895                String contentType = res.contentType();
896                if (contentType != null
897                        && !req.ignoreContentType()
898                        && !contentType.startsWith("text/")
899                        && !xmlContentTypeRxp.matcher(contentType).matches()
900                        )
901                    throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml",
902                            contentType, req.url().toString());
903
904                // switch to the XML parser if content type is xml and not parser not explicitly set
905                if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) {
906                    if (!req.parserDefined) req.parser(Parser.xmlParser());
907                }
908
909                res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
910                if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
911                    InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
912                    if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
913                        stream = new GZIPInputStream(stream);
914                    else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate"))
915                        stream = new InflaterInputStream(stream, new Inflater(true));
916                    
917                    res.bodyStream = ControllableInputStream.wrap(
918                        stream, SharedConstants.DefaultBufferSize, req.maxBodySize())
919                        .timeout(startTime, req.timeout());
920
921                    if (req.responseProgress != null) // set response progress listener
922                        res.bodyStream.onProgress(conn.getContentLength(), req.responseProgress, res);
923                } else {
924                    res.byteData = DataUtil.emptyByteBuffer();
925                }
926            } catch (IOException e) {
927                if (res != null) res.safeClose(); // will be non-null if got to conn
928                throw e;
929            } finally {
930                req.executing = false;
931
932                // detach any thread local auth delegate
933                if (req.authenticator != null)
934                    AuthenticationHandler.handler.remove();
935            }
936
937            res.executed = true;
938            return res;
939        }
940
941        @Override
942        public int statusCode() {
943            return statusCode;
944        }
945
946        @Override
947        public String statusMessage() {
948            return statusMessage;
949        }
950
951        @Override
952        public String charset() {
953            return charset;
954        }
955
956        @Override
957        public Response charset(String charset) {
958            this.charset = charset;
959            return this;
960        }
961
962        @Override
963        public String contentType() {
964            return contentType;
965        }
966
967        /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */
968        private InputStream prepareParse() {
969            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
970            InputStream stream = bodyStream;
971            if (byteData != null) { // bytes have been read in to the buffer, parse that
972                stream = new ByteArrayInputStream(byteData.array());
973                inputStreamRead = false; // ok to reparse if in bytes
974            }
975            Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
976            Validate.notNull(stream);
977            inputStreamRead = true;
978            return stream;
979        }
980
981        @Override public Document parse() throws IOException {
982            InputStream stream = prepareParse();
983            Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser());
984            doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req?
985            charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
986            safeClose();
987            return doc;
988        }
989
990        @Override public StreamParser streamParser() throws IOException {
991            InputStream stream = prepareParse();
992            String baseUri = url.toExternalForm();
993            DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser());
994            // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit.
995
996            // set up the stream parser and rig this connection up to the parsed doc:
997            StreamParser streamer = new StreamParser(req.parser());
998            BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset));
999            DataUtil.maybeSkipBom(reader, charsetDoc);
1000            streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it
1001            streamer.document().connection(new HttpConnection(req, this));
1002            charset = charsetDoc.charset.name();
1003
1004            // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream
1005            return streamer;
1006        }
1007
1008        private void prepareByteData() {
1009            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1010            if (bodyStream != null && byteData == null) {
1011                Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())");
1012                try {
1013                    byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize());
1014                } catch (IOException e) {
1015                    throw new UncheckedIOException(e);
1016                } finally {
1017                    inputStreamRead = true;
1018                    safeClose();
1019                }
1020            }
1021        }
1022
1023        @Override
1024        public String body() {
1025            prepareByteData();
1026            Validate.notNull(byteData);
1027            // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
1028            String body = (charset == null ? UTF_8 : Charset.forName(charset))
1029                .decode(byteData).toString();
1030            ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9
1031            return body;
1032        }
1033
1034        @Override
1035        public byte[] bodyAsBytes() {
1036            prepareByteData();
1037            Validate.notNull(byteData);
1038            return byteData.array();
1039        }
1040
1041        @Override
1042        public Connection.Response bufferUp() {
1043            prepareByteData();
1044            return this;
1045        }
1046
1047        @Override
1048        public BufferedInputStream bodyStream() {
1049            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1050
1051            // if we have read to bytes (via buffer up), return those as a stream.
1052            if (byteData != null) {
1053                return new BufferedInputStream(new ByteArrayInputStream(byteData.array()), SharedConstants.DefaultBufferSize);
1054            }
1055
1056            Validate.isFalse(inputStreamRead, "Request has already been read");
1057            Validate.notNull(bodyStream);
1058            inputStreamRead = true;
1059            return bodyStream.inputStream();
1060        }
1061
1062        // set up connection defaults, and details from request
1063        private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException {
1064            Proxy proxy = req.proxy();
1065            final HttpURLConnection conn = (HttpURLConnection) (
1066                proxy == null ?
1067                req.url().openConnection() :
1068                req.url().openConnection(proxy)
1069            );
1070
1071            conn.setRequestMethod(req.method().name());
1072            conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
1073            conn.setConnectTimeout(req.timeout());
1074            conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read
1075
1076            if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection)
1077                ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory());
1078            if (req.authenticator != null)
1079                AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally
1080            if (req.method().hasBody())
1081                conn.setDoOutput(true);
1082            CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store
1083            for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) {
1084                for (String value : header.getValue()) {
1085                    conn.addRequestProperty(header.getKey(), value);
1086                }
1087            }
1088            return conn;
1089        }
1090
1091        /**
1092         * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows
1093         * keep-alives to work (as the underlying connection is actually held open, despite the name).
1094         */
1095        private void safeClose() {
1096            if (bodyStream != null) {
1097                try {
1098                    bodyStream.close();
1099                } catch (IOException e) {
1100                    // no-op
1101                } finally {
1102                    bodyStream = null;
1103                }
1104            }
1105            if (conn != null) {
1106                conn.disconnect();
1107                conn = null;
1108            }
1109        }
1110
1111        // set up url, method, header, cookies
1112        private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException {
1113            this.conn = conn;
1114            this.req = request;
1115            method = Method.valueOf(conn.getRequestMethod());
1116            url = conn.getURL();
1117            statusCode = conn.getResponseCode();
1118            statusMessage = conn.getResponseMessage();
1119            contentType = conn.getContentType();
1120
1121            Map<String, List<String>> resHeaders = createHeaderMap(conn);
1122            processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
1123            CookieUtil.storeCookies(req, url, resHeaders); // add set cookies to cookie store
1124
1125            if (previousResponse != null) { // was redirected
1126                // map previous response cookies into this response cookies() object
1127                for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
1128                    if (!hasCookie(prevCookie.getKey()))
1129                        cookie(prevCookie.getKey(), prevCookie.getValue());
1130                }
1131                previousResponse.safeClose();
1132
1133                // enforce too many redirects:
1134                numRedirects = previousResponse.numRedirects + 1;
1135                if (numRedirects >= MAX_REDIRECTS)
1136                    throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
1137            }
1138        }
1139
1140        private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) {
1141            // the default sun impl of conn.getHeaderFields() returns header values out of order
1142            final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>();
1143            int i = 0;
1144            while (true) {
1145                final String key = conn.getHeaderFieldKey(i);
1146                final String val = conn.getHeaderField(i);
1147                if (key == null && val == null)
1148                    break;
1149                i++;
1150                if (key == null || val == null)
1151                    continue; // skip http1.1 line
1152
1153                final List<String> vals = headers.computeIfAbsent(key, Functions.listFunction());
1154                vals.add(val);
1155            }
1156            return headers;
1157        }
1158
1159        void processResponseHeaders(Map<String, List<String>> resHeaders) {
1160            for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
1161                String name = entry.getKey();
1162                if (name == null)
1163                    continue; // http/1.1 line
1164
1165                List<String> values = entry.getValue();
1166                if (name.equalsIgnoreCase("Set-Cookie")) {
1167                    for (String value : values) {
1168                        if (value == null)
1169                            continue;
1170                        TokenQueue cd = new TokenQueue(value);
1171                        String cookieName = cd.chompTo("=").trim();
1172                        String cookieVal = cd.consumeTo(";").trim();
1173                        // ignores path, date, domain, validateTLSCertificates et al. full details will be available in cookiestore if required
1174                        // name not blank, value not null
1175                        if (cookieName.length() > 0 && !cookies.containsKey(cookieName)) // if duplicates, only keep the first
1176                            cookie(cookieName, cookieVal);
1177                    }
1178                }
1179                for (String value : values) {
1180                    addHeader(name, fixHeaderEncoding(value));
1181                }
1182            }
1183        }
1184
1185        /**
1186         Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that
1187         and re-decode the string as UTF-8.
1188         * @param val a header value string that may have been incorrectly decoded as 8859.
1189         * @return a potentially re-decoded string.
1190         */
1191        @Nullable
1192        private static String fixHeaderEncoding(@Nullable String val) {
1193            if (val == null) return val;
1194            byte[] bytes = val.getBytes(ISO_8859_1);
1195            if (looksLikeUtf8(bytes))
1196                return new String(bytes, UTF_8);
1197            else
1198                return val;
1199        }
1200
1201        private static boolean looksLikeUtf8(byte[] input) {
1202            int i = 0;
1203            // BOM:
1204            if (input.length >= 3
1205                && (input[0] & 0xFF) == 0xEF
1206                && (input[1] & 0xFF) == 0xBB
1207                && (input[2] & 0xFF) == 0xBF) {
1208                i = 3;
1209            }
1210
1211            int end;
1212            boolean foundNonAscii = false;
1213            for (int j = input.length; i < j; ++i) {
1214                int o = input[i];
1215                if ((o & 0x80) == 0) {
1216                    continue; // ASCII
1217                }
1218                foundNonAscii = true;
1219
1220                // UTF-8 leading:
1221                if ((o & 0xE0) == 0xC0) {
1222                    end = i + 1;
1223                } else if ((o & 0xF0) == 0xE0) {
1224                    end = i + 2;
1225                } else if ((o & 0xF8) == 0xF0) {
1226                    end = i + 3;
1227                } else {
1228                    return false;
1229                }
1230
1231                if (end >= input.length)
1232                    return false;
1233
1234                while (i < end) {
1235                    i++;
1236                    o = input[i];
1237                    if ((o & 0xC0) != 0x80) {
1238                        return false;
1239                    }
1240                }
1241            }
1242            return foundNonAscii;
1243        }
1244
1245        private @Nullable static String setOutputContentType(final Connection.Request req) {
1246            final String contentType = req.header(CONTENT_TYPE);
1247            String bound = null;
1248            if (contentType != null) {
1249                // no-op; don't add content type as already set (e.g. for requestBody())
1250                // todo - if content type already set, we could add charset
1251
1252                // if user has set content type to multipart/form-data, auto add boundary.
1253                if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) {
1254                    bound = DataUtil.mimeBoundary();
1255                    req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1256                }
1257
1258            }
1259            else if (needsMultipart(req)) {
1260                bound = DataUtil.mimeBoundary();
1261                req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1262            } else {
1263                req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset());
1264            }
1265            return bound;
1266        }
1267
1268        private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException {
1269            final Collection<Connection.KeyVal> data = req.data();
1270            final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset())));
1271
1272            if (boundary != null) {
1273                // boundary will be set if we're in multipart mode
1274                for (Connection.KeyVal keyVal : data) {
1275                    w.write("--");
1276                    w.write(boundary);
1277                    w.write("\r\n");
1278                    w.write("Content-Disposition: form-data; name=\"");
1279                    w.write(encodeMimeName(keyVal.key())); // encodes " to %22
1280                    w.write("\"");
1281                    final InputStream input = keyVal.inputStream();
1282                    if (input != null) {
1283                        w.write("; filename=\"");
1284                        w.write(encodeMimeName(keyVal.value()));
1285                        w.write("\"\r\nContent-Type: ");
1286                        String contentType = keyVal.contentType();
1287                        w.write(contentType != null ? contentType : DefaultUploadType);
1288                        w.write("\r\n\r\n");
1289                        w.flush(); // flush
1290                        DataUtil.crossStreams(input, outputStream);
1291                        outputStream.flush();
1292                    } else {
1293                        w.write("\r\n\r\n");
1294                        w.write(keyVal.value());
1295                    }
1296                    w.write("\r\n");
1297                }
1298                w.write("--");
1299                w.write(boundary);
1300                w.write("--");
1301            } else {
1302                String body = req.requestBody();
1303                if (body != null) {
1304                    // data will be in query string, we're sending a plaintext body
1305                    w.write(body);
1306                }
1307                else {
1308                    // regular form data (application/x-www-form-urlencoded)
1309                    boolean first = true;
1310                    for (Connection.KeyVal keyVal : data) {
1311                        if (!first)
1312                            w.append('&');
1313                        else
1314                            first = false;
1315
1316                        w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset()));
1317                        w.write('=');
1318                        w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset()));
1319                    }
1320                }
1321            }
1322            w.close();
1323        }
1324
1325        // for get url reqs, serialise the data map into the url
1326        private static void serialiseRequestUrl(Connection.Request req) throws IOException {
1327            UrlBuilder in = new UrlBuilder(req.url());
1328
1329            for (Connection.KeyVal keyVal : req.data()) {
1330                Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string.");
1331                in.appendKeyVal(keyVal);
1332            }
1333            req.url(in.build());
1334            req.data().clear(); // moved into url as get params
1335        }
1336    }
1337
1338    private static boolean needsMultipart(Connection.Request req) {
1339        // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary
1340        for (Connection.KeyVal keyVal : req.data()) {
1341            if (keyVal.hasInputStream())
1342                return true;
1343        }
1344        return false;
1345    }
1346
1347    public static class KeyVal implements Connection.KeyVal {
1348        private String key;
1349        private String value;
1350        private @Nullable InputStream stream;
1351        private @Nullable String contentType;
1352
1353        public static KeyVal create(String key, String value) {
1354            return new KeyVal(key, value);
1355        }
1356
1357        public static KeyVal create(String key, String filename, InputStream stream) {
1358            return new KeyVal(key, filename)
1359                .inputStream(stream);
1360        }
1361
1362        private KeyVal(String key, String value) {
1363            Validate.notEmptyParam(key, "key");
1364            Validate.notNullParam(value, "value");
1365            this.key = key;
1366            this.value = value;
1367        }
1368
1369        @Override
1370        public KeyVal key(String key) {
1371            Validate.notEmptyParam(key, "key");
1372            this.key = key;
1373            return this;
1374        }
1375
1376        @Override
1377        public String key() {
1378            return key;
1379        }
1380
1381        @Override
1382        public KeyVal value(String value) {
1383            Validate.notNullParam(value, "value");
1384            this.value = value;
1385            return this;
1386        }
1387
1388        @Override
1389        public String value() {
1390            return value;
1391        }
1392
1393        public KeyVal inputStream(InputStream inputStream) {
1394            Validate.notNullParam(value, "inputStream");
1395            this.stream = inputStream;
1396            return this;
1397        }
1398
1399        @Override
1400        public InputStream inputStream() {
1401            return stream;
1402        }
1403
1404        @Override
1405        public boolean hasInputStream() {
1406            return stream != null;
1407        }
1408
1409        @Override
1410        public Connection.KeyVal contentType(String contentType) {
1411            Validate.notEmpty(contentType);
1412            this.contentType = contentType;
1413            return this;
1414        }
1415
1416        @Override
1417        public String contentType() {
1418            return contentType;
1419        }
1420
1421        @Override
1422        public String toString() {
1423            return key + "=" + value;
1424        }
1425    }
1426}