001package org.jsoup.helper;
002
003import org.jsoup.Connection;
004import org.jsoup.HttpStatusException;
005import org.jsoup.Progress;
006import org.jsoup.UnsupportedMimeTypeException;
007import org.jsoup.internal.ControllableInputStream;
008import org.jsoup.internal.StringUtil;
009import org.jsoup.nodes.Document;
010import org.jsoup.parser.Parser;
011import org.jsoup.parser.StreamParser;
012import org.jspecify.annotations.Nullable;
013
014import javax.net.ssl.SSLSocketFactory;
015import java.io.BufferedInputStream;
016import java.io.BufferedReader;
017import java.io.BufferedWriter;
018import java.io.ByteArrayInputStream;
019import java.io.IOException;
020import java.io.InputStream;
021import java.io.InputStreamReader;
022import java.io.OutputStream;
023import java.io.OutputStreamWriter;
024import java.io.UncheckedIOException;
025import java.net.CookieManager;
026import java.net.CookieStore;
027import java.net.InetSocketAddress;
028import java.net.MalformedURLException;
029import java.net.Proxy;
030import java.net.URL;
031import java.net.URLEncoder;
032import java.nio.Buffer;
033import java.nio.ByteBuffer;
034import java.nio.charset.Charset;
035import java.nio.charset.IllegalCharsetNameException;
036import java.util.ArrayList;
037import java.util.Collection;
038import java.util.Collections;
039import java.util.LinkedHashMap;
040import java.util.List;
041import java.util.Map;
042import java.util.regex.Pattern;
043import java.util.zip.GZIPInputStream;
044import java.util.zip.Inflater;
045import java.util.zip.InflaterInputStream;
046
047import static org.jsoup.Connection.Method.HEAD;
048import static org.jsoup.helper.DataUtil.UTF_8;
049import static org.jsoup.internal.Normalizer.lowerCase;
050import static org.jsoup.internal.SharedConstants.DefaultBufferSize;
051
052/**
053 * Implementation of {@link Connection}.
054 * @see org.jsoup.Jsoup#connect(String)
055 */
056@SuppressWarnings("CharsetObjectCanBeUsed")
057public class HttpConnection implements Connection {
/** Name of the response header that declares the body's content coding; checked for gzip and deflate when the body stream is set up. */
public static final String CONTENT_ENCODING = "Content-Encoding";
/**
 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
 */
public static final String DEFAULT_UA =
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36";
/** Name of the request header that carries the user agent string. */
private static final String USER_AGENT = "User-Agent";
/** Name of the content type header; removed from the request when a redirect is re-issued as a GET. */
public static final String CONTENT_TYPE = "Content-Type";
/** Content type value for multipart form posts. */
public static final String MULTIPART_FORM_DATA = "multipart/form-data";
/** Content type value for URL-encoded form posts. */
public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded";
private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. Redirects with this status keep their method and data.
/** Presumably the fallback content type for uploads that do not specify one; its use is outside this view. */
static final String DefaultUploadType = "application/octet-stream";
/** The ISO-8859-1 charset, looked up by name (see the class-level CharsetObjectCanBeUsed suppression). */
private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

/** The request settings that this Connection configures and executes. */
private HttpConnection.Request req;
/** The most recent response; null until a request has been executed, or a response has been supplied. */
private Connection.@Nullable Response res;
@Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor
@Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor
077
078    /**
079     Create a new Connection, with the request URL specified.
080     @param url the URL to fetch from
081     @return a new Connection object
082     */
083    public static Connection connect(String url) {
084        Connection con = new HttpConnection();
085        con.url(url);
086        return con;
087    }
088
089    /**
090     Create a new Connection, with the request URL specified.
091     @param url the URL to fetch from
092     @return a new Connection object
093     */
094    public static Connection connect(URL url) {
095        Connection con = new HttpConnection();
096        con.url(url);
097        return con;
098    }
099
100    /**
101     Create a new, empty HttpConnection.
102     */
103    public HttpConnection() {
104        req = new Request();
105        req.connection = this;
106    }
107
108    /**
109     Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not
110     copied. All other settings (proxy, parser, cookies, etc) are copied.
111     @param copy the request to copy
112     */
113    HttpConnection(Request copy) {
114        req = new Request(copy);
115    }
116
117    static String encodeMimeName(String val) {
118        return val.replace("\"", "%22");
119    }
120
121    @Override
122    public Connection newRequest() {
123        // copy the prototype request for the different settings, cookie manager, etc
124        return new HttpConnection(req);
125    }
126
/**
 Create a new Connection that is just a wrapper around the provided Request and Response.
 @param req the request to wrap
 @param res the response to wrap
 */
private HttpConnection(Request req, Response res) {
    this.res = res;
    this.req = req;
}
132
133    @Override
134    public Connection url(URL url) {
135        req.url(url);
136        return this;
137    }
138
139    @Override
140    public Connection url(String url) {
141        Validate.notEmptyParam(url, "url");
142        try {
143            req.url(new URL(url));
144        } catch (MalformedURLException e) {
145            throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e);
146        }
147        return this;
148    }
149
150    @Override
151    public Connection proxy(@Nullable Proxy proxy) {
152        req.proxy(proxy);
153        return this;
154    }
155
156    @Override
157    public Connection proxy(String host, int port) {
158        req.proxy(host, port);
159        return this;
160    }
161
162    @Override
163    public Connection userAgent(String userAgent) {
164        Validate.notNullParam(userAgent, "userAgent");
165        req.header(USER_AGENT, userAgent);
166        return this;
167    }
168
169    @Override
170    public Connection timeout(int millis) {
171        req.timeout(millis);
172        return this;
173    }
174
175    @Override
176    public Connection maxBodySize(int bytes) {
177        req.maxBodySize(bytes);
178        return this;
179    }
180
181    @Override
182    public Connection followRedirects(boolean followRedirects) {
183        req.followRedirects(followRedirects);
184        return this;
185    }
186
187    @Override
188    public Connection referrer(String referrer) {
189        Validate.notNullParam(referrer, "referrer");
190        req.header("Referer", referrer);
191        return this;
192    }
193
194    @Override
195    public Connection method(Method method) {
196        req.method(method);
197        return this;
198    }
199
200    @Override
201    public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
202                req.ignoreHttpErrors(ignoreHttpErrors);
203                return this;
204        }
205
206    @Override
207    public Connection ignoreContentType(boolean ignoreContentType) {
208        req.ignoreContentType(ignoreContentType);
209        return this;
210    }
211
212    @Override
213    public Connection data(String key, String value) {
214        req.data(KeyVal.create(key, value));
215        return this;
216    }
217
218    @Override
219    public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
220            req.sslSocketFactory(sslSocketFactory);
221            return this;
222    }
223
224    @Override
225    public Connection data(String key, String filename, InputStream inputStream) {
226        req.data(KeyVal.create(key, filename, inputStream));
227        return this;
228    }
229
230    @Override
231    public Connection data(String key, String filename, InputStream inputStream, String contentType) {
232        req.data(KeyVal.create(key, filename, inputStream).contentType(contentType));
233        return this;
234    }
235
236    @Override
237    public Connection data(Map<String, String> data) {
238        Validate.notNullParam(data, "data");
239        for (Map.Entry<String, String> entry : data.entrySet()) {
240            req.data(KeyVal.create(entry.getKey(), entry.getValue()));
241        }
242        return this;
243    }
244
245    @Override
246    public Connection data(String... keyvals) {
247        Validate.notNullParam(keyvals, "keyvals");
248        Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
249        for (int i = 0; i < keyvals.length; i += 2) {
250            String key = keyvals[i];
251            String value = keyvals[i+1];
252            Validate.notEmpty(key, "Data key must not be empty");
253            Validate.notNull(value, "Data value must not be null");
254            req.data(KeyVal.create(key, value));
255        }
256        return this;
257    }
258
259    @Override
260    public Connection data(Collection<Connection.KeyVal> data) {
261        Validate.notNullParam(data, "data");
262        for (Connection.KeyVal entry: data) {
263            req.data(entry);
264        }
265        return this;
266    }
267
268    @Override
269    public Connection.@Nullable KeyVal data(String key) {
270        Validate.notEmptyParam(key, "key");
271        for (Connection.KeyVal keyVal : request().data()) {
272            if (keyVal.key().equals(key))
273                return keyVal;
274        }
275        return null;
276    }
277
278    @Override
279    public Connection requestBody(String body) {
280        req.requestBody(body);
281        return this;
282    }
283
284    @Override
285    public Connection header(String name, String value) {
286        req.header(name, value);
287        return this;
288    }
289
290    @Override
291    public Connection headers(Map<String,String> headers) {
292        Validate.notNullParam(headers, "headers");
293        for (Map.Entry<String,String> entry : headers.entrySet()) {
294            req.header(entry.getKey(),entry.getValue());
295        }
296        return this;
297    }
298
299    @Override
300    public Connection cookie(String name, String value) {
301        req.cookie(name, value);
302        return this;
303    }
304
305    @Override
306    public Connection cookies(Map<String, String> cookies) {
307        Validate.notNullParam(cookies, "cookies");
308        for (Map.Entry<String, String> entry : cookies.entrySet()) {
309            req.cookie(entry.getKey(), entry.getValue());
310        }
311        return this;
312    }
313
314    @Override
315    public Connection cookieStore(CookieStore cookieStore) {
316        // create a new cookie manager using the new store
317        req.cookieManager = new CookieManager(cookieStore, null);
318        return this;
319    }
320
321    @Override
322    public CookieStore cookieStore() {
323        return req.cookieManager.getCookieStore();
324    }
325
326    @Override
327    public Connection parser(Parser parser) {
328        req.parser(parser);
329        return this;
330    }
331
332    @Override
333    public Document get() throws IOException {
334        req.method(Method.GET);
335        execute();
336        Validate.notNull(res);
337        return res.parse();
338    }
339
340    @Override
341    public Document post() throws IOException {
342        req.method(Method.POST);
343        execute();
344        Validate.notNull(res);
345        return res.parse();
346    }
347
348    @Override
349    public Connection.Response execute() throws IOException {
350        res = Response.execute(req);
351        return res;
352    }
353
354    @Override
355    public Connection.Request request() {
356        return req;
357    }
358
359    @Override
360    public Connection request(Connection.Request request) {
361        req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
362        return this;
363    }
364
365    @Override
366    public Connection.Response response() {
367        if (res == null) {
368            throw new IllegalArgumentException("You must execute the request before getting a response.");
369        }
370        return res;
371    }
372
373    @Override
374    public Connection response(Connection.Response response) {
375        res = response;
376        return this;
377    }
378
379    @Override
380    public Connection postDataCharset(String charset) {
381        req.postDataCharset(charset);
382        return this;
383    }
384
385    @Override public Connection auth(@Nullable RequestAuthenticator authenticator) {
386        req.auth(authenticator);
387        return this;
388    }
389
390    @Override public Connection onResponseProgress(Progress<Connection.Response> handler) {
391        req.responseProgress = handler;
392        return this;
393    }
394
395    @SuppressWarnings("unchecked")
396    private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> {
397        private static final URL UnsetUrl; // only used if you created a new Request()
398        static {
399            try {
400                UnsetUrl = new URL("http://undefined/");
401            } catch (MalformedURLException e) {
402                throw new IllegalStateException(e);
403            }
404        }
405
406        URL url = UnsetUrl;
407        Method method = Method.GET;
408        Map<String, List<String>> headers;
409        Map<String, String> cookies;
410
411        private Base() {
412            headers = new LinkedHashMap<>();
413            cookies = new LinkedHashMap<>();
414        }
415
416        private Base(Base<T> copy) {
417            url = copy.url; // unmodifiable object
418            method = copy.method;
419            headers = new LinkedHashMap<>();
420            for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) {
421                headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
422            }
423            cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
424        }
425
426        @Override
427        public URL url() {
428            if (url == UnsetUrl)
429                throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
430            return url;
431        }
432
433        @Override
434        public T url(URL url) {
435            Validate.notNullParam(url, "url");
436            this.url = new UrlBuilder(url).build();
437            return (T) this;
438        }
439
440        @Override
441        public Method method() {
442            return method;
443        }
444
445        @Override
446        public T method(Method method) {
447            Validate.notNullParam(method, "method");
448            this.method = method;
449            return (T) this;
450        }
451
452        @Override @Nullable
453        public String header(String name) {
454            Validate.notNullParam(name, "name");
455            List<String> vals = getHeadersCaseInsensitive(name);
456            if (!vals.isEmpty()) {
457                // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
458                return StringUtil.join(vals, ", ");
459            }
460
461            return null;
462        }
463
464        @Override
465        public T addHeader(String name, @Nullable String value) {
466            Validate.notEmptyParam(name, "name");
467            value = value == null ? "" : value;
468
469            List<String> values = headers(name);
470            if (values.isEmpty()) {
471                values = new ArrayList<>();
472                headers.put(name, values);
473            }
474            values.add(value);
475
476            return (T) this;
477        }
478
479        @Override
480        public List<String> headers(String name) {
481            Validate.notEmptyParam(name, "name");
482            return getHeadersCaseInsensitive(name);
483        }
484
485        @Override
486        public T header(String name, String value) {
487            Validate.notEmptyParam(name, "name");
488            removeHeader(name); // ensures we don't get an "accept-encoding" and an "Accept-Encoding"
489            addHeader(name, value);
490            return (T) this;
491        }
492
493        @Override
494        public boolean hasHeader(String name) {
495            Validate.notEmptyParam(name, "name");
496            return !getHeadersCaseInsensitive(name).isEmpty();
497        }
498
499        /**
500         * Test if the request has a header with this value (case-insensitive).
501         */
502        @Override
503        public boolean hasHeaderWithValue(String name, String value) {
504            Validate.notEmpty(name);
505            Validate.notEmpty(value);
506            List<String> values = headers(name);
507            for (String candidate : values) {
508                if (value.equalsIgnoreCase(candidate))
509                    return true;
510            }
511            return false;
512        }
513
514        @Override
515        public T removeHeader(String name) {
516            Validate.notEmptyParam(name, "name");
517            Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too
518            if (entry != null)
519                headers.remove(entry.getKey()); // ensures correct case
520            return (T) this;
521        }
522
523        @Override
524        public Map<String, String> headers() {
525            LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size());
526            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
527                String header = entry.getKey();
528                List<String> values = entry.getValue();
529                if (!values.isEmpty())
530                    map.put(header, values.get(0));
531            }
532            return map;
533        }
534
535        @Override
536        public Map<String, List<String>> multiHeaders() {
537            return headers;
538        }
539
540        private List<String> getHeadersCaseInsensitive(String name) {
541            Validate.notNull(name);
542
543            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
544                if (name.equalsIgnoreCase(entry.getKey()))
545                    return entry.getValue();
546            }
547
548            return Collections.emptyList();
549        }
550
551        private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) {
552            String lc = lowerCase(name);
553            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
554                if (lowerCase(entry.getKey()).equals(lc))
555                    return entry;
556            }
557            return null;
558        }
559
560        @Override
561        public String cookie(String name) {
562            Validate.notEmptyParam(name, "name");
563            return cookies.get(name);
564        }
565
566        @Override
567        public T cookie(String name, String value) {
568            Validate.notEmptyParam(name, "name");
569            Validate.notNullParam(value, "value");
570            cookies.put(name, value);
571            return (T) this;
572        }
573
574        @Override
575        public boolean hasCookie(String name) {
576            Validate.notEmptyParam(name, "name");
577            return cookies.containsKey(name);
578        }
579
580        @Override
581        public T removeCookie(String name) {
582            Validate.notEmptyParam(name, "name");
583            cookies.remove(name);
584            return (T) this;
585        }
586
587        @Override
588        public Map<String, String> cookies() {
589            return cookies;
590        }
591    }
592
593    public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request {
594        static {
595            System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
596            // make sure that we can send Sec-Fetch-Site headers etc.
597        }
598
599        HttpConnection connection;
600        private @Nullable Proxy proxy;
601        private int timeoutMilliseconds;
602        private int maxBodySizeBytes;
603        private boolean followRedirects;
604        private final Collection<Connection.KeyVal> data;
605        private @Nullable String body = null;
606        @Nullable String mimeBoundary;
607        private boolean ignoreHttpErrors = false;
608        private boolean ignoreContentType = false;
609        private Parser parser;
610        private boolean parserDefined = false; // called parser(...) vs initialized in ctor
611        private String postDataCharset = DataUtil.defaultCharsetName;
612        private @Nullable SSLSocketFactory sslSocketFactory;
613        private CookieManager cookieManager;
614        @Nullable RequestAuthenticator authenticator;
615        private @Nullable Progress<Connection.Response> responseProgress;
616
617        private volatile boolean executing = false;
618
619        Request() {
620            super();
621            timeoutMilliseconds = 30000; // 30 seconds
622            maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
623            followRedirects = true;
624            data = new ArrayList<>();
625            method = Method.GET;
626            addHeader("Accept-Encoding", "gzip");
627            addHeader(USER_AGENT, DEFAULT_UA);
628            parser = Parser.htmlParser();
629            cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
630        }
631
632        Request(Request copy) {
633            super(copy);
634            connection = copy.connection;
635            proxy = copy.proxy;
636            postDataCharset = copy.postDataCharset;
637            timeoutMilliseconds = copy.timeoutMilliseconds;
638            maxBodySizeBytes = copy.maxBodySizeBytes;
639            followRedirects = copy.followRedirects;
640            data = new ArrayList<>(); // data not copied
641            //body not copied
642            ignoreHttpErrors = copy.ignoreHttpErrors;
643            ignoreContentType = copy.ignoreContentType;
644            parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
645            parserDefined = copy.parserDefined;
646            sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
647            cookieManager = copy.cookieManager;
648            authenticator = copy.authenticator;
649            responseProgress = copy.responseProgress;
650            executing = false;
651        }
652
653        @Override @Nullable
654        public Proxy proxy() {
655            return proxy;
656        }
657
658        @Override
659        public Request proxy(@Nullable Proxy proxy) {
660            this.proxy = proxy;
661            return this;
662        }
663
664        @Override
665        public Request proxy(String host, int port) {
666            this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
667            return this;
668        }
669
670        @Override
671        public int timeout() {
672            return timeoutMilliseconds;
673        }
674
675        @Override
676        public Request timeout(int millis) {
677            Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
678            timeoutMilliseconds = millis;
679            return this;
680        }
681
682        @Override
683        public int maxBodySize() {
684            return maxBodySizeBytes;
685        }
686
687        @Override
688        public Connection.Request maxBodySize(int bytes) {
689            Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
690            maxBodySizeBytes = bytes;
691            return this;
692        }
693
694        @Override
695        public boolean followRedirects() {
696            return followRedirects;
697        }
698
699        @Override
700        public Connection.Request followRedirects(boolean followRedirects) {
701            this.followRedirects = followRedirects;
702            return this;
703        }
704
705        @Override
706        public boolean ignoreHttpErrors() {
707            return ignoreHttpErrors;
708        }
709
710        @Override @Nullable
711        public SSLSocketFactory sslSocketFactory() {
712            return sslSocketFactory;
713        }
714
715        @Override
716        public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
717            this.sslSocketFactory = sslSocketFactory;
718        }
719
720        @Override
721        public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
722            this.ignoreHttpErrors = ignoreHttpErrors;
723            return this;
724        }
725
726        @Override
727        public boolean ignoreContentType() {
728            return ignoreContentType;
729        }
730
731        @Override
732        public Connection.Request ignoreContentType(boolean ignoreContentType) {
733            this.ignoreContentType = ignoreContentType;
734            return this;
735        }
736
737        @Override
738        public Request data(Connection.KeyVal keyval) {
739            Validate.notNullParam(keyval, "keyval");
740            data.add(keyval);
741            return this;
742        }
743
744        @Override
745        public Collection<Connection.KeyVal> data() {
746            return data;
747        }
748
749        @Override
750        public Connection.Request requestBody(@Nullable String body) {
751            this.body = body;
752            return this;
753        }
754
755        @Override @Nullable
756        public String requestBody() {
757            return body;
758        }
759
760        @Override
761        public Request parser(Parser parser) {
762            this.parser = parser;
763            parserDefined = true;
764            return this;
765        }
766
767        @Override
768        public Parser parser() {
769            return parser;
770        }
771
772        @Override
773        public Connection.Request postDataCharset(String charset) {
774            Validate.notNullParam(charset, "charset");
775            if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
776            this.postDataCharset = charset;
777            return this;
778        }
779
780        @Override
781        public String postDataCharset() {
782            return postDataCharset;
783        }
784
785        CookieManager cookieManager() {
786            return cookieManager;
787        }
788
789        @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
790            this.authenticator = authenticator;
791            return this;
792        }
793
794        @Override @Nullable public RequestAuthenticator auth() {
795            return authenticator;
796        }
797    }
798
799    public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response {
// Presumably the upper bound on the number of redirects followed for one request; the check is outside this view.
private static final int MAX_REDIRECTS = 20;
// Name of the header that, when present on a response, triggers a redirect (if redirects are being followed).
private static final String LOCATION = "Location";
int statusCode; // the HTTP status code of the response
@Nullable String statusMessage;
private @Nullable ByteBuffer byteData; // set to an empty buffer when there is no body to read
private @Nullable ControllableInputStream bodyStream; // the (decoded, size- and time-limited) body stream, when there is a body
@Nullable RequestExecutor executor;
private @Nullable String charset; // as found in the content type; may be null
@Nullable String contentType;
int contentLength; // 0 is treated as no body; -1 means unknown, chunked
private boolean executed = false;
private boolean inputStreamRead = false;
private int numRedirects = 0;
private final HttpConnection.Request req; // the request that this response belongs to

/*
 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc)
 */
private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*");
819
/**
 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. It reports a 400 status with
 the message "Request not made", has no content type, and is tied to a new default Request. All actual responses
 are created from the HttpURLConnection and fields defined.
 */
Response() {
    super();
    req = new Request();
    contentType = null;
    statusMessage = "Request not made";
    statusCode = 400;
}
831
832        static Response execute(HttpConnection.Request req) throws IOException {
833            return execute(req, null);
834        }
835
836        static Response execute(HttpConnection.Request req, @Nullable Response prevRes) throws IOException {
837            synchronized (req) {
838                Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads.");
839                req.executing = true;
840            }
841            Validate.notNullParam(req, "req");
842            URL url = req.url();
843            Validate.notNull(url, "URL must be specified to connect");
844            String protocol = url.getProtocol();
845            if (!protocol.equals("http") && !protocol.equals("https"))
846                throw new MalformedURLException("Only http & https protocols supported");
847            final boolean supportsBody = req.method().hasBody();
848            final boolean hasBody = req.requestBody() != null;
849            if (!supportsBody)
850                Validate.isFalse(hasBody, "Cannot set a request body for HTTP method " + req.method());
851
852            // set up the request for execution
853            if (!req.data().isEmpty() && (!supportsBody || hasBody))
854                serialiseRequestUrl(req);
855            else if (supportsBody)
856                setOutputContentType(req);
857
858            long startTime = System.nanoTime();
859            RequestExecutor executor = RequestDispatch.get(req, prevRes);
860            Response res = null;
861            try {
862                res = executor.execute();
863
864                // redirect if there's a location header (from 3xx, or 201 etc)
865                if (res.hasHeader(LOCATION) && req.followRedirects()) {
866                    if (res.statusCode != HTTP_TEMP_REDIR) {
867                        req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
868                        req.data().clear();
869                        req.requestBody(null);
870                        req.removeHeader(CONTENT_TYPE);
871                    }
872
873                    String location = res.header(LOCATION);
874                    Validate.notNull(location);
875                    if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php
876                        location = location.substring(6);
877                    URL redir = StringUtil.resolve(req.url(), location);
878                    req.url(redir);
879
880                    req.executing = false;
881                    return execute(req, res);
882                }
883                if ((res.statusCode < 200 || res.statusCode >= 400) && !req.ignoreHttpErrors())
884                        throw new HttpStatusException("HTTP error fetching URL", res.statusCode, req.url().toString());
885
886                // check that we can handle the returned content type; if not, abort before fetching it
887                String contentType = res.contentType();
888                if (contentType != null
889                        && !req.ignoreContentType()
890                        && !contentType.startsWith("text/")
891                        && !xmlContentTypeRxp.matcher(contentType).matches()
892                        )
893                    throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml",
894                            contentType, req.url().toString());
895
896                // switch to the XML parser if content type is xml and not parser not explicitly set
897                if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) {
898                    if (!req.parserDefined) req.parser(Parser.xmlParser());
899                }
900
901                res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
902                if (res.contentLength != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
903                    InputStream stream = executor.responseBody();
904                    if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
905                        stream = new GZIPInputStream(stream);
906                    else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate"))
907                        stream = new InflaterInputStream(stream, new Inflater(true));
908                    
909                    res.bodyStream = ControllableInputStream.wrap(
910                        stream, DefaultBufferSize, req.maxBodySize())
911                        .timeout(startTime, req.timeout());
912
913                    if (req.responseProgress != null) // set response progress listener
914                        res.bodyStream.onProgress(res.contentLength, req.responseProgress, res);
915                } else {
916                    res.byteData = DataUtil.emptyByteBuffer();
917                }
918            } catch (IOException e) {
919                if (res != null) res.safeClose(); // will be non-null if got to conn
920                throw e;
921            } finally {
922                req.executing = false;
923
924                // detach any thread local auth delegate
925                if (req.authenticator != null)
926                    AuthenticationHandler.handler.remove();
927            }
928
929            res.executed = true;
930            return res;
931        }
932
933        @Override
934        public int statusCode() {
935            return statusCode;
936        }
937
938        @Override
939        public String statusMessage() {
940            return statusMessage;
941        }
942
943        @Override @Nullable
944        public String charset() {
945            return charset;
946        }
947
        /**
         * Overrides the charset name recorded for this response. The stored name is what {@link #body()} uses to
         * decode the buffered bytes, and what {@link #parse()} and {@link #streamParser()} hand to the parser.
         * @param charset the charset name to record
         * @return this response, for chaining
         */
        @Override
        public Response charset(String charset) {
            this.charset = charset;
            return this;
        }
953
954        @Override @Nullable
955        public String contentType() {
956            return contentType;
957        }
958
959        /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */
960        private ControllableInputStream prepareParse() {
961            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
962            ControllableInputStream stream = bodyStream;
963            if (byteData != null) { // bytes have been read in to the buffer, parse that
964                ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit());
965                stream = ControllableInputStream.wrap(bytes, 0); // no max
966                inputStreamRead = false; // ok to reparse if in bytes
967            }
968            Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
969            Validate.notNull(stream);
970            inputStreamRead = true;
971            return stream;
972        }
973
974        @Override public Document parse() throws IOException {
975            ControllableInputStream stream = prepareParse();
976            Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser());
977            doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req?
978            charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
979            safeClose();
980            return doc;
981        }
982
983        @Override public StreamParser streamParser() throws IOException {
984            ControllableInputStream stream = prepareParse();
985            String baseUri = url.toExternalForm();
986            DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser());
987            // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit.
988
989            // set up the stream parser and rig this connection up to the parsed doc:
990            StreamParser streamer = new StreamParser(req.parser());
991            BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset));
992            streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it
993            streamer.document().connection(new HttpConnection(req, this));
994            charset = charsetDoc.charset.name();
995
996            // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream
997            return streamer;
998        }
999
1000        private void prepareByteData() {
1001            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1002            if (bodyStream != null && byteData == null) {
1003                Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())");
1004                try {
1005                    byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize());
1006                } catch (IOException e) {
1007                    throw new UncheckedIOException(e);
1008                } finally {
1009                    inputStreamRead = true;
1010                    safeClose();
1011                }
1012            }
1013        }
1014
1015        @Override
1016        public String body() {
1017            prepareByteData();
1018            Validate.notNull(byteData);
1019            // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
1020            String body = (charset == null ? UTF_8 : Charset.forName(charset))
1021                .decode(byteData).toString();
1022            ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9
1023            return body;
1024        }
1025
1026        @Override
1027        public byte[] bodyAsBytes() {
1028            prepareByteData();
1029            Validate.notNull(byteData);
1030            Validate.isTrue(byteData.hasArray()); // we made it, so it should
1031
1032            byte[] array = byteData.array();
1033            int offset = byteData.arrayOffset();
1034            int length = byteData.limit();
1035
1036            if (offset == 0 && length == array.length) { // exact, just return it
1037                return array;
1038            } else { // trim to size
1039                byte[] exactArray = new byte[length];
1040                System.arraycopy(array, offset, exactArray, 0, length);
1041                return exactArray;
1042            }
1043        }
1044
        /**
         * Reads the response body into the in-memory byte buffer (via {@code prepareByteData()}, which is a no-op
         * if there is no body stream or the body is already buffered), so it can then be read repeatedly.
         * @return this response, for chaining
         */
        @Override
        public Connection.Response bufferUp() {
            prepareByteData();
            return this;
        }
1050
1051        @Override
1052        public BufferedInputStream bodyStream() {
1053            Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1054
1055            // if we have read to bytes (via buffer up), return those as a stream.
1056            if (byteData != null) {
1057                return new BufferedInputStream(
1058                    new ByteArrayInputStream(byteData.array(), 0, byteData.limit()),
1059                    DefaultBufferSize);
1060            }
1061
1062            Validate.isFalse(inputStreamRead, "Request has already been read");
1063            Validate.notNull(bodyStream);
1064            inputStreamRead = true;
1065            return bodyStream.inputStream();
1066        }
1067
1068        /**
1069         * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows
1070         * keep-alives to work (as the underlying connection is actually held open, despite the name).
1071         */
1072        private void safeClose() {
1073            if (bodyStream != null) {
1074                try {
1075                    bodyStream.close();
1076                } catch (IOException e) {
1077                    // no-op
1078                } finally {
1079                    bodyStream = null;
1080                }
1081            }
1082
1083            if (executor != null) executor.safeClose(); // disconnect
1084        }
1085
1086        Response(HttpConnection.Request request) {
1087            this.req = request;
1088        }
1089
1090        // set up url, method, header, cookies
1091        void prepareResponse(Map<String, List<String>> resHeaders, HttpConnection.@Nullable Response previousResponse) throws IOException {
1092            processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
1093            CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store
1094
1095            if (previousResponse != null) { // was redirected
1096                // map previous response cookies into this response cookies() object
1097                for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
1098                    if (!hasCookie(prevCookie.getKey()))
1099                        cookie(prevCookie.getKey(), prevCookie.getValue());
1100                }
1101                previousResponse.safeClose();
1102
1103                // enforce too many redirects:
1104                numRedirects = previousResponse.numRedirects + 1;
1105                if (numRedirects >= MAX_REDIRECTS)
1106                    throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
1107            }
1108        }
1109
1110        void processResponseHeaders(Map<String, List<String>> resHeaders) {
1111            for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
1112                String name = entry.getKey();
1113                if (name == null)
1114                    continue; // http/1.1 line
1115
1116                List<String> values = entry.getValue();
1117                for (String value : values) {
1118                    addHeader(name, fixHeaderEncoding(value));
1119                }
1120            }
1121        }
1122
1123        /**
1124         Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that
1125         and re-decode the string as UTF-8.
1126         * @param val a header value string that may have been incorrectly decoded as 8859.
1127         * @return a potentially re-decoded string.
1128         */
1129        @Nullable
1130        private static String fixHeaderEncoding(@Nullable String val) {
1131            if (val == null) return val;
1132            byte[] bytes = val.getBytes(ISO_8859_1);
1133            if (looksLikeUtf8(bytes))
1134                return new String(bytes, UTF_8);
1135            else
1136                return val;
1137        }
1138
1139        private static boolean looksLikeUtf8(byte[] input) {
1140            int i = 0;
1141            // BOM:
1142            if (input.length >= 3
1143                && (input[0] & 0xFF) == 0xEF
1144                && (input[1] & 0xFF) == 0xBB
1145                && (input[2] & 0xFF) == 0xBF) {
1146                i = 3;
1147            }
1148
1149            int end;
1150            boolean foundNonAscii = false;
1151            for (int j = input.length; i < j; ++i) {
1152                int o = input[i];
1153                if ((o & 0x80) == 0) {
1154                    continue; // ASCII
1155                }
1156                foundNonAscii = true;
1157
1158                // UTF-8 leading:
1159                if ((o & 0xE0) == 0xC0) {
1160                    end = i + 1;
1161                } else if ((o & 0xF0) == 0xE0) {
1162                    end = i + 2;
1163                } else if ((o & 0xF8) == 0xF0) {
1164                    end = i + 3;
1165                } else {
1166                    return false;
1167                }
1168
1169                if (end >= input.length)
1170                    return false;
1171
1172                while (i < end) {
1173                    i++;
1174                    o = input[i];
1175                    if ((o & 0xC0) != 0x80) {
1176                        return false;
1177                    }
1178                }
1179            }
1180            return foundNonAscii;
1181        }
1182
1183        private static void setOutputContentType(final HttpConnection.Request req) {
1184            final String contentType = req.header(CONTENT_TYPE);
1185            String bound = null;
1186            if (contentType != null) {
1187                // no-op; don't add content type as already set (e.g. for requestBody())
1188                // todo - if content type already set, we could add charset
1189
1190                // if user has set content type to multipart/form-data, auto add boundary.
1191                if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) {
1192                    bound = DataUtil.mimeBoundary();
1193                    req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1194                }
1195
1196            }
1197            else if (needsMultipart(req)) {
1198                bound = DataUtil.mimeBoundary();
1199                req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1200            } else {
1201                req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset());
1202            }
1203            req.mimeBoundary = bound;
1204        }
1205
1206        static void writePost(final HttpConnection.Request req, final OutputStream outputStream) throws IOException {
1207            final Collection<Connection.KeyVal> data = req.data();
1208            final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset())));
1209            final String boundary = req.mimeBoundary;
1210
1211            if (boundary != null) {
1212                // boundary will be set if we're in multipart mode
1213                for (Connection.KeyVal keyVal : data) {
1214                    w.write("--");
1215                    w.write(boundary);
1216                    w.write("\r\n");
1217                    w.write("Content-Disposition: form-data; name=\"");
1218                    w.write(encodeMimeName(keyVal.key())); // encodes " to %22
1219                    w.write("\"");
1220                    final InputStream input = keyVal.inputStream();
1221                    if (input != null) {
1222                        w.write("; filename=\"");
1223                        w.write(encodeMimeName(keyVal.value()));
1224                        w.write("\"\r\nContent-Type: ");
1225                        String contentType = keyVal.contentType();
1226                        w.write(contentType != null ? contentType : DefaultUploadType);
1227                        w.write("\r\n\r\n");
1228                        w.flush(); // flush
1229                        DataUtil.crossStreams(input, outputStream);
1230                        outputStream.flush();
1231                    } else {
1232                        w.write("\r\n\r\n");
1233                        w.write(keyVal.value());
1234                    }
1235                    w.write("\r\n");
1236                }
1237                w.write("--");
1238                w.write(boundary);
1239                w.write("--");
1240            } else {
1241                String body = req.requestBody();
1242                if (body != null) {
1243                    // data will be in query string, we're sending a plaintext body
1244                    w.write(body);
1245                }
1246                else {
1247                    // regular form data (application/x-www-form-urlencoded)
1248                    boolean first = true;
1249                    for (Connection.KeyVal keyVal : data) {
1250                        if (!first)
1251                            w.append('&');
1252                        else
1253                            first = false;
1254
1255                        w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset()));
1256                        w.write('=');
1257                        w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset()));
1258                    }
1259                }
1260            }
1261            w.close();
1262        }
1263
1264        // for get url reqs, serialise the data map into the url
1265        private static void serialiseRequestUrl(Connection.Request req) throws IOException {
1266            UrlBuilder in = new UrlBuilder(req.url());
1267
1268            for (Connection.KeyVal keyVal : req.data()) {
1269                Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string.");
1270                in.appendKeyVal(keyVal);
1271            }
1272            req.url(in.build());
1273            req.data().clear(); // moved into url as get params
1274        }
1275    }
1276
1277    private static boolean needsMultipart(Connection.Request req) {
1278        // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary
1279        for (Connection.KeyVal keyVal : req.data()) {
1280            if (keyVal.hasInputStream())
1281                return true;
1282        }
1283        return false;
1284    }
1285
1286    public static class KeyVal implements Connection.KeyVal {
1287        private String key;
1288        private String value;
1289        private @Nullable InputStream stream;
1290        private @Nullable String contentType;
1291
1292        public static KeyVal create(String key, String value) {
1293            return new KeyVal(key, value);
1294        }
1295
1296        public static KeyVal create(String key, String filename, InputStream stream) {
1297            return new KeyVal(key, filename)
1298                .inputStream(stream);
1299        }
1300
1301        private KeyVal(String key, String value) {
1302            Validate.notEmptyParam(key, "key");
1303            Validate.notNullParam(value, "value");
1304            this.key = key;
1305            this.value = value;
1306        }
1307
1308        @Override
1309        public KeyVal key(String key) {
1310            Validate.notEmptyParam(key, "key");
1311            this.key = key;
1312            return this;
1313        }
1314
1315        @Override
1316        public String key() {
1317            return key;
1318        }
1319
1320        @Override
1321        public KeyVal value(String value) {
1322            Validate.notNullParam(value, "value");
1323            this.value = value;
1324            return this;
1325        }
1326
1327        @Override
1328        public String value() {
1329            return value;
1330        }
1331
1332        @Override
1333        public KeyVal inputStream(InputStream inputStream) {
1334            Validate.notNullParam(value, "inputStream");
1335            this.stream = inputStream;
1336            return this;
1337        }
1338
1339        @Override @Nullable
1340        public InputStream inputStream() {
1341            return stream;
1342        }
1343
1344        @Override
1345        public boolean hasInputStream() {
1346            return stream != null;
1347        }
1348
1349        @Override
1350        public Connection.KeyVal contentType(String contentType) {
1351            Validate.notEmpty(contentType);
1352            this.contentType = contentType;
1353            return this;
1354        }
1355
1356        @Override @Nullable
1357        public String contentType() {
1358            return contentType;
1359        }
1360
1361        @Override
1362        public String toString() {
1363            return key + "=" + value;
1364        }
1365    }
1366}