001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.Progress; 006import org.jsoup.UnsupportedMimeTypeException; 007import org.jsoup.internal.ControllableInputStream; 008import org.jsoup.internal.StringUtil; 009import org.jsoup.nodes.Document; 010import org.jsoup.parser.Parser; 011import org.jsoup.parser.StreamParser; 012import org.jspecify.annotations.Nullable; 013 014import javax.net.ssl.SSLSocketFactory; 015import java.io.BufferedInputStream; 016import java.io.BufferedReader; 017import java.io.BufferedWriter; 018import java.io.ByteArrayInputStream; 019import java.io.IOException; 020import java.io.InputStream; 021import java.io.InputStreamReader; 022import java.io.OutputStream; 023import java.io.OutputStreamWriter; 024import java.io.UncheckedIOException; 025import java.net.CookieManager; 026import java.net.CookieStore; 027import java.net.InetSocketAddress; 028import java.net.MalformedURLException; 029import java.net.Proxy; 030import java.net.URL; 031import java.net.URLEncoder; 032import java.nio.Buffer; 033import java.nio.ByteBuffer; 034import java.nio.charset.Charset; 035import java.nio.charset.IllegalCharsetNameException; 036import java.util.ArrayList; 037import java.util.Collection; 038import java.util.Collections; 039import java.util.LinkedHashMap; 040import java.util.List; 041import java.util.Map; 042import java.util.regex.Pattern; 043import java.util.zip.GZIPInputStream; 044import java.util.zip.Inflater; 045import java.util.zip.InflaterInputStream; 046 047import static org.jsoup.Connection.Method.HEAD; 048import static org.jsoup.helper.DataUtil.UTF_8; 049import static org.jsoup.internal.Normalizer.lowerCase; 050import static org.jsoup.internal.SharedConstants.DefaultBufferSize; 051 052/** 053 * Implementation of {@link Connection}. 054 * @see org.jsoup.Jsoup#connect(String) 055 */ 056@SuppressWarnings("CharsetObjectCanBeUsed") 057public class HttpConnection implements Connection { 058 public static final String CONTENT_ENCODING = "Content-Encoding"; 059 /** 060 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 061 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 062 */ 063 public static final String DEFAULT_UA = 064 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"; 065 private static final String USER_AGENT = "User-Agent"; 066 public static final String CONTENT_TYPE = "Content-Type"; 067 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 068 public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 069 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 070 static final String DefaultUploadType = "application/octet-stream"; 071 private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); 072 073 private HttpConnection.Request req; 074 private Connection.@Nullable Response res; 075 @Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor 076 @Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor 077 078 /** 079 Create a new Connection, with the request URL specified. 080 @param url the URL to fetch from 081 @return a new Connection object 082 */ 083 public static Connection connect(String url) { 084 Connection con = new HttpConnection(); 085 con.url(url); 086 return con; 087 } 088 089 /** 090 Create a new Connection, with the request URL specified. 091 @param url the URL to fetch from 092 @return a new Connection object 093 */ 094 public static Connection connect(URL url) { 095 Connection con = new HttpConnection(); 096 con.url(url); 097 return con; 098 } 099 100 /** 101 Create a new, empty HttpConnection. 102 */ 103 public HttpConnection() { 104 req = new Request(); 105 req.connection = this; 106 } 107 108 /** 109 Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not 110 copied. All other settings (proxy, parser, cookies, etc) are copied. 111 @param copy the request to copy 112 */ 113 HttpConnection(Request copy) { 114 req = new Request(copy); 115 } 116 117 static String encodeMimeName(String val) { 118 return val.replace("\"", "%22"); 119 } 120 121 @Override 122 public Connection newRequest() { 123 // copy the prototype request for the different settings, cookie manager, etc 124 return new HttpConnection(req); 125 } 126 127 /** Create a new Connection that just wraps the provided Request and Response */ 128 private HttpConnection(Request req, Response res) { 129 this.req = req; 130 this.res = res; 131 } 132 133 @Override 134 public Connection url(URL url) { 135 req.url(url); 136 return this; 137 } 138 139 @Override 140 public Connection url(String url) { 141 Validate.notEmptyParam(url, "url"); 142 try { 143 req.url(new URL(url)); 144 } catch (MalformedURLException e) { 145 throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e); 146 } 147 return this; 148 } 149 150 @Override 151 public Connection proxy(@Nullable Proxy proxy) { 152 req.proxy(proxy); 153 return this; 154 } 155 156 @Override 157 public Connection proxy(String host, int port) { 158 req.proxy(host, port); 159 return this; 160 } 161 162 @Override 163 public Connection userAgent(String userAgent) { 164 Validate.notNullParam(userAgent, "userAgent"); 165 req.header(USER_AGENT, userAgent); 166 return this; 167 } 168 169 @Override 170 public Connection timeout(int millis) { 171 req.timeout(millis); 172 return this; 173 } 174 175 @Override 176 public Connection maxBodySize(int bytes) { 177 req.maxBodySize(bytes); 178 return this; 179 } 180 181 @Override 182 public Connection followRedirects(boolean followRedirects) { 183 req.followRedirects(followRedirects); 184 return this; 185 } 186 187 @Override 188 public Connection referrer(String referrer) { 189 Validate.notNullParam(referrer, "referrer"); 190 req.header("Referer", referrer); 191 return this; 192 } 193 194 @Override 195 public Connection method(Method method) { 196 req.method(method); 197 return this; 198 } 199 200 @Override 201 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 202 req.ignoreHttpErrors(ignoreHttpErrors); 203 return this; 204 } 205 206 @Override 207 public Connection ignoreContentType(boolean ignoreContentType) { 208 req.ignoreContentType(ignoreContentType); 209 return this; 210 } 211 212 @Override 213 public Connection data(String key, String value) { 214 req.data(KeyVal.create(key, value)); 215 return this; 216 } 217 218 @Override 219 public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) { 220 req.sslSocketFactory(sslSocketFactory); 221 return this; 222 } 223 224 @Override 225 public Connection data(String key, String filename, InputStream inputStream) { 226 req.data(KeyVal.create(key, filename, inputStream)); 227 return this; 228 } 229 230 @Override 231 public Connection data(String key, String filename, InputStream inputStream, String contentType) { 232 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 233 return this; 234 } 235 236 @Override 237 public Connection data(Map<String, String> data) { 238 Validate.notNullParam(data, "data"); 239 for (Map.Entry<String, String> entry : data.entrySet()) { 240 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 241 } 242 return this; 243 } 244 245 @Override 246 public Connection data(String... keyvals) { 247 Validate.notNullParam(keyvals, "keyvals"); 248 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 249 for (int i = 0; i < keyvals.length; i += 2) { 250 String key = keyvals[i]; 251 String value = keyvals[i+1]; 252 Validate.notEmpty(key, "Data key must not be empty"); 253 Validate.notNull(value, "Data value must not be null"); 254 req.data(KeyVal.create(key, value)); 255 } 256 return this; 257 } 258 259 @Override 260 public Connection data(Collection<Connection.KeyVal> data) { 261 Validate.notNullParam(data, "data"); 262 for (Connection.KeyVal entry: data) { 263 req.data(entry); 264 } 265 return this; 266 } 267 268 @Override 269 public Connection.@Nullable KeyVal data(String key) { 270 Validate.notEmptyParam(key, "key"); 271 for (Connection.KeyVal keyVal : request().data()) { 272 if (keyVal.key().equals(key)) 273 return keyVal; 274 } 275 return null; 276 } 277 278 @Override 279 public Connection requestBody(String body) { 280 req.requestBody(body); 281 return this; 282 } 283 284 @Override 285 public Connection header(String name, String value) { 286 req.header(name, value); 287 return this; 288 } 289 290 @Override 291 public Connection headers(Map<String,String> headers) { 292 Validate.notNullParam(headers, "headers"); 293 for (Map.Entry<String,String> entry : headers.entrySet()) { 294 req.header(entry.getKey(),entry.getValue()); 295 } 296 return this; 297 } 298 299 @Override 300 public Connection cookie(String name, String value) { 301 req.cookie(name, value); 302 return this; 303 } 304 305 @Override 306 public Connection cookies(Map<String, String> cookies) { 307 Validate.notNullParam(cookies, "cookies"); 308 for (Map.Entry<String, String> entry : cookies.entrySet()) { 309 req.cookie(entry.getKey(), entry.getValue()); 310 } 311 return this; 312 } 313 314 @Override 315 public Connection cookieStore(CookieStore cookieStore) { 316 // create a new cookie manager using the new store 317 req.cookieManager = new CookieManager(cookieStore, null); 318 return this; 319 } 320 321 @Override 322 public CookieStore cookieStore() { 323 return req.cookieManager.getCookieStore(); 324 } 325 326 @Override 327 public Connection parser(Parser parser) { 328 req.parser(parser); 329 return this; 330 } 331 332 @Override 333 public Document get() throws IOException { 334 req.method(Method.GET); 335 execute(); 336 Validate.notNull(res); 337 return res.parse(); 338 } 339 340 @Override 341 public Document post() throws IOException { 342 req.method(Method.POST); 343 execute(); 344 Validate.notNull(res); 345 return res.parse(); 346 } 347 348 @Override 349 public Connection.Response execute() throws IOException { 350 res = Response.execute(req); 351 return res; 352 } 353 354 @Override 355 public Connection.Request request() { 356 return req; 357 } 358 359 @Override 360 public Connection request(Connection.Request request) { 361 req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired 362 return this; 363 } 364 365 @Override 366 public Connection.Response response() { 367 if (res == null) { 368 throw new IllegalArgumentException("You must execute the request before getting a response."); 369 } 370 return res; 371 } 372 373 @Override 374 public Connection response(Connection.Response response) { 375 res = response; 376 return this; 377 } 378 379 @Override 380 public Connection postDataCharset(String charset) { 381 req.postDataCharset(charset); 382 return this; 383 } 384 385 @Override public Connection auth(@Nullable RequestAuthenticator authenticator) { 386 req.auth(authenticator); 387 return this; 388 } 389 390 @Override public Connection onResponseProgress(Progress<Connection.Response> handler) { 391 req.responseProgress = handler; 392 return this; 393 } 394 395 @SuppressWarnings("unchecked") 396 private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> { 397 private static final URL UnsetUrl; // only used if you created a new Request() 398 static { 399 try { 400 UnsetUrl = new URL("http://undefined/"); 401 } catch (MalformedURLException e) { 402 throw new IllegalStateException(e); 403 } 404 } 405 406 URL url = UnsetUrl; 407 Method method = Method.GET; 408 Map<String, List<String>> headers; 409 Map<String, String> cookies; 410 411 private Base() { 412 headers = new LinkedHashMap<>(); 413 cookies = new LinkedHashMap<>(); 414 } 415 416 private Base(Base<T> copy) { 417 url = copy.url; // unmodifiable object 418 method = copy.method; 419 headers = new LinkedHashMap<>(); 420 for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) { 421 headers.put(entry.getKey(), new ArrayList<>(entry.getValue())); 422 } 423 cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings 424 } 425 426 @Override 427 public URL url() { 428 if (url == UnsetUrl) 429 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request."); 430 return url; 431 } 432 433 @Override 434 public T url(URL url) { 435 Validate.notNullParam(url, "url"); 436 this.url = new UrlBuilder(url).build(); 437 return (T) this; 438 } 439 440 @Override 441 public Method method() { 442 return method; 443 } 444 445 @Override 446 public T method(Method method) { 447 Validate.notNullParam(method, "method"); 448 this.method = method; 449 return (T) this; 450 } 451 452 @Override @Nullable 453 public String header(String name) { 454 Validate.notNullParam(name, "name"); 455 List<String> vals = getHeadersCaseInsensitive(name); 456 if (!vals.isEmpty()) { 457 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 458 return StringUtil.join(vals, ", "); 459 } 460 461 return null; 462 } 463 464 @Override 465 public T addHeader(String name, @Nullable String value) { 466 Validate.notEmptyParam(name, "name"); 467 value = value == null ? "" : value; 468 469 List<String> values = headers(name); 470 if (values.isEmpty()) { 471 values = new ArrayList<>(); 472 headers.put(name, values); 473 } 474 values.add(value); 475 476 return (T) this; 477 } 478 479 @Override 480 public List<String> headers(String name) { 481 Validate.notEmptyParam(name, "name"); 482 return getHeadersCaseInsensitive(name); 483 } 484 485 @Override 486 public T header(String name, String value) { 487 Validate.notEmptyParam(name, "name"); 488 removeHeader(name); // ensures we don't get an "accept-encoding" and an "Accept-Encoding" 489 addHeader(name, value); 490 return (T) this; 491 } 492 493 @Override 494 public boolean hasHeader(String name) { 495 Validate.notEmptyParam(name, "name"); 496 return !getHeadersCaseInsensitive(name).isEmpty(); 497 } 498 499 /** 500 * Test if the request has a header with this value (case-insensitive). 501 */ 502 @Override 503 public boolean hasHeaderWithValue(String name, String value) { 504 Validate.notEmpty(name); 505 Validate.notEmpty(value); 506 List<String> values = headers(name); 507 for (String candidate : values) { 508 if (value.equalsIgnoreCase(candidate)) 509 return true; 510 } 511 return false; 512 } 513 514 @Override 515 public T removeHeader(String name) { 516 Validate.notEmptyParam(name, "name"); 517 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too 518 if (entry != null) 519 headers.remove(entry.getKey()); // ensures correct case 520 return (T) this; 521 } 522 523 @Override 524 public Map<String, String> headers() { 525 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 526 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 527 String header = entry.getKey(); 528 List<String> values = entry.getValue(); 529 if (!values.isEmpty()) 530 map.put(header, values.get(0)); 531 } 532 return map; 533 } 534 535 @Override 536 public Map<String, List<String>> multiHeaders() { 537 return headers; 538 } 539 540 private List<String> getHeadersCaseInsensitive(String name) { 541 Validate.notNull(name); 542 543 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 544 if (name.equalsIgnoreCase(entry.getKey())) 545 return entry.getValue(); 546 } 547 548 return Collections.emptyList(); 549 } 550 551 private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) { 552 String lc = lowerCase(name); 553 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 554 if (lowerCase(entry.getKey()).equals(lc)) 555 return entry; 556 } 557 return null; 558 } 559 560 @Override 561 public String cookie(String name) { 562 Validate.notEmptyParam(name, "name"); 563 return cookies.get(name); 564 } 565 566 @Override 567 public T cookie(String name, String value) { 568 Validate.notEmptyParam(name, "name"); 569 Validate.notNullParam(value, "value"); 570 cookies.put(name, value); 571 return (T) this; 572 } 573 574 @Override 575 public boolean hasCookie(String name) { 576 Validate.notEmptyParam(name, "name"); 577 return cookies.containsKey(name); 578 } 579 580 @Override 581 public T removeCookie(String name) { 582 Validate.notEmptyParam(name, "name"); 583 cookies.remove(name); 584 return (T) this; 585 } 586 587 @Override 588 public Map<String, String> cookies() { 589 return cookies; 590 } 591 } 592 593 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 594 static { 595 System.setProperty("sun.net.http.allowRestrictedHeaders", "true"); 596 // make sure that we can send Sec-Fetch-Site headers etc. 597 } 598 599 HttpConnection connection; 600 private @Nullable Proxy proxy; 601 private int timeoutMilliseconds; 602 private int maxBodySizeBytes; 603 private boolean followRedirects; 604 private final Collection<Connection.KeyVal> data; 605 private @Nullable String body = null; 606 @Nullable String mimeBoundary; 607 private boolean ignoreHttpErrors = false; 608 private boolean ignoreContentType = false; 609 private Parser parser; 610 private boolean parserDefined = false; // called parser(...) vs initialized in ctor 611 private String postDataCharset = DataUtil.defaultCharsetName; 612 private @Nullable SSLSocketFactory sslSocketFactory; 613 private CookieManager cookieManager; 614 @Nullable RequestAuthenticator authenticator; 615 private @Nullable Progress<Connection.Response> responseProgress; 616 617 private volatile boolean executing = false; 618 619 Request() { 620 super(); 621 timeoutMilliseconds = 30000; // 30 seconds 622 maxBodySizeBytes = 1024 * 1024 * 2; // 2MB 623 followRedirects = true; 624 data = new ArrayList<>(); 625 method = Method.GET; 626 addHeader("Accept-Encoding", "gzip"); 627 addHeader(USER_AGENT, DEFAULT_UA); 628 parser = Parser.htmlParser(); 629 cookieManager = new CookieManager(); // creates a default InMemoryCookieStore 630 } 631 632 Request(Request copy) { 633 super(copy); 634 connection = copy.connection; 635 proxy = copy.proxy; 636 postDataCharset = copy.postDataCharset; 637 timeoutMilliseconds = copy.timeoutMilliseconds; 638 maxBodySizeBytes = copy.maxBodySizeBytes; 639 followRedirects = copy.followRedirects; 640 data = new ArrayList<>(); // data not copied 641 //body not copied 642 ignoreHttpErrors = copy.ignoreHttpErrors; 643 ignoreContentType = copy.ignoreContentType; 644 parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy 645 parserDefined = copy.parserDefined; 646 sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share 647 cookieManager = copy.cookieManager; 648 authenticator = copy.authenticator; 649 responseProgress = copy.responseProgress; 650 executing = false; 651 } 652 653 @Override @Nullable 654 public Proxy proxy() { 655 return proxy; 656 } 657 658 @Override 659 public Request proxy(@Nullable Proxy proxy) { 660 this.proxy = proxy; 661 return this; 662 } 663 664 @Override 665 public Request proxy(String host, int port) { 666 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 667 return this; 668 } 669 670 @Override 671 public int timeout() { 672 return timeoutMilliseconds; 673 } 674 675 @Override 676 public Request timeout(int millis) { 677 Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 678 timeoutMilliseconds = millis; 679 return this; 680 } 681 682 @Override 683 public int maxBodySize() { 684 return maxBodySizeBytes; 685 } 686 687 @Override 688 public Connection.Request maxBodySize(int bytes) { 689 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 690 maxBodySizeBytes = bytes; 691 return this; 692 } 693 694 @Override 695 public boolean followRedirects() { 696 return followRedirects; 697 } 698 699 @Override 700 public Connection.Request followRedirects(boolean followRedirects) { 701 this.followRedirects = followRedirects; 702 return this; 703 } 704 705 @Override 706 public boolean ignoreHttpErrors() { 707 return ignoreHttpErrors; 708 } 709 710 @Override @Nullable 711 public SSLSocketFactory sslSocketFactory() { 712 return sslSocketFactory; 713 } 714 715 @Override 716 public void sslSocketFactory(SSLSocketFactory sslSocketFactory) { 717 this.sslSocketFactory = sslSocketFactory; 718 } 719 720 @Override 721 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 722 this.ignoreHttpErrors = ignoreHttpErrors; 723 return this; 724 } 725 726 @Override 727 public boolean ignoreContentType() { 728 return ignoreContentType; 729 } 730 731 @Override 732 public Connection.Request ignoreContentType(boolean ignoreContentType) { 733 this.ignoreContentType = ignoreContentType; 734 return this; 735 } 736 737 @Override 738 public Request data(Connection.KeyVal keyval) { 739 Validate.notNullParam(keyval, "keyval"); 740 data.add(keyval); 741 return this; 742 } 743 744 @Override 745 public Collection<Connection.KeyVal> data() { 746 return data; 747 } 748 749 @Override 750 public Connection.Request requestBody(@Nullable String body) { 751 this.body = body; 752 return this; 753 } 754 755 @Override @Nullable 756 public String requestBody() { 757 return body; 758 } 759 760 @Override 761 public Request parser(Parser parser) { 762 this.parser = parser; 763 parserDefined = true; 764 return this; 765 } 766 767 @Override 768 public Parser parser() { 769 return parser; 770 } 771 772 @Override 773 public Connection.Request postDataCharset(String charset) { 774 Validate.notNullParam(charset, "charset"); 775 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 776 this.postDataCharset = charset; 777 return this; 778 } 779 780 @Override 781 public String postDataCharset() { 782 return postDataCharset; 783 } 784 785 CookieManager cookieManager() { 786 return cookieManager; 787 } 788 789 @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { 790 this.authenticator = authenticator; 791 return this; 792 } 793 794 @Override @Nullable public RequestAuthenticator auth() { 795 return authenticator; 796 } 797 } 798 799 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 800 private static final int MAX_REDIRECTS = 20; 801 private static final String LOCATION = "Location"; 802 int statusCode; 803 @Nullable String statusMessage; 804 private @Nullable ByteBuffer byteData; 805 private @Nullable ControllableInputStream bodyStream; 806 @Nullable RequestExecutor executor; 807 private @Nullable String charset; 808 @Nullable String contentType; 809 int contentLength; 810 private boolean executed = false; 811 private boolean inputStreamRead = false; 812 private int numRedirects = 0; 813 private final HttpConnection.Request req; 814 815 /* 816 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 817 */ 818 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 819 820 /** 821 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 822 are created from the HttpURLConnection and fields defined. 823 */ 824 Response() { 825 super(); 826 statusCode = 400; 827 statusMessage = "Request not made"; 828 req = new Request(); 829 contentType = null; 830 } 831 832 static Response execute(HttpConnection.Request req) throws IOException { 833 return execute(req, null); 834 } 835 836 static Response execute(HttpConnection.Request req, @Nullable Response prevRes) throws IOException { 837 synchronized (req) { 838 Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads."); 839 req.executing = true; 840 } 841 Validate.notNullParam(req, "req"); 842 URL url = req.url(); 843 Validate.notNull(url, "URL must be specified to connect"); 844 String protocol = url.getProtocol(); 845 if (!protocol.equals("http") && !protocol.equals("https")) 846 throw new MalformedURLException("Only http & https protocols supported"); 847 final boolean supportsBody = req.method().hasBody(); 848 final boolean hasBody = req.requestBody() != null; 849 if (!supportsBody) 850 Validate.isFalse(hasBody, "Cannot set a request body for HTTP method " + req.method()); 851 852 // set up the request for execution 853 if (!req.data().isEmpty() && (!supportsBody || hasBody)) 854 serialiseRequestUrl(req); 855 else if (supportsBody) 856 setOutputContentType(req); 857 858 long startTime = System.nanoTime(); 859 RequestExecutor executor = RequestDispatch.get(req, prevRes); 860 Response res = null; 861 try { 862 res = executor.execute(); 863 864 // redirect if there's a location header (from 3xx, or 201 etc) 865 if (res.hasHeader(LOCATION) && req.followRedirects()) { 866 if (res.statusCode != HTTP_TEMP_REDIR) { 867 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 868 req.data().clear(); 869 req.requestBody(null); 870 req.removeHeader(CONTENT_TYPE); 871 } 872 873 String location = res.header(LOCATION); 874 Validate.notNull(location); 875 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 876 location = location.substring(6); 877 URL redir = StringUtil.resolve(req.url(), location); 878 req.url(redir); 879 880 req.executing = false; 881 return execute(req, res); 882 } 883 if ((res.statusCode < 200 || res.statusCode >= 400) && !req.ignoreHttpErrors()) 884 throw new HttpStatusException("HTTP error fetching URL", res.statusCode, req.url().toString()); 885 886 // check that we can handle the returned content type; if not, abort before fetching it 887 String contentType = res.contentType(); 888 if (contentType != null 889 && !req.ignoreContentType() 890 && !contentType.startsWith("text/") 891 && !xmlContentTypeRxp.matcher(contentType).matches() 892 ) 893 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 894 contentType, req.url().toString()); 895 896 // switch to the XML parser if content type is xml and not parser not explicitly set 897 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 898 if (!req.parserDefined) req.parser(Parser.xmlParser()); 899 } 900 901 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 902 if (res.contentLength != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 903 InputStream stream = executor.responseBody(); 904 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 905 stream = new GZIPInputStream(stream); 906 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 907 stream = new InflaterInputStream(stream, new Inflater(true)); 908 909 res.bodyStream = ControllableInputStream.wrap( 910 stream, DefaultBufferSize, req.maxBodySize()) 911 .timeout(startTime, req.timeout()); 912 913 if (req.responseProgress != null) // set response progress listener 914 res.bodyStream.onProgress(res.contentLength, req.responseProgress, res); 915 } else { 916 res.byteData = DataUtil.emptyByteBuffer(); 917 } 918 } catch (IOException e) { 919 if (res != null) res.safeClose(); // will be non-null if got to conn 920 throw e; 921 } finally { 922 req.executing = false; 923 924 // detach any thread local auth delegate 925 if (req.authenticator != null) 926 AuthenticationHandler.handler.remove(); 927 } 928 929 res.executed = true; 930 return res; 931 } 932 933 @Override 934 public int statusCode() { 935 return statusCode; 936 } 937 938 @Override 939 public String statusMessage() { 940 return statusMessage; 941 } 942 943 @Override @Nullable 944 public String charset() { 945 return charset; 946 } 947 948 @Override 949 public Response charset(String charset) { 950 this.charset = charset; 951 return this; 952 } 953 954 @Override @Nullable 955 public String contentType() { 956 return contentType; 957 } 958 959 /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */ 960 private ControllableInputStream prepareParse() { 961 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 962 ControllableInputStream stream = bodyStream; 963 if (byteData != null) { // bytes have been read in to the buffer, parse that 964 ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit()); 965 stream = ControllableInputStream.wrap(bytes, 0); // no max 966 inputStreamRead = false; // ok to reparse if in bytes 967 } 968 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 969 Validate.notNull(stream); 970 inputStreamRead = true; 971 return stream; 972 } 973 974 @Override public Document parse() throws IOException { 975 ControllableInputStream stream = prepareParse(); 976 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 977 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req? 978 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 979 safeClose(); 980 return doc; 981 } 982 983 @Override public StreamParser streamParser() throws IOException { 984 ControllableInputStream stream = prepareParse(); 985 String baseUri = url.toExternalForm(); 986 DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser()); 987 // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit. 988 989 // set up the stream parser and rig this connection up to the parsed doc: 990 StreamParser streamer = new StreamParser(req.parser()); 991 BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset)); 992 streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it 993 streamer.document().connection(new HttpConnection(req, this)); 994 charset = charsetDoc.charset.name(); 995 996 // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream 997 return streamer; 998 } 999 1000 private void prepareByteData() { 1001 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1002 if (bodyStream != null && byteData == null) { 1003 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 1004 try { 1005 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 1006 } catch (IOException e) { 1007 throw new UncheckedIOException(e); 1008 } finally { 1009 inputStreamRead = true; 1010 safeClose(); 1011 } 1012 } 1013 } 1014 1015 @Override 1016 public String body() { 1017 prepareByteData(); 1018 Validate.notNull(byteData); 1019 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 1020 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 1021 .decode(byteData).toString(); 1022 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 1023 return body; 1024 } 1025 1026 @Override 1027 public byte[] bodyAsBytes() { 1028 prepareByteData(); 1029 Validate.notNull(byteData); 1030 Validate.isTrue(byteData.hasArray()); // we made it, so it should 1031 1032 byte[] array = byteData.array(); 1033 int offset = byteData.arrayOffset(); 1034 int length = byteData.limit(); 1035 1036 if (offset == 0 && length == array.length) { // exact, just return it 1037 return array; 1038 } else { // trim to size 1039 byte[] exactArray = new byte[length]; 1040 System.arraycopy(array, offset, exactArray, 0, length); 1041 return exactArray; 1042 } 1043 } 1044 1045 @Override 1046 public Connection.Response bufferUp() { 1047 prepareByteData(); 1048 return this; 1049 } 1050 1051 @Override 1052 public BufferedInputStream bodyStream() { 1053 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1054 1055 // if we have read to bytes (via buffer up), return those as a stream. 1056 if (byteData != null) { 1057 return new BufferedInputStream( 1058 new ByteArrayInputStream(byteData.array(), 0, byteData.limit()), 1059 DefaultBufferSize); 1060 } 1061 1062 Validate.isFalse(inputStreamRead, "Request has already been read"); 1063 Validate.notNull(bodyStream); 1064 inputStreamRead = true; 1065 return bodyStream.inputStream(); 1066 } 1067 1068 /** 1069 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1070 * keep-alives to work (as the underlying connection is actually held open, despite the name). 1071 */ 1072 private void safeClose() { 1073 if (bodyStream != null) { 1074 try { 1075 bodyStream.close(); 1076 } catch (IOException e) { 1077 // no-op 1078 } finally { 1079 bodyStream = null; 1080 } 1081 } 1082 1083 if (executor != null) executor.safeClose(); // disconnect 1084 } 1085 1086 Response(HttpConnection.Request request) { 1087 this.req = request; 1088 } 1089 1090 // set up url, method, header, cookies 1091 void prepareResponse(Map<String, List<String>> resHeaders, HttpConnection.@Nullable Response previousResponse) throws IOException { 1092 processResponseHeaders(resHeaders); // includes cookie key/val read during header scan 1093 CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store 1094 1095 if (previousResponse != null) { // was redirected 1096 // map previous response cookies into this response cookies() object 1097 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1098 if (!hasCookie(prevCookie.getKey())) 1099 cookie(prevCookie.getKey(), prevCookie.getValue()); 1100 } 1101 previousResponse.safeClose(); 1102 1103 // enforce too many redirects: 1104 numRedirects = previousResponse.numRedirects + 1; 1105 if (numRedirects >= MAX_REDIRECTS) 1106 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 1107 } 1108 } 1109 1110 void processResponseHeaders(Map<String, List<String>> resHeaders) { 1111 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1112 String name = entry.getKey(); 1113 if (name == null) 1114 continue; // http/1.1 line 1115 1116 List<String> values = entry.getValue(); 1117 for (String value : values) { 1118 addHeader(name, fixHeaderEncoding(value)); 1119 } 1120 } 1121 } 1122 1123 /** 1124 Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that 1125 and re-decode the string as UTF-8. 1126 * @param val a header value string that may have been incorrectly decoded as 8859. 1127 * @return a potentially re-decoded string. 1128 */ 1129 @Nullable 1130 private static String fixHeaderEncoding(@Nullable String val) { 1131 if (val == null) return val; 1132 byte[] bytes = val.getBytes(ISO_8859_1); 1133 if (looksLikeUtf8(bytes)) 1134 return new String(bytes, UTF_8); 1135 else 1136 return val; 1137 } 1138 1139 private static boolean looksLikeUtf8(byte[] input) { 1140 int i = 0; 1141 // BOM: 1142 if (input.length >= 3 1143 && (input[0] & 0xFF) == 0xEF 1144 && (input[1] & 0xFF) == 0xBB 1145 && (input[2] & 0xFF) == 0xBF) { 1146 i = 3; 1147 } 1148 1149 int end; 1150 boolean foundNonAscii = false; 1151 for (int j = input.length; i < j; ++i) { 1152 int o = input[i]; 1153 if ((o & 0x80) == 0) { 1154 continue; // ASCII 1155 } 1156 foundNonAscii = true; 1157 1158 // UTF-8 leading: 1159 if ((o & 0xE0) == 0xC0) { 1160 end = i + 1; 1161 } else if ((o & 0xF0) == 0xE0) { 1162 end = i + 2; 1163 } else if ((o & 0xF8) == 0xF0) { 1164 end = i + 3; 1165 } else { 1166 return false; 1167 } 1168 1169 if (end >= input.length) 1170 return false; 1171 1172 while (i < end) { 1173 i++; 1174 o = input[i]; 1175 if ((o & 0xC0) != 0x80) { 1176 return false; 1177 } 1178 } 1179 } 1180 return foundNonAscii; 1181 } 1182 1183 private static void setOutputContentType(final HttpConnection.Request req) { 1184 final String contentType = req.header(CONTENT_TYPE); 1185 String bound = null; 1186 if (contentType != null) { 1187 // no-op; don't add content type as already set (e.g. for requestBody()) 1188 // todo - if content type already set, we could add charset 1189 1190 // if user has set content type to multipart/form-data, auto add boundary. 1191 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1192 bound = DataUtil.mimeBoundary(); 1193 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1194 } 1195 1196 } 1197 else if (needsMultipart(req)) { 1198 bound = DataUtil.mimeBoundary(); 1199 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1200 } else { 1201 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1202 } 1203 req.mimeBoundary = bound; 1204 } 1205 1206 static void writePost(final HttpConnection.Request req, final OutputStream outputStream) throws IOException { 1207 final Collection<Connection.KeyVal> data = req.data(); 1208 final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset()))); 1209 final String boundary = req.mimeBoundary; 1210 1211 if (boundary != null) { 1212 // boundary will be set if we're in multipart mode 1213 for (Connection.KeyVal keyVal : data) { 1214 w.write("--"); 1215 w.write(boundary); 1216 w.write("\r\n"); 1217 w.write("Content-Disposition: form-data; name=\""); 1218 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1219 w.write("\""); 1220 final InputStream input = keyVal.inputStream(); 1221 if (input != null) { 1222 w.write("; filename=\""); 1223 w.write(encodeMimeName(keyVal.value())); 1224 w.write("\"\r\nContent-Type: "); 1225 String contentType = keyVal.contentType(); 1226 w.write(contentType != null ? contentType : DefaultUploadType); 1227 w.write("\r\n\r\n"); 1228 w.flush(); // flush 1229 DataUtil.crossStreams(input, outputStream); 1230 outputStream.flush(); 1231 } else { 1232 w.write("\r\n\r\n"); 1233 w.write(keyVal.value()); 1234 } 1235 w.write("\r\n"); 1236 } 1237 w.write("--"); 1238 w.write(boundary); 1239 w.write("--"); 1240 } else { 1241 String body = req.requestBody(); 1242 if (body != null) { 1243 // data will be in query string, we're sending a plaintext body 1244 w.write(body); 1245 } 1246 else { 1247 // regular form data (application/x-www-form-urlencoded) 1248 boolean first = true; 1249 for (Connection.KeyVal keyVal : data) { 1250 if (!first) 1251 w.append('&'); 1252 else 1253 first = false; 1254 1255 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1256 w.write('='); 1257 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1258 } 1259 } 1260 } 1261 w.close(); 1262 } 1263 1264 // for get url reqs, serialise the data map into the url 1265 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1266 UrlBuilder in = new UrlBuilder(req.url()); 1267 1268 for (Connection.KeyVal keyVal : req.data()) { 1269 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1270 in.appendKeyVal(keyVal); 1271 } 1272 req.url(in.build()); 1273 req.data().clear(); // moved into url as get params 1274 } 1275 } 1276 1277 private static boolean needsMultipart(Connection.Request req) { 1278 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1279 for (Connection.KeyVal keyVal : req.data()) { 1280 if (keyVal.hasInputStream()) 1281 return true; 1282 } 1283 return false; 1284 } 1285 1286 public static class KeyVal implements Connection.KeyVal { 1287 private String key; 1288 private String value; 1289 private @Nullable InputStream stream; 1290 private @Nullable String contentType; 1291 1292 public static KeyVal create(String key, String value) { 1293 return new KeyVal(key, value); 1294 } 1295 1296 public static KeyVal create(String key, String filename, InputStream stream) { 1297 return new KeyVal(key, filename) 1298 .inputStream(stream); 1299 } 1300 1301 private KeyVal(String key, String value) { 1302 Validate.notEmptyParam(key, "key"); 1303 Validate.notNullParam(value, "value"); 1304 this.key = key; 1305 this.value = value; 1306 } 1307 1308 @Override 1309 public KeyVal key(String key) { 1310 Validate.notEmptyParam(key, "key"); 1311 this.key = key; 1312 return this; 1313 } 1314 1315 @Override 1316 public String key() { 1317 return key; 1318 } 1319 1320 @Override 1321 public KeyVal value(String value) { 1322 Validate.notNullParam(value, "value"); 1323 this.value = value; 1324 return this; 1325 } 1326 1327 @Override 1328 public String value() { 1329 return value; 1330 } 1331 1332 @Override 1333 public KeyVal inputStream(InputStream inputStream) { 1334 Validate.notNullParam(value, "inputStream"); 1335 this.stream = inputStream; 1336 return this; 1337 } 1338 1339 @Override @Nullable 1340 public InputStream inputStream() { 1341 return stream; 1342 } 1343 1344 @Override 1345 public boolean hasInputStream() { 1346 return stream != null; 1347 } 1348 1349 @Override 1350 public Connection.KeyVal contentType(String contentType) { 1351 Validate.notEmpty(contentType); 1352 this.contentType = contentType; 1353 return this; 1354 } 1355 1356 @Override @Nullable 1357 public String contentType() { 1358 return contentType; 1359 } 1360 1361 @Override 1362 public String toString() { 1363 return key + "=" + value; 1364 } 1365 } 1366}