001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.Progress; 006import org.jsoup.UncheckedIOException; 007import org.jsoup.UnsupportedMimeTypeException; 008import org.jsoup.internal.ControllableInputStream; 009import org.jsoup.internal.Functions; 010import org.jsoup.internal.SharedConstants; 011import org.jsoup.internal.StringUtil; 012import org.jsoup.nodes.Document; 013import org.jsoup.parser.Parser; 014import org.jsoup.parser.StreamParser; 015import org.jsoup.parser.TokenQueue; 016import org.jspecify.annotations.Nullable; 017 018import javax.net.ssl.HttpsURLConnection; 019import javax.net.ssl.SSLSocketFactory; 020import java.io.BufferedInputStream; 021import java.io.BufferedReader; 022import java.io.BufferedWriter; 023import java.io.ByteArrayInputStream; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.InputStreamReader; 027import java.io.OutputStream; 028import java.io.OutputStreamWriter; 029import java.net.CookieManager; 030import java.net.CookieStore; 031import java.net.HttpURLConnection; 032import java.net.InetSocketAddress; 033import java.net.MalformedURLException; 034import java.net.Proxy; 035import java.net.URL; 036import java.net.URLEncoder; 037import java.nio.Buffer; 038import java.nio.ByteBuffer; 039import java.nio.charset.Charset; 040import java.nio.charset.IllegalCharsetNameException; 041import java.util.ArrayList; 042import java.util.Collection; 043import java.util.Collections; 044import java.util.LinkedHashMap; 045import java.util.List; 046import java.util.Map; 047import java.util.regex.Pattern; 048import java.util.zip.GZIPInputStream; 049import java.util.zip.Inflater; 050import java.util.zip.InflaterInputStream; 051 052import static org.jsoup.Connection.Method.HEAD; 053import static org.jsoup.helper.DataUtil.UTF_8; 054import static org.jsoup.internal.Normalizer.lowerCase; 055 056/** 057 * Implementation of {@link Connection}. 058 * @see org.jsoup.Jsoup#connect(String) 059 */ 060@SuppressWarnings("CharsetObjectCanBeUsed") 061public class HttpConnection implements Connection { 062 public static final String CONTENT_ENCODING = "Content-Encoding"; 063 /** 064 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 065 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 066 */ 067 public static final String DEFAULT_UA = 068 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"; 069 private static final String USER_AGENT = "User-Agent"; 070 public static final String CONTENT_TYPE = "Content-Type"; 071 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 072 public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 073 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 074 private static final String DefaultUploadType = "application/octet-stream"; 075 private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); 076 077 /** 078 Create a new Connection, with the request URL specified. 079 @param url the URL to fetch from 080 @return a new Connection object 081 */ 082 public static Connection connect(String url) { 083 Connection con = new HttpConnection(); 084 con.url(url); 085 return con; 086 } 087 088 /** 089 Create a new Connection, with the request URL specified. 090 @param url the URL to fetch from 091 @return a new Connection object 092 */ 093 public static Connection connect(URL url) { 094 Connection con = new HttpConnection(); 095 con.url(url); 096 return con; 097 } 098 099 /** 100 Create a new, empty HttpConnection. 101 */ 102 public HttpConnection() { 103 req = new Request(); 104 } 105 106 /** 107 Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not 108 copied. All other settings (proxy, parser, cookies, etc) are copied. 109 @param copy the request to copy 110 */ 111 HttpConnection(Request copy) { 112 req = new Request(copy); 113 } 114 115 private static String encodeMimeName(String val) { 116 return val.replace("\"", "%22"); 117 } 118 119 private HttpConnection.Request req; 120 private Connection.@Nullable Response res; 121 122 @Override 123 public Connection newRequest() { 124 // copy the prototype request for the different settings, cookie manager, etc 125 return new HttpConnection(req); 126 } 127 128 /** Create a new Connection that just wraps the provided Request and Response */ 129 private HttpConnection(Request req, Response res) { 130 this.req = req; 131 this.res = res; 132 } 133 134 @Override 135 public Connection url(URL url) { 136 req.url(url); 137 return this; 138 } 139 140 @Override 141 public Connection url(String url) { 142 Validate.notEmptyParam(url, "url"); 143 try { 144 req.url(new URL(url)); 145 } catch (MalformedURLException e) { 146 throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e); 147 } 148 return this; 149 } 150 151 @Override 152 public Connection proxy(@Nullable Proxy proxy) { 153 req.proxy(proxy); 154 return this; 155 } 156 157 @Override 158 public Connection proxy(String host, int port) { 159 req.proxy(host, port); 160 return this; 161 } 162 163 @Override 164 public Connection userAgent(String userAgent) { 165 Validate.notNullParam(userAgent, "userAgent"); 166 req.header(USER_AGENT, userAgent); 167 return this; 168 } 169 170 @Override 171 public Connection timeout(int millis) { 172 req.timeout(millis); 173 return this; 174 } 175 176 @Override 177 public Connection maxBodySize(int bytes) { 178 req.maxBodySize(bytes); 179 return this; 180 } 181 182 @Override 183 public Connection followRedirects(boolean followRedirects) { 184 req.followRedirects(followRedirects); 185 return this; 186 } 187 188 @Override 189 public Connection referrer(String referrer) { 190 Validate.notNullParam(referrer, "referrer"); 191 req.header("Referer", referrer); 192 return this; 193 } 194 195 @Override 196 public Connection method(Method method) { 197 req.method(method); 198 return this; 199 } 200 201 @Override 202 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 203 req.ignoreHttpErrors(ignoreHttpErrors); 204 return this; 205 } 206 207 @Override 208 public Connection ignoreContentType(boolean ignoreContentType) { 209 req.ignoreContentType(ignoreContentType); 210 return this; 211 } 212 213 @Override 214 public Connection data(String key, String value) { 215 req.data(KeyVal.create(key, value)); 216 return this; 217 } 218 219 @Override 220 public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) { 221 req.sslSocketFactory(sslSocketFactory); 222 return this; 223 } 224 225 @Override 226 public Connection data(String key, String filename, InputStream inputStream) { 227 req.data(KeyVal.create(key, filename, inputStream)); 228 return this; 229 } 230 231 @Override 232 public Connection data(String key, String filename, InputStream inputStream, String contentType) { 233 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 234 return this; 235 } 236 237 @Override 238 public Connection data(Map<String, String> data) { 239 Validate.notNullParam(data, "data"); 240 for (Map.Entry<String, String> entry : data.entrySet()) { 241 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 242 } 243 return this; 244 } 245 246 @Override 247 public Connection data(String... keyvals) { 248 Validate.notNullParam(keyvals, "keyvals"); 249 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 250 for (int i = 0; i < keyvals.length; i += 2) { 251 String key = keyvals[i]; 252 String value = keyvals[i+1]; 253 Validate.notEmpty(key, "Data key must not be empty"); 254 Validate.notNull(value, "Data value must not be null"); 255 req.data(KeyVal.create(key, value)); 256 } 257 return this; 258 } 259 260 @Override 261 public Connection data(Collection<Connection.KeyVal> data) { 262 Validate.notNullParam(data, "data"); 263 for (Connection.KeyVal entry: data) { 264 req.data(entry); 265 } 266 return this; 267 } 268 269 @Override 270 public Connection.KeyVal data(String key) { 271 Validate.notEmptyParam(key, "key"); 272 for (Connection.KeyVal keyVal : request().data()) { 273 if (keyVal.key().equals(key)) 274 return keyVal; 275 } 276 return null; 277 } 278 279 @Override 280 public Connection requestBody(String body) { 281 req.requestBody(body); 282 return this; 283 } 284 285 @Override 286 public Connection header(String name, String value) { 287 req.header(name, value); 288 return this; 289 } 290 291 @Override 292 public Connection headers(Map<String,String> headers) { 293 Validate.notNullParam(headers, "headers"); 294 for (Map.Entry<String,String> entry : headers.entrySet()) { 295 req.header(entry.getKey(),entry.getValue()); 296 } 297 return this; 298 } 299 300 @Override 301 public Connection cookie(String name, String value) { 302 req.cookie(name, value); 303 return this; 304 } 305 306 @Override 307 public Connection cookies(Map<String, String> cookies) { 308 Validate.notNullParam(cookies, "cookies"); 309 for (Map.Entry<String, String> entry : cookies.entrySet()) { 310 req.cookie(entry.getKey(), entry.getValue()); 311 } 312 return this; 313 } 314 315 @Override 316 public Connection cookieStore(CookieStore cookieStore) { 317 // create a new cookie manager using the new store 318 req.cookieManager = new CookieManager(cookieStore, null); 319 return this; 320 } 321 322 @Override 323 public CookieStore cookieStore() { 324 return req.cookieManager.getCookieStore(); 325 } 326 327 @Override 328 public Connection parser(Parser parser) { 329 req.parser(parser); 330 return this; 331 } 332 333 @Override 334 public Document get() throws IOException { 335 req.method(Method.GET); 336 execute(); 337 Validate.notNull(res); 338 return res.parse(); 339 } 340 341 @Override 342 public Document post() throws IOException { 343 req.method(Method.POST); 344 execute(); 345 Validate.notNull(res); 346 return res.parse(); 347 } 348 349 @Override 350 public Connection.Response execute() throws IOException { 351 res = Response.execute(req); 352 return res; 353 } 354 355 @Override 356 public Connection.Request request() { 357 return req; 358 } 359 360 @Override 361 public Connection request(Connection.Request request) { 362 req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired 363 return this; 364 } 365 366 @Override 367 public Connection.Response response() { 368 if (res == null) { 369 throw new IllegalArgumentException("You must execute the request before getting a response."); 370 } 371 return res; 372 } 373 374 @Override 375 public Connection response(Connection.Response response) { 376 res = response; 377 return this; 378 } 379 380 @Override 381 public Connection postDataCharset(String charset) { 382 req.postDataCharset(charset); 383 return this; 384 } 385 386 @Override public Connection auth(RequestAuthenticator authenticator) { 387 req.auth(authenticator); 388 return this; 389 } 390 391 @Override public Connection onResponseProgress(Progress<Connection.Response> handler) { 392 req.responseProgress = handler; 393 return this; 394 } 395 396 @SuppressWarnings("unchecked") 397 private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> { 398 private static final URL UnsetUrl; // only used if you created a new Request() 399 static { 400 try { 401 UnsetUrl = new URL("http://undefined/"); 402 } catch (MalformedURLException e) { 403 throw new IllegalStateException(e); 404 } 405 } 406 407 URL url = UnsetUrl; 408 Method method = Method.GET; 409 Map<String, List<String>> headers; 410 Map<String, String> cookies; 411 412 private Base() { 413 headers = new LinkedHashMap<>(); 414 cookies = new LinkedHashMap<>(); 415 } 416 417 private Base(Base<T> copy) { 418 url = copy.url; // unmodifiable object 419 method = copy.method; 420 headers = new LinkedHashMap<>(); 421 for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) { 422 headers.put(entry.getKey(), new ArrayList<>(entry.getValue())); 423 } 424 cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings 425 } 426 427 @Override 428 public URL url() { 429 if (url == UnsetUrl) 430 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request."); 431 return url; 432 } 433 434 @Override 435 public T url(URL url) { 436 Validate.notNullParam(url, "url"); 437 this.url = new UrlBuilder(url).build(); 438 return (T) this; 439 } 440 441 @Override 442 public Method method() { 443 return method; 444 } 445 446 @Override 447 public T method(Method method) { 448 Validate.notNullParam(method, "method"); 449 this.method = method; 450 return (T) this; 451 } 452 453 @Override 454 public String header(String name) { 455 Validate.notNullParam(name, "name"); 456 List<String> vals = getHeadersCaseInsensitive(name); 457 if (vals.size() > 0) { 458 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 459 return StringUtil.join(vals, ", "); 460 } 461 462 return null; 463 } 464 465 @Override 466 public T addHeader(String name, @Nullable String value) { 467 Validate.notEmptyParam(name, "name"); 468 //noinspection ConstantConditions 469 value = value == null ? "" : value; 470 471 List<String> values = headers(name); 472 if (values.isEmpty()) { 473 values = new ArrayList<>(); 474 headers.put(name, values); 475 } 476 values.add(value); 477 478 return (T) this; 479 } 480 481 @Override 482 public List<String> headers(String name) { 483 Validate.notEmptyParam(name, "name"); 484 return getHeadersCaseInsensitive(name); 485 } 486 487 @Override 488 public T header(String name, String value) { 489 Validate.notEmptyParam(name, "name"); 490 removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding" 491 addHeader(name, value); 492 return (T) this; 493 } 494 495 @Override 496 public boolean hasHeader(String name) { 497 Validate.notEmptyParam(name, "name"); 498 return !getHeadersCaseInsensitive(name).isEmpty(); 499 } 500 501 /** 502 * Test if the request has a header with this value (case insensitive). 503 */ 504 @Override 505 public boolean hasHeaderWithValue(String name, String value) { 506 Validate.notEmpty(name); 507 Validate.notEmpty(value); 508 List<String> values = headers(name); 509 for (String candidate : values) { 510 if (value.equalsIgnoreCase(candidate)) 511 return true; 512 } 513 return false; 514 } 515 516 @Override 517 public T removeHeader(String name) { 518 Validate.notEmptyParam(name, "name"); 519 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too 520 if (entry != null) 521 headers.remove(entry.getKey()); // ensures correct case 522 return (T) this; 523 } 524 525 @Override 526 public Map<String, String> headers() { 527 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 528 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 529 String header = entry.getKey(); 530 List<String> values = entry.getValue(); 531 if (values.size() > 0) 532 map.put(header, values.get(0)); 533 } 534 return map; 535 } 536 537 @Override 538 public Map<String, List<String>> multiHeaders() { 539 return headers; 540 } 541 542 private List<String> getHeadersCaseInsensitive(String name) { 543 Validate.notNull(name); 544 545 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 546 if (name.equalsIgnoreCase(entry.getKey())) 547 return entry.getValue(); 548 } 549 550 return Collections.emptyList(); 551 } 552 553 private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) { 554 String lc = lowerCase(name); 555 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 556 if (lowerCase(entry.getKey()).equals(lc)) 557 return entry; 558 } 559 return null; 560 } 561 562 @Override 563 public String cookie(String name) { 564 Validate.notEmptyParam(name, "name"); 565 return cookies.get(name); 566 } 567 568 @Override 569 public T cookie(String name, String value) { 570 Validate.notEmptyParam(name, "name"); 571 Validate.notNullParam(value, "value"); 572 cookies.put(name, value); 573 return (T) this; 574 } 575 576 @Override 577 public boolean hasCookie(String name) { 578 Validate.notEmptyParam(name, "name"); 579 return cookies.containsKey(name); 580 } 581 582 @Override 583 public T removeCookie(String name) { 584 Validate.notEmptyParam(name, "name"); 585 cookies.remove(name); 586 return (T) this; 587 } 588 589 @Override 590 public Map<String, String> cookies() { 591 return cookies; 592 } 593 } 594 595 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 596 static { 597 System.setProperty("sun.net.http.allowRestrictedHeaders", "true"); 598 // make sure that we can send Sec-Fetch-Site headers etc. 599 } 600 601 private @Nullable Proxy proxy; 602 private int timeoutMilliseconds; 603 private int maxBodySizeBytes; 604 private boolean followRedirects; 605 private final Collection<Connection.KeyVal> data; 606 private @Nullable String body = null; 607 private boolean ignoreHttpErrors = false; 608 private boolean ignoreContentType = false; 609 private Parser parser; 610 private boolean parserDefined = false; // called parser(...) vs initialized in ctor 611 private String postDataCharset = DataUtil.defaultCharsetName; 612 private @Nullable SSLSocketFactory sslSocketFactory; 613 private CookieManager cookieManager; 614 private @Nullable RequestAuthenticator authenticator; 615 private @Nullable Progress<Connection.Response> responseProgress; 616 617 private volatile boolean executing = false; 618 619 Request() { 620 super(); 621 timeoutMilliseconds = 30000; // 30 seconds 622 maxBodySizeBytes = 1024 * 1024 * 2; // 2MB 623 followRedirects = true; 624 data = new ArrayList<>(); 625 method = Method.GET; 626 addHeader("Accept-Encoding", "gzip"); 627 addHeader(USER_AGENT, DEFAULT_UA); 628 parser = Parser.htmlParser(); 629 cookieManager = new CookieManager(); // creates a default InMemoryCookieStore 630 } 631 632 Request(Request copy) { 633 super(copy); 634 proxy = copy.proxy; 635 postDataCharset = copy.postDataCharset; 636 timeoutMilliseconds = copy.timeoutMilliseconds; 637 maxBodySizeBytes = copy.maxBodySizeBytes; 638 followRedirects = copy.followRedirects; 639 data = new ArrayList<>(); // data not copied 640 //body not copied 641 ignoreHttpErrors = copy.ignoreHttpErrors; 642 ignoreContentType = copy.ignoreContentType; 643 parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy 644 parserDefined = copy.parserDefined; 645 sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share 646 cookieManager = copy.cookieManager; 647 authenticator = copy.authenticator; 648 responseProgress = copy.responseProgress; 649 executing = false; 650 } 651 652 @Override 653 public Proxy proxy() { 654 return proxy; 655 } 656 657 @Override 658 public Request proxy(@Nullable Proxy proxy) { 659 this.proxy = proxy; 660 return this; 661 } 662 663 @Override 664 public Request proxy(String host, int port) { 665 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 666 return this; 667 } 668 669 @Override 670 public int timeout() { 671 return timeoutMilliseconds; 672 } 673 674 @Override 675 public Request timeout(int millis) { 676 Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 677 timeoutMilliseconds = millis; 678 return this; 679 } 680 681 @Override 682 public int maxBodySize() { 683 return maxBodySizeBytes; 684 } 685 686 @Override 687 public Connection.Request maxBodySize(int bytes) { 688 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 689 maxBodySizeBytes = bytes; 690 return this; 691 } 692 693 @Override 694 public boolean followRedirects() { 695 return followRedirects; 696 } 697 698 @Override 699 public Connection.Request followRedirects(boolean followRedirects) { 700 this.followRedirects = followRedirects; 701 return this; 702 } 703 704 @Override 705 public boolean ignoreHttpErrors() { 706 return ignoreHttpErrors; 707 } 708 709 @Override 710 public SSLSocketFactory sslSocketFactory() { 711 return sslSocketFactory; 712 } 713 714 @Override 715 public void sslSocketFactory(SSLSocketFactory sslSocketFactory) { 716 this.sslSocketFactory = sslSocketFactory; 717 } 718 719 @Override 720 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 721 this.ignoreHttpErrors = ignoreHttpErrors; 722 return this; 723 } 724 725 @Override 726 public boolean ignoreContentType() { 727 return ignoreContentType; 728 } 729 730 @Override 731 public Connection.Request ignoreContentType(boolean ignoreContentType) { 732 this.ignoreContentType = ignoreContentType; 733 return this; 734 } 735 736 @Override 737 public Request data(Connection.KeyVal keyval) { 738 Validate.notNullParam(keyval, "keyval"); 739 data.add(keyval); 740 return this; 741 } 742 743 @Override 744 public Collection<Connection.KeyVal> data() { 745 return data; 746 } 747 748 @Override 749 public Connection.Request requestBody(@Nullable String body) { 750 this.body = body; 751 return this; 752 } 753 754 @Override 755 public String requestBody() { 756 return body; 757 } 758 759 @Override 760 public Request parser(Parser parser) { 761 this.parser = parser; 762 parserDefined = true; 763 return this; 764 } 765 766 @Override 767 public Parser parser() { 768 return parser; 769 } 770 771 @Override 772 public Connection.Request postDataCharset(String charset) { 773 Validate.notNullParam(charset, "charset"); 774 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 775 this.postDataCharset = charset; 776 return this; 777 } 778 779 @Override 780 public String postDataCharset() { 781 return postDataCharset; 782 } 783 784 CookieManager cookieManager() { 785 return cookieManager; 786 } 787 788 @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { 789 this.authenticator = authenticator; 790 return this; 791 } 792 793 @Override @Nullable public RequestAuthenticator auth() { 794 return authenticator; 795 } 796 } 797 798 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 799 private static final int MAX_REDIRECTS = 20; 800 private static final String LOCATION = "Location"; 801 private final int statusCode; 802 private final String statusMessage; 803 private @Nullable ByteBuffer byteData; 804 private @Nullable ControllableInputStream bodyStream; 805 private @Nullable HttpURLConnection conn; 806 private @Nullable String charset; 807 private @Nullable final String contentType; 808 private boolean executed = false; 809 private boolean inputStreamRead = false; 810 private int numRedirects = 0; 811 private final HttpConnection.Request req; 812 813 /* 814 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 815 */ 816 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 817 818 /** 819 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 820 are created from the HttpURLConnection and fields defined. 821 */ 822 Response() { 823 super(); 824 statusCode = 400; 825 statusMessage = "Request not made"; 826 req = new Request(); 827 contentType = null; 828 } 829 830 static Response execute(HttpConnection.Request req) throws IOException { 831 return execute(req, null); 832 } 833 834 static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException { 835 synchronized (req) { 836 Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads."); 837 req.executing = true; 838 } 839 Validate.notNullParam(req, "req"); 840 URL url = req.url(); 841 Validate.notNull(url, "URL must be specified to connect"); 842 String protocol = url.getProtocol(); 843 if (!protocol.equals("http") && !protocol.equals("https")) 844 throw new MalformedURLException("Only http & https protocols supported"); 845 final boolean methodHasBody = req.method().hasBody(); 846 final boolean hasRequestBody = req.requestBody() != null; 847 if (!methodHasBody) 848 Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method()); 849 850 // set up the request for execution 851 String mimeBoundary = null; 852 if (req.data().size() > 0 && (!methodHasBody || hasRequestBody)) 853 serialiseRequestUrl(req); 854 else if (methodHasBody) 855 mimeBoundary = setOutputContentType(req); 856 857 long startTime = System.nanoTime(); 858 HttpURLConnection conn = createConnection(req); 859 Response res = null; 860 try { 861 conn.connect(); 862 if (conn.getDoOutput()) { 863 OutputStream out = conn.getOutputStream(); 864 try { writePost(req, out, mimeBoundary); } 865 catch (IOException e) { conn.disconnect(); throw e; } 866 finally { out.close(); } 867 } 868 869 int status = conn.getResponseCode(); 870 res = new Response(conn, req, previousResponse); 871 872 // redirect if there's a location header (from 3xx, or 201 etc) 873 if (res.hasHeader(LOCATION) && req.followRedirects()) { 874 if (status != HTTP_TEMP_REDIR) { 875 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 876 req.data().clear(); 877 req.requestBody(null); 878 req.removeHeader(CONTENT_TYPE); 879 } 880 881 String location = res.header(LOCATION); 882 Validate.notNull(location); 883 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 884 location = location.substring(6); 885 URL redir = StringUtil.resolve(req.url(), location); 886 req.url(redir); 887 888 req.executing = false; 889 return execute(req, res); 890 } 891 if ((status < 200 || status >= 400) && !req.ignoreHttpErrors()) 892 throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString()); 893 894 // check that we can handle the returned content type; if not, abort before fetching it 895 String contentType = res.contentType(); 896 if (contentType != null 897 && !req.ignoreContentType() 898 && !contentType.startsWith("text/") 899 && !xmlContentTypeRxp.matcher(contentType).matches() 900 ) 901 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 902 contentType, req.url().toString()); 903 904 // switch to the XML parser if content type is xml and not parser not explicitly set 905 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 906 if (!req.parserDefined) req.parser(Parser.xmlParser()); 907 } 908 909 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 910 if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 911 InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream(); 912 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 913 stream = new GZIPInputStream(stream); 914 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 915 stream = new InflaterInputStream(stream, new Inflater(true)); 916 917 res.bodyStream = ControllableInputStream.wrap( 918 stream, SharedConstants.DefaultBufferSize, req.maxBodySize()) 919 .timeout(startTime, req.timeout()); 920 921 if (req.responseProgress != null) // set response progress listener 922 res.bodyStream.onProgress(conn.getContentLength(), req.responseProgress, res); 923 } else { 924 res.byteData = DataUtil.emptyByteBuffer(); 925 } 926 } catch (IOException e) { 927 if (res != null) res.safeClose(); // will be non-null if got to conn 928 throw e; 929 } finally { 930 req.executing = false; 931 932 // detach any thread local auth delegate 933 if (req.authenticator != null) 934 AuthenticationHandler.handler.remove(); 935 } 936 937 res.executed = true; 938 return res; 939 } 940 941 @Override 942 public int statusCode() { 943 return statusCode; 944 } 945 946 @Override 947 public String statusMessage() { 948 return statusMessage; 949 } 950 951 @Override 952 public String charset() { 953 return charset; 954 } 955 956 @Override 957 public Response charset(String charset) { 958 this.charset = charset; 959 return this; 960 } 961 962 @Override 963 public String contentType() { 964 return contentType; 965 } 966 967 /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */ 968 private InputStream prepareParse() { 969 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 970 InputStream stream = bodyStream; 971 if (byteData != null) { // bytes have been read in to the buffer, parse that 972 stream = new ByteArrayInputStream(byteData.array()); 973 inputStreamRead = false; // ok to reparse if in bytes 974 } 975 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 976 Validate.notNull(stream); 977 inputStreamRead = true; 978 return stream; 979 } 980 981 @Override public Document parse() throws IOException { 982 InputStream stream = prepareParse(); 983 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 984 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req? 985 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 986 safeClose(); 987 return doc; 988 } 989 990 @Override public StreamParser streamParser() throws IOException { 991 InputStream stream = prepareParse(); 992 String baseUri = url.toExternalForm(); 993 DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser()); 994 // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit. 995 996 // set up the stream parser and rig this connection up to the parsed doc: 997 StreamParser streamer = new StreamParser(req.parser()); 998 BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset)); 999 DataUtil.maybeSkipBom(reader, charsetDoc); 1000 streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it 1001 streamer.document().connection(new HttpConnection(req, this)); 1002 charset = charsetDoc.charset.name(); 1003 1004 // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream 1005 return streamer; 1006 } 1007 1008 private void prepareByteData() { 1009 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1010 if (bodyStream != null && byteData == null) { 1011 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 1012 try { 1013 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 1014 } catch (IOException e) { 1015 throw new UncheckedIOException(e); 1016 } finally { 1017 inputStreamRead = true; 1018 safeClose(); 1019 } 1020 } 1021 } 1022 1023 @Override 1024 public String body() { 1025 prepareByteData(); 1026 Validate.notNull(byteData); 1027 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 1028 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 1029 .decode(byteData).toString(); 1030 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 1031 return body; 1032 } 1033 1034 @Override 1035 public byte[] bodyAsBytes() { 1036 prepareByteData(); 1037 Validate.notNull(byteData); 1038 return byteData.array(); 1039 } 1040 1041 @Override 1042 public Connection.Response bufferUp() { 1043 prepareByteData(); 1044 return this; 1045 } 1046 1047 @Override 1048 public BufferedInputStream bodyStream() { 1049 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1050 1051 // if we have read to bytes (via buffer up), return those as a stream. 1052 if (byteData != null) { 1053 return new BufferedInputStream(new ByteArrayInputStream(byteData.array()), SharedConstants.DefaultBufferSize); 1054 } 1055 1056 Validate.isFalse(inputStreamRead, "Request has already been read"); 1057 Validate.notNull(bodyStream); 1058 inputStreamRead = true; 1059 return bodyStream.inputStream(); 1060 } 1061 1062 // set up connection defaults, and details from request 1063 private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException { 1064 Proxy proxy = req.proxy(); 1065 final HttpURLConnection conn = (HttpURLConnection) ( 1066 proxy == null ? 1067 req.url().openConnection() : 1068 req.url().openConnection(proxy) 1069 ); 1070 1071 conn.setRequestMethod(req.method().name()); 1072 conn.setInstanceFollowRedirects(false); // don't rely on native redirection support 1073 conn.setConnectTimeout(req.timeout()); 1074 conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read 1075 1076 if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection) 1077 ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory()); 1078 if (req.authenticator != null) 1079 AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally 1080 if (req.method().hasBody()) 1081 conn.setDoOutput(true); 1082 CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store 1083 for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) { 1084 for (String value : header.getValue()) { 1085 conn.addRequestProperty(header.getKey(), value); 1086 } 1087 } 1088 return conn; 1089 } 1090 1091 /** 1092 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1093 * keep-alives to work (as the underlying connection is actually held open, despite the name). 1094 */ 1095 private void safeClose() { 1096 if (bodyStream != null) { 1097 try { 1098 bodyStream.close(); 1099 } catch (IOException e) { 1100 // no-op 1101 } finally { 1102 bodyStream = null; 1103 } 1104 } 1105 if (conn != null) { 1106 conn.disconnect(); 1107 conn = null; 1108 } 1109 } 1110 1111 // set up url, method, header, cookies 1112 private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException { 1113 this.conn = conn; 1114 this.req = request; 1115 method = Method.valueOf(conn.getRequestMethod()); 1116 url = conn.getURL(); 1117 statusCode = conn.getResponseCode(); 1118 statusMessage = conn.getResponseMessage(); 1119 contentType = conn.getContentType(); 1120 1121 Map<String, List<String>> resHeaders = createHeaderMap(conn); 1122 processResponseHeaders(resHeaders); // includes cookie key/val read during header scan 1123 CookieUtil.storeCookies(req, url, resHeaders); // add set cookies to cookie store 1124 1125 if (previousResponse != null) { // was redirected 1126 // map previous response cookies into this response cookies() object 1127 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1128 if (!hasCookie(prevCookie.getKey())) 1129 cookie(prevCookie.getKey(), prevCookie.getValue()); 1130 } 1131 previousResponse.safeClose(); 1132 1133 // enforce too many redirects: 1134 numRedirects = previousResponse.numRedirects + 1; 1135 if (numRedirects >= MAX_REDIRECTS) 1136 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 1137 } 1138 } 1139 1140 private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) { 1141 // the default sun impl of conn.getHeaderFields() returns header values out of order 1142 final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>(); 1143 int i = 0; 1144 while (true) { 1145 final String key = conn.getHeaderFieldKey(i); 1146 final String val = conn.getHeaderField(i); 1147 if (key == null && val == null) 1148 break; 1149 i++; 1150 if (key == null || val == null) 1151 continue; // skip http1.1 line 1152 1153 final List<String> vals = headers.computeIfAbsent(key, Functions.listFunction()); 1154 vals.add(val); 1155 } 1156 return headers; 1157 } 1158 1159 void processResponseHeaders(Map<String, List<String>> resHeaders) { 1160 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1161 String name = entry.getKey(); 1162 if (name == null) 1163 continue; // http/1.1 line 1164 1165 List<String> values = entry.getValue(); 1166 if (name.equalsIgnoreCase("Set-Cookie")) { 1167 for (String value : values) { 1168 if (value == null) 1169 continue; 1170 TokenQueue cd = new TokenQueue(value); 1171 String cookieName = cd.chompTo("=").trim(); 1172 String cookieVal = cd.consumeTo(";").trim(); 1173 // ignores path, date, domain, validateTLSCertificates et al. full details will be available in cookiestore if required 1174 // name not blank, value not null 1175 if (cookieName.length() > 0 && !cookies.containsKey(cookieName)) // if duplicates, only keep the first 1176 cookie(cookieName, cookieVal); 1177 } 1178 } 1179 for (String value : values) { 1180 addHeader(name, fixHeaderEncoding(value)); 1181 } 1182 } 1183 } 1184 1185 /** 1186 Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that 1187 and re-decode the string as UTF-8. 1188 * @param val a header value string that may have been incorrectly decoded as 8859. 1189 * @return a potentially re-decoded string. 1190 */ 1191 @Nullable 1192 private static String fixHeaderEncoding(@Nullable String val) { 1193 if (val == null) return val; 1194 byte[] bytes = val.getBytes(ISO_8859_1); 1195 if (looksLikeUtf8(bytes)) 1196 return new String(bytes, UTF_8); 1197 else 1198 return val; 1199 } 1200 1201 private static boolean looksLikeUtf8(byte[] input) { 1202 int i = 0; 1203 // BOM: 1204 if (input.length >= 3 1205 && (input[0] & 0xFF) == 0xEF 1206 && (input[1] & 0xFF) == 0xBB 1207 && (input[2] & 0xFF) == 0xBF) { 1208 i = 3; 1209 } 1210 1211 int end; 1212 boolean foundNonAscii = false; 1213 for (int j = input.length; i < j; ++i) { 1214 int o = input[i]; 1215 if ((o & 0x80) == 0) { 1216 continue; // ASCII 1217 } 1218 foundNonAscii = true; 1219 1220 // UTF-8 leading: 1221 if ((o & 0xE0) == 0xC0) { 1222 end = i + 1; 1223 } else if ((o & 0xF0) == 0xE0) { 1224 end = i + 2; 1225 } else if ((o & 0xF8) == 0xF0) { 1226 end = i + 3; 1227 } else { 1228 return false; 1229 } 1230 1231 if (end >= input.length) 1232 return false; 1233 1234 while (i < end) { 1235 i++; 1236 o = input[i]; 1237 if ((o & 0xC0) != 0x80) { 1238 return false; 1239 } 1240 } 1241 } 1242 return foundNonAscii; 1243 } 1244 1245 private @Nullable static String setOutputContentType(final Connection.Request req) { 1246 final String contentType = req.header(CONTENT_TYPE); 1247 String bound = null; 1248 if (contentType != null) { 1249 // no-op; don't add content type as already set (e.g. for requestBody()) 1250 // todo - if content type already set, we could add charset 1251 1252 // if user has set content type to multipart/form-data, auto add boundary. 1253 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1254 bound = DataUtil.mimeBoundary(); 1255 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1256 } 1257 1258 } 1259 else if (needsMultipart(req)) { 1260 bound = DataUtil.mimeBoundary(); 1261 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1262 } else { 1263 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1264 } 1265 return bound; 1266 } 1267 1268 private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException { 1269 final Collection<Connection.KeyVal> data = req.data(); 1270 final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset()))); 1271 1272 if (boundary != null) { 1273 // boundary will be set if we're in multipart mode 1274 for (Connection.KeyVal keyVal : data) { 1275 w.write("--"); 1276 w.write(boundary); 1277 w.write("\r\n"); 1278 w.write("Content-Disposition: form-data; name=\""); 1279 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1280 w.write("\""); 1281 final InputStream input = keyVal.inputStream(); 1282 if (input != null) { 1283 w.write("; filename=\""); 1284 w.write(encodeMimeName(keyVal.value())); 1285 w.write("\"\r\nContent-Type: "); 1286 String contentType = keyVal.contentType(); 1287 w.write(contentType != null ? contentType : DefaultUploadType); 1288 w.write("\r\n\r\n"); 1289 w.flush(); // flush 1290 DataUtil.crossStreams(input, outputStream); 1291 outputStream.flush(); 1292 } else { 1293 w.write("\r\n\r\n"); 1294 w.write(keyVal.value()); 1295 } 1296 w.write("\r\n"); 1297 } 1298 w.write("--"); 1299 w.write(boundary); 1300 w.write("--"); 1301 } else { 1302 String body = req.requestBody(); 1303 if (body != null) { 1304 // data will be in query string, we're sending a plaintext body 1305 w.write(body); 1306 } 1307 else { 1308 // regular form data (application/x-www-form-urlencoded) 1309 boolean first = true; 1310 for (Connection.KeyVal keyVal : data) { 1311 if (!first) 1312 w.append('&'); 1313 else 1314 first = false; 1315 1316 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1317 w.write('='); 1318 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1319 } 1320 } 1321 } 1322 w.close(); 1323 } 1324 1325 // for get url reqs, serialise the data map into the url 1326 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1327 UrlBuilder in = new UrlBuilder(req.url()); 1328 1329 for (Connection.KeyVal keyVal : req.data()) { 1330 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1331 in.appendKeyVal(keyVal); 1332 } 1333 req.url(in.build()); 1334 req.data().clear(); // moved into url as get params 1335 } 1336 } 1337 1338 private static boolean needsMultipart(Connection.Request req) { 1339 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1340 for (Connection.KeyVal keyVal : req.data()) { 1341 if (keyVal.hasInputStream()) 1342 return true; 1343 } 1344 return false; 1345 } 1346 1347 public static class KeyVal implements Connection.KeyVal { 1348 private String key; 1349 private String value; 1350 private @Nullable InputStream stream; 1351 private @Nullable String contentType; 1352 1353 public static KeyVal create(String key, String value) { 1354 return new KeyVal(key, value); 1355 } 1356 1357 public static KeyVal create(String key, String filename, InputStream stream) { 1358 return new KeyVal(key, filename) 1359 .inputStream(stream); 1360 } 1361 1362 private KeyVal(String key, String value) { 1363 Validate.notEmptyParam(key, "key"); 1364 Validate.notNullParam(value, "value"); 1365 this.key = key; 1366 this.value = value; 1367 } 1368 1369 @Override 1370 public KeyVal key(String key) { 1371 Validate.notEmptyParam(key, "key"); 1372 this.key = key; 1373 return this; 1374 } 1375 1376 @Override 1377 public String key() { 1378 return key; 1379 } 1380 1381 @Override 1382 public KeyVal value(String value) { 1383 Validate.notNullParam(value, "value"); 1384 this.value = value; 1385 return this; 1386 } 1387 1388 @Override 1389 public String value() { 1390 return value; 1391 } 1392 1393 public KeyVal inputStream(InputStream inputStream) { 1394 Validate.notNullParam(value, "inputStream"); 1395 this.stream = inputStream; 1396 return this; 1397 } 1398 1399 @Override 1400 public InputStream inputStream() { 1401 return stream; 1402 } 1403 1404 @Override 1405 public boolean hasInputStream() { 1406 return stream != null; 1407 } 1408 1409 @Override 1410 public Connection.KeyVal contentType(String contentType) { 1411 Validate.notEmpty(contentType); 1412 this.contentType = contentType; 1413 return this; 1414 } 1415 1416 @Override 1417 public String contentType() { 1418 return contentType; 1419 } 1420 1421 @Override 1422 public String toString() { 1423 return key + "=" + value; 1424 } 1425 } 1426}