001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.Progress; 006import org.jsoup.UncheckedIOException; 007import org.jsoup.UnsupportedMimeTypeException; 008import org.jsoup.internal.ControllableInputStream; 009import org.jsoup.internal.Functions; 010import org.jsoup.internal.StringUtil; 011import org.jsoup.nodes.Document; 012import org.jsoup.parser.Parser; 013import org.jsoup.parser.StreamParser; 014import org.jsoup.parser.TokenQueue; 015import org.jspecify.annotations.Nullable; 016 017import javax.net.ssl.HttpsURLConnection; 018import javax.net.ssl.SSLSocketFactory; 019import java.io.BufferedInputStream; 020import java.io.BufferedReader; 021import java.io.BufferedWriter; 022import java.io.ByteArrayInputStream; 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.InputStreamReader; 026import java.io.OutputStream; 027import java.io.OutputStreamWriter; 028import java.net.CookieManager; 029import java.net.CookieStore; 030import java.net.HttpURLConnection; 031import java.net.InetSocketAddress; 032import java.net.MalformedURLException; 033import java.net.Proxy; 034import java.net.URL; 035import java.net.URLEncoder; 036import java.nio.Buffer; 037import java.nio.ByteBuffer; 038import java.nio.charset.Charset; 039import java.nio.charset.IllegalCharsetNameException; 040import java.util.ArrayList; 041import java.util.Collection; 042import java.util.Collections; 043import java.util.LinkedHashMap; 044import java.util.List; 045import java.util.Map; 046import java.util.regex.Pattern; 047import java.util.zip.GZIPInputStream; 048import java.util.zip.Inflater; 049import java.util.zip.InflaterInputStream; 050 051import static org.jsoup.Connection.Method.HEAD; 052import static org.jsoup.helper.DataUtil.UTF_8; 053import static org.jsoup.internal.Normalizer.lowerCase; 054import static org.jsoup.internal.SharedConstants.DefaultBufferSize; 055 056/** 057 * Implementation of {@link Connection}. 058 * @see org.jsoup.Jsoup#connect(String) 059 */ 060@SuppressWarnings("CharsetObjectCanBeUsed") 061public class HttpConnection implements Connection { 062 public static final String CONTENT_ENCODING = "Content-Encoding"; 063 /** 064 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 065 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 066 */ 067 public static final String DEFAULT_UA = 068 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"; 069 private static final String USER_AGENT = "User-Agent"; 070 public static final String CONTENT_TYPE = "Content-Type"; 071 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 072 public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 073 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 074 private static final String DefaultUploadType = "application/octet-stream"; 075 private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); 076 077 /** 078 Create a new Connection, with the request URL specified. 079 @param url the URL to fetch from 080 @return a new Connection object 081 */ 082 public static Connection connect(String url) { 083 Connection con = new HttpConnection(); 084 con.url(url); 085 return con; 086 } 087 088 /** 089 Create a new Connection, with the request URL specified. 090 @param url the URL to fetch from 091 @return a new Connection object 092 */ 093 public static Connection connect(URL url) { 094 Connection con = new HttpConnection(); 095 con.url(url); 096 return con; 097 } 098 099 /** 100 Create a new, empty HttpConnection. 101 */ 102 public HttpConnection() { 103 req = new Request(); 104 } 105 106 /** 107 Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not 108 copied. All other settings (proxy, parser, cookies, etc) are copied. 109 @param copy the request to copy 110 */ 111 HttpConnection(Request copy) { 112 req = new Request(copy); 113 } 114 115 private static String encodeMimeName(String val) { 116 return val.replace("\"", "%22"); 117 } 118 119 private HttpConnection.Request req; 120 private Connection.@Nullable Response res; 121 122 @Override 123 public Connection newRequest() { 124 // copy the prototype request for the different settings, cookie manager, etc 125 return new HttpConnection(req); 126 } 127 128 /** Create a new Connection that just wraps the provided Request and Response */ 129 private HttpConnection(Request req, Response res) { 130 this.req = req; 131 this.res = res; 132 } 133 134 @Override 135 public Connection url(URL url) { 136 req.url(url); 137 return this; 138 } 139 140 @Override 141 public Connection url(String url) { 142 Validate.notEmptyParam(url, "url"); 143 try { 144 req.url(new URL(url)); 145 } catch (MalformedURLException e) { 146 throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e); 147 } 148 return this; 149 } 150 151 @Override 152 public Connection proxy(@Nullable Proxy proxy) { 153 req.proxy(proxy); 154 return this; 155 } 156 157 @Override 158 public Connection proxy(String host, int port) { 159 req.proxy(host, port); 160 return this; 161 } 162 163 @Override 164 public Connection userAgent(String userAgent) { 165 Validate.notNullParam(userAgent, "userAgent"); 166 req.header(USER_AGENT, userAgent); 167 return this; 168 } 169 170 @Override 171 public Connection timeout(int millis) { 172 req.timeout(millis); 173 return this; 174 } 175 176 @Override 177 public Connection maxBodySize(int bytes) { 178 req.maxBodySize(bytes); 179 return this; 180 } 181 182 @Override 183 public Connection followRedirects(boolean followRedirects) { 184 req.followRedirects(followRedirects); 185 return this; 186 } 187 188 @Override 189 public Connection referrer(String referrer) { 190 Validate.notNullParam(referrer, "referrer"); 191 req.header("Referer", referrer); 192 return this; 193 } 194 195 @Override 196 public Connection method(Method method) { 197 req.method(method); 198 return this; 199 } 200 201 @Override 202 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 203 req.ignoreHttpErrors(ignoreHttpErrors); 204 return this; 205 } 206 207 @Override 208 public Connection ignoreContentType(boolean ignoreContentType) { 209 req.ignoreContentType(ignoreContentType); 210 return this; 211 } 212 213 @Override 214 public Connection data(String key, String value) { 215 req.data(KeyVal.create(key, value)); 216 return this; 217 } 218 219 @Override 220 public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) { 221 req.sslSocketFactory(sslSocketFactory); 222 return this; 223 } 224 225 @Override 226 public Connection data(String key, String filename, InputStream inputStream) { 227 req.data(KeyVal.create(key, filename, inputStream)); 228 return this; 229 } 230 231 @Override 232 public Connection data(String key, String filename, InputStream inputStream, String contentType) { 233 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 234 return this; 235 } 236 237 @Override 238 public Connection data(Map<String, String> data) { 239 Validate.notNullParam(data, "data"); 240 for (Map.Entry<String, String> entry : data.entrySet()) { 241 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 242 } 243 return this; 244 } 245 246 @Override 247 public Connection data(String... keyvals) { 248 Validate.notNullParam(keyvals, "keyvals"); 249 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 250 for (int i = 0; i < keyvals.length; i += 2) { 251 String key = keyvals[i]; 252 String value = keyvals[i+1]; 253 Validate.notEmpty(key, "Data key must not be empty"); 254 Validate.notNull(value, "Data value must not be null"); 255 req.data(KeyVal.create(key, value)); 256 } 257 return this; 258 } 259 260 @Override 261 public Connection data(Collection<Connection.KeyVal> data) { 262 Validate.notNullParam(data, "data"); 263 for (Connection.KeyVal entry: data) { 264 req.data(entry); 265 } 266 return this; 267 } 268 269 @Override 270 public Connection.KeyVal data(String key) { 271 Validate.notEmptyParam(key, "key"); 272 for (Connection.KeyVal keyVal : request().data()) { 273 if (keyVal.key().equals(key)) 274 return keyVal; 275 } 276 return null; 277 } 278 279 @Override 280 public Connection requestBody(String body) { 281 req.requestBody(body); 282 return this; 283 } 284 285 @Override 286 public Connection header(String name, String value) { 287 req.header(name, value); 288 return this; 289 } 290 291 @Override 292 public Connection headers(Map<String,String> headers) { 293 Validate.notNullParam(headers, "headers"); 294 for (Map.Entry<String,String> entry : headers.entrySet()) { 295 req.header(entry.getKey(),entry.getValue()); 296 } 297 return this; 298 } 299 300 @Override 301 public Connection cookie(String name, String value) { 302 req.cookie(name, value); 303 return this; 304 } 305 306 @Override 307 public Connection cookies(Map<String, String> cookies) { 308 Validate.notNullParam(cookies, "cookies"); 309 for (Map.Entry<String, String> entry : cookies.entrySet()) { 310 req.cookie(entry.getKey(), entry.getValue()); 311 } 312 return this; 313 } 314 315 @Override 316 public Connection cookieStore(CookieStore cookieStore) { 317 // create a new cookie manager using the new store 318 req.cookieManager = new CookieManager(cookieStore, null); 319 return this; 320 } 321 322 @Override 323 public CookieStore cookieStore() { 324 return req.cookieManager.getCookieStore(); 325 } 326 327 @Override 328 public Connection parser(Parser parser) { 329 req.parser(parser); 330 return this; 331 } 332 333 @Override 334 public Document get() throws IOException { 335 req.method(Method.GET); 336 execute(); 337 Validate.notNull(res); 338 return res.parse(); 339 } 340 341 @Override 342 public Document post() throws IOException { 343 req.method(Method.POST); 344 execute(); 345 Validate.notNull(res); 346 return res.parse(); 347 } 348 349 @Override 350 public Connection.Response execute() throws IOException { 351 res = Response.execute(req); 352 return res; 353 } 354 355 @Override 356 public Connection.Request request() { 357 return req; 358 } 359 360 @Override 361 public Connection request(Connection.Request request) { 362 req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired 363 return this; 364 } 365 366 @Override 367 public Connection.Response response() { 368 if (res == null) { 369 throw new IllegalArgumentException("You must execute the request before getting a response."); 370 } 371 return res; 372 } 373 374 @Override 375 public Connection response(Connection.Response response) { 376 res = response; 377 return this; 378 } 379 380 @Override 381 public Connection postDataCharset(String charset) { 382 req.postDataCharset(charset); 383 return this; 384 } 385 386 @Override public Connection auth(RequestAuthenticator authenticator) { 387 req.auth(authenticator); 388 return this; 389 } 390 391 @Override public Connection onResponseProgress(Progress<Connection.Response> handler) { 392 req.responseProgress = handler; 393 return this; 394 } 395 396 @SuppressWarnings("unchecked") 397 private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> { 398 private static final URL UnsetUrl; // only used if you created a new Request() 399 static { 400 try { 401 UnsetUrl = new URL("http://undefined/"); 402 } catch (MalformedURLException e) { 403 throw new IllegalStateException(e); 404 } 405 } 406 407 URL url = UnsetUrl; 408 Method method = Method.GET; 409 Map<String, List<String>> headers; 410 Map<String, String> cookies; 411 412 private Base() { 413 headers = new LinkedHashMap<>(); 414 cookies = new LinkedHashMap<>(); 415 } 416 417 private Base(Base<T> copy) { 418 url = copy.url; // unmodifiable object 419 method = copy.method; 420 headers = new LinkedHashMap<>(); 421 for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) { 422 headers.put(entry.getKey(), new ArrayList<>(entry.getValue())); 423 } 424 cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings 425 } 426 427 @Override 428 public URL url() { 429 if (url == UnsetUrl) 430 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request."); 431 return url; 432 } 433 434 @Override 435 public T url(URL url) { 436 Validate.notNullParam(url, "url"); 437 this.url = new UrlBuilder(url).build(); 438 return (T) this; 439 } 440 441 @Override 442 public Method method() { 443 return method; 444 } 445 446 @Override 447 public T method(Method method) { 448 Validate.notNullParam(method, "method"); 449 this.method = method; 450 return (T) this; 451 } 452 453 @Override 454 public String header(String name) { 455 Validate.notNullParam(name, "name"); 456 List<String> vals = getHeadersCaseInsensitive(name); 457 if (vals.size() > 0) { 458 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 459 return StringUtil.join(vals, ", "); 460 } 461 462 return null; 463 } 464 465 @Override 466 public T addHeader(String name, @Nullable String value) { 467 Validate.notEmptyParam(name, "name"); 468 value = value == null ? "" : value; 469 470 List<String> values = headers(name); 471 if (values.isEmpty()) { 472 values = new ArrayList<>(); 473 headers.put(name, values); 474 } 475 values.add(value); 476 477 return (T) this; 478 } 479 480 @Override 481 public List<String> headers(String name) { 482 Validate.notEmptyParam(name, "name"); 483 return getHeadersCaseInsensitive(name); 484 } 485 486 @Override 487 public T header(String name, String value) { 488 Validate.notEmptyParam(name, "name"); 489 removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding" 490 addHeader(name, value); 491 return (T) this; 492 } 493 494 @Override 495 public boolean hasHeader(String name) { 496 Validate.notEmptyParam(name, "name"); 497 return !getHeadersCaseInsensitive(name).isEmpty(); 498 } 499 500 /** 501 * Test if the request has a header with this value (case insensitive). 502 */ 503 @Override 504 public boolean hasHeaderWithValue(String name, String value) { 505 Validate.notEmpty(name); 506 Validate.notEmpty(value); 507 List<String> values = headers(name); 508 for (String candidate : values) { 509 if (value.equalsIgnoreCase(candidate)) 510 return true; 511 } 512 return false; 513 } 514 515 @Override 516 public T removeHeader(String name) { 517 Validate.notEmptyParam(name, "name"); 518 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too 519 if (entry != null) 520 headers.remove(entry.getKey()); // ensures correct case 521 return (T) this; 522 } 523 524 @Override 525 public Map<String, String> headers() { 526 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 527 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 528 String header = entry.getKey(); 529 List<String> values = entry.getValue(); 530 if (values.size() > 0) 531 map.put(header, values.get(0)); 532 } 533 return map; 534 } 535 536 @Override 537 public Map<String, List<String>> multiHeaders() { 538 return headers; 539 } 540 541 private List<String> getHeadersCaseInsensitive(String name) { 542 Validate.notNull(name); 543 544 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 545 if (name.equalsIgnoreCase(entry.getKey())) 546 return entry.getValue(); 547 } 548 549 return Collections.emptyList(); 550 } 551 552 private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) { 553 String lc = lowerCase(name); 554 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 555 if (lowerCase(entry.getKey()).equals(lc)) 556 return entry; 557 } 558 return null; 559 } 560 561 @Override 562 public String cookie(String name) { 563 Validate.notEmptyParam(name, "name"); 564 return cookies.get(name); 565 } 566 567 @Override 568 public T cookie(String name, String value) { 569 Validate.notEmptyParam(name, "name"); 570 Validate.notNullParam(value, "value"); 571 cookies.put(name, value); 572 return (T) this; 573 } 574 575 @Override 576 public boolean hasCookie(String name) { 577 Validate.notEmptyParam(name, "name"); 578 return cookies.containsKey(name); 579 } 580 581 @Override 582 public T removeCookie(String name) { 583 Validate.notEmptyParam(name, "name"); 584 cookies.remove(name); 585 return (T) this; 586 } 587 588 @Override 589 public Map<String, String> cookies() { 590 return cookies; 591 } 592 } 593 594 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 595 static { 596 System.setProperty("sun.net.http.allowRestrictedHeaders", "true"); 597 // make sure that we can send Sec-Fetch-Site headers etc. 598 } 599 600 private @Nullable Proxy proxy; 601 private int timeoutMilliseconds; 602 private int maxBodySizeBytes; 603 private boolean followRedirects; 604 private final Collection<Connection.KeyVal> data; 605 private @Nullable String body = null; 606 private boolean ignoreHttpErrors = false; 607 private boolean ignoreContentType = false; 608 private Parser parser; 609 private boolean parserDefined = false; // called parser(...) vs initialized in ctor 610 private String postDataCharset = DataUtil.defaultCharsetName; 611 private @Nullable SSLSocketFactory sslSocketFactory; 612 private CookieManager cookieManager; 613 private @Nullable RequestAuthenticator authenticator; 614 private @Nullable Progress<Connection.Response> responseProgress; 615 616 private volatile boolean executing = false; 617 618 Request() { 619 super(); 620 timeoutMilliseconds = 30000; // 30 seconds 621 maxBodySizeBytes = 1024 * 1024 * 2; // 2MB 622 followRedirects = true; 623 data = new ArrayList<>(); 624 method = Method.GET; 625 addHeader("Accept-Encoding", "gzip"); 626 addHeader(USER_AGENT, DEFAULT_UA); 627 parser = Parser.htmlParser(); 628 cookieManager = new CookieManager(); // creates a default InMemoryCookieStore 629 } 630 631 Request(Request copy) { 632 super(copy); 633 proxy = copy.proxy; 634 postDataCharset = copy.postDataCharset; 635 timeoutMilliseconds = copy.timeoutMilliseconds; 636 maxBodySizeBytes = copy.maxBodySizeBytes; 637 followRedirects = copy.followRedirects; 638 data = new ArrayList<>(); // data not copied 639 //body not copied 640 ignoreHttpErrors = copy.ignoreHttpErrors; 641 ignoreContentType = copy.ignoreContentType; 642 parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy 643 parserDefined = copy.parserDefined; 644 sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share 645 cookieManager = copy.cookieManager; 646 authenticator = copy.authenticator; 647 responseProgress = copy.responseProgress; 648 executing = false; 649 } 650 651 @Override 652 public Proxy proxy() { 653 return proxy; 654 } 655 656 @Override 657 public Request proxy(@Nullable Proxy proxy) { 658 this.proxy = proxy; 659 return this; 660 } 661 662 @Override 663 public Request proxy(String host, int port) { 664 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 665 return this; 666 } 667 668 @Override 669 public int timeout() { 670 return timeoutMilliseconds; 671 } 672 673 @Override 674 public Request timeout(int millis) { 675 Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 676 timeoutMilliseconds = millis; 677 return this; 678 } 679 680 @Override 681 public int maxBodySize() { 682 return maxBodySizeBytes; 683 } 684 685 @Override 686 public Connection.Request maxBodySize(int bytes) { 687 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 688 maxBodySizeBytes = bytes; 689 return this; 690 } 691 692 @Override 693 public boolean followRedirects() { 694 return followRedirects; 695 } 696 697 @Override 698 public Connection.Request followRedirects(boolean followRedirects) { 699 this.followRedirects = followRedirects; 700 return this; 701 } 702 703 @Override 704 public boolean ignoreHttpErrors() { 705 return ignoreHttpErrors; 706 } 707 708 @Override 709 public SSLSocketFactory sslSocketFactory() { 710 return sslSocketFactory; 711 } 712 713 @Override 714 public void sslSocketFactory(SSLSocketFactory sslSocketFactory) { 715 this.sslSocketFactory = sslSocketFactory; 716 } 717 718 @Override 719 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 720 this.ignoreHttpErrors = ignoreHttpErrors; 721 return this; 722 } 723 724 @Override 725 public boolean ignoreContentType() { 726 return ignoreContentType; 727 } 728 729 @Override 730 public Connection.Request ignoreContentType(boolean ignoreContentType) { 731 this.ignoreContentType = ignoreContentType; 732 return this; 733 } 734 735 @Override 736 public Request data(Connection.KeyVal keyval) { 737 Validate.notNullParam(keyval, "keyval"); 738 data.add(keyval); 739 return this; 740 } 741 742 @Override 743 public Collection<Connection.KeyVal> data() { 744 return data; 745 } 746 747 @Override 748 public Connection.Request requestBody(@Nullable String body) { 749 this.body = body; 750 return this; 751 } 752 753 @Override 754 public String requestBody() { 755 return body; 756 } 757 758 @Override 759 public Request parser(Parser parser) { 760 this.parser = parser; 761 parserDefined = true; 762 return this; 763 } 764 765 @Override 766 public Parser parser() { 767 return parser; 768 } 769 770 @Override 771 public Connection.Request postDataCharset(String charset) { 772 Validate.notNullParam(charset, "charset"); 773 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 774 this.postDataCharset = charset; 775 return this; 776 } 777 778 @Override 779 public String postDataCharset() { 780 return postDataCharset; 781 } 782 783 CookieManager cookieManager() { 784 return cookieManager; 785 } 786 787 @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { 788 this.authenticator = authenticator; 789 return this; 790 } 791 792 @Override @Nullable public RequestAuthenticator auth() { 793 return authenticator; 794 } 795 } 796 797 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 798 private static final int MAX_REDIRECTS = 20; 799 private static final String LOCATION = "Location"; 800 private final int statusCode; 801 private final String statusMessage; 802 private @Nullable ByteBuffer byteData; 803 private @Nullable ControllableInputStream bodyStream; 804 private @Nullable HttpURLConnection conn; 805 private @Nullable String charset; 806 private @Nullable final String contentType; 807 private boolean executed = false; 808 private boolean inputStreamRead = false; 809 private int numRedirects = 0; 810 private final HttpConnection.Request req; 811 812 /* 813 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 814 */ 815 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 816 817 /** 818 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 819 are created from the HttpURLConnection and fields defined. 820 */ 821 Response() { 822 super(); 823 statusCode = 400; 824 statusMessage = "Request not made"; 825 req = new Request(); 826 contentType = null; 827 } 828 829 static Response execute(HttpConnection.Request req) throws IOException { 830 return execute(req, null); 831 } 832 833 static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException { 834 synchronized (req) { 835 Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads."); 836 req.executing = true; 837 } 838 Validate.notNullParam(req, "req"); 839 URL url = req.url(); 840 Validate.notNull(url, "URL must be specified to connect"); 841 String protocol = url.getProtocol(); 842 if (!protocol.equals("http") && !protocol.equals("https")) 843 throw new MalformedURLException("Only http & https protocols supported"); 844 final boolean methodHasBody = req.method().hasBody(); 845 final boolean hasRequestBody = req.requestBody() != null; 846 if (!methodHasBody) 847 Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method()); 848 849 // set up the request for execution 850 String mimeBoundary = null; 851 if (req.data().size() > 0 && (!methodHasBody || hasRequestBody)) 852 serialiseRequestUrl(req); 853 else if (methodHasBody) 854 mimeBoundary = setOutputContentType(req); 855 856 long startTime = System.nanoTime(); 857 HttpURLConnection conn = createConnection(req); 858 Response res = null; 859 try { 860 conn.connect(); 861 if (conn.getDoOutput()) { 862 OutputStream out = conn.getOutputStream(); 863 try { writePost(req, out, mimeBoundary); } 864 catch (IOException e) { conn.disconnect(); throw e; } 865 finally { out.close(); } 866 } 867 868 int status = conn.getResponseCode(); 869 res = new Response(conn, req, previousResponse); 870 871 // redirect if there's a location header (from 3xx, or 201 etc) 872 if (res.hasHeader(LOCATION) && req.followRedirects()) { 873 if (status != HTTP_TEMP_REDIR) { 874 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 875 req.data().clear(); 876 req.requestBody(null); 877 req.removeHeader(CONTENT_TYPE); 878 } 879 880 String location = res.header(LOCATION); 881 Validate.notNull(location); 882 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 883 location = location.substring(6); 884 URL redir = StringUtil.resolve(req.url(), location); 885 req.url(redir); 886 887 req.executing = false; 888 return execute(req, res); 889 } 890 if ((status < 200 || status >= 400) && !req.ignoreHttpErrors()) 891 throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString()); 892 893 // check that we can handle the returned content type; if not, abort before fetching it 894 String contentType = res.contentType(); 895 if (contentType != null 896 && !req.ignoreContentType() 897 && !contentType.startsWith("text/") 898 && !xmlContentTypeRxp.matcher(contentType).matches() 899 ) 900 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 901 contentType, req.url().toString()); 902 903 // switch to the XML parser if content type is xml and not parser not explicitly set 904 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 905 if (!req.parserDefined) req.parser(Parser.xmlParser()); 906 } 907 908 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 909 if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 910 InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream(); 911 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 912 stream = new GZIPInputStream(stream); 913 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 914 stream = new InflaterInputStream(stream, new Inflater(true)); 915 916 res.bodyStream = ControllableInputStream.wrap( 917 stream, DefaultBufferSize, req.maxBodySize()) 918 .timeout(startTime, req.timeout()); 919 920 if (req.responseProgress != null) // set response progress listener 921 res.bodyStream.onProgress(conn.getContentLength(), req.responseProgress, res); 922 } else { 923 res.byteData = DataUtil.emptyByteBuffer(); 924 } 925 } catch (IOException e) { 926 if (res != null) res.safeClose(); // will be non-null if got to conn 927 throw e; 928 } finally { 929 req.executing = false; 930 931 // detach any thread local auth delegate 932 if (req.authenticator != null) 933 AuthenticationHandler.handler.remove(); 934 } 935 936 res.executed = true; 937 return res; 938 } 939 940 @Override 941 public int statusCode() { 942 return statusCode; 943 } 944 945 @Override 946 public String statusMessage() { 947 return statusMessage; 948 } 949 950 @Override 951 public String charset() { 952 return charset; 953 } 954 955 @Override 956 public Response charset(String charset) { 957 this.charset = charset; 958 return this; 959 } 960 961 @Override 962 public String contentType() { 963 return contentType; 964 } 965 966 /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */ 967 private ControllableInputStream prepareParse() { 968 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 969 ControllableInputStream stream = bodyStream; 970 if (byteData != null) { // bytes have been read in to the buffer, parse that 971 ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit()); 972 stream = ControllableInputStream.wrap(bytes, 0); // no max 973 inputStreamRead = false; // ok to reparse if in bytes 974 } 975 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 976 Validate.notNull(stream); 977 inputStreamRead = true; 978 return stream; 979 } 980 981 @Override public Document parse() throws IOException { 982 ControllableInputStream stream = prepareParse(); 983 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 984 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req? 985 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 986 safeClose(); 987 return doc; 988 } 989 990 @Override public StreamParser streamParser() throws IOException { 991 ControllableInputStream stream = prepareParse(); 992 String baseUri = url.toExternalForm(); 993 DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser()); 994 // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit. 995 996 // set up the stream parser and rig this connection up to the parsed doc: 997 StreamParser streamer = new StreamParser(req.parser()); 998 BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset)); 999 streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it 1000 streamer.document().connection(new HttpConnection(req, this)); 1001 charset = charsetDoc.charset.name(); 1002 1003 // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream 1004 return streamer; 1005 } 1006 1007 private void prepareByteData() { 1008 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1009 if (bodyStream != null && byteData == null) { 1010 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 1011 try { 1012 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 1013 } catch (IOException e) { 1014 throw new UncheckedIOException(e); 1015 } finally { 1016 inputStreamRead = true; 1017 safeClose(); 1018 } 1019 } 1020 } 1021 1022 @Override 1023 public String body() { 1024 prepareByteData(); 1025 Validate.notNull(byteData); 1026 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 1027 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 1028 .decode(byteData).toString(); 1029 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 1030 return body; 1031 } 1032 1033 @Override 1034 public byte[] bodyAsBytes() { 1035 prepareByteData(); 1036 Validate.notNull(byteData); 1037 Validate.isTrue(byteData.hasArray()); // we made it, so it should 1038 1039 byte[] array = byteData.array(); 1040 int offset = byteData.arrayOffset(); 1041 int length = byteData.limit(); 1042 1043 if (offset == 0 && length == array.length) { // exact, just return it 1044 return array; 1045 } else { // trim to size 1046 byte[] exactArray = new byte[length]; 1047 System.arraycopy(array, offset, exactArray, 0, length); 1048 return exactArray; 1049 } 1050 } 1051 1052 @Override 1053 public Connection.Response bufferUp() { 1054 prepareByteData(); 1055 return this; 1056 } 1057 1058 @Override 1059 public BufferedInputStream bodyStream() { 1060 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1061 1062 // if we have read to bytes (via buffer up), return those as a stream. 1063 if (byteData != null) { 1064 return new BufferedInputStream( 1065 new ByteArrayInputStream(byteData.array(), 0, byteData.limit()), 1066 DefaultBufferSize); 1067 } 1068 1069 Validate.isFalse(inputStreamRead, "Request has already been read"); 1070 Validate.notNull(bodyStream); 1071 inputStreamRead = true; 1072 return bodyStream.inputStream(); 1073 } 1074 1075 // set up connection defaults, and details from request 1076 private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException { 1077 Proxy proxy = req.proxy(); 1078 final HttpURLConnection conn = (HttpURLConnection) ( 1079 proxy == null ? 1080 req.url().openConnection() : 1081 req.url().openConnection(proxy) 1082 ); 1083 1084 conn.setRequestMethod(req.method().name()); 1085 conn.setInstanceFollowRedirects(false); // don't rely on native redirection support 1086 conn.setConnectTimeout(req.timeout()); 1087 conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read 1088 1089 if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection) 1090 ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory()); 1091 if (req.authenticator != null) 1092 AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally 1093 if (req.method().hasBody()) 1094 conn.setDoOutput(true); 1095 CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store 1096 for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) { 1097 for (String value : header.getValue()) { 1098 conn.addRequestProperty(header.getKey(), value); 1099 } 1100 } 1101 return conn; 1102 } 1103 1104 /** 1105 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1106 * keep-alives to work (as the underlying connection is actually held open, despite the name). 1107 */ 1108 private void safeClose() { 1109 if (bodyStream != null) { 1110 try { 1111 bodyStream.close(); 1112 } catch (IOException e) { 1113 // no-op 1114 } finally { 1115 bodyStream = null; 1116 } 1117 } 1118 if (conn != null) { 1119 conn.disconnect(); 1120 conn = null; 1121 } 1122 } 1123 1124 // set up url, method, header, cookies 1125 private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException { 1126 this.conn = conn; 1127 this.req = request; 1128 method = Method.valueOf(conn.getRequestMethod()); 1129 url = conn.getURL(); 1130 statusCode = conn.getResponseCode(); 1131 statusMessage = conn.getResponseMessage(); 1132 contentType = conn.getContentType(); 1133 1134 Map<String, List<String>> resHeaders = createHeaderMap(conn); 1135 processResponseHeaders(resHeaders); // includes cookie key/val read during header scan 1136 CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store 1137 1138 if (previousResponse != null) { // was redirected 1139 // map previous response cookies into this response cookies() object 1140 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1141 if (!hasCookie(prevCookie.getKey())) 1142 cookie(prevCookie.getKey(), prevCookie.getValue()); 1143 } 1144 previousResponse.safeClose(); 1145 1146 // enforce too many redirects: 1147 numRedirects = previousResponse.numRedirects + 1; 1148 if (numRedirects >= MAX_REDIRECTS) 1149 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 1150 } 1151 } 1152 1153 private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) { 1154 // the default sun impl of conn.getHeaderFields() returns header values out of order 1155 final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>(); 1156 int i = 0; 1157 while (true) { 1158 final String key = conn.getHeaderFieldKey(i); 1159 final String val = conn.getHeaderField(i); 1160 if (key == null && val == null) 1161 break; 1162 i++; 1163 if (key == null || val == null) 1164 continue; // skip http1.1 line 1165 1166 final List<String> vals = headers.computeIfAbsent(key, Functions.listFunction()); 1167 vals.add(val); 1168 } 1169 return headers; 1170 } 1171 1172 void processResponseHeaders(Map<String, List<String>> resHeaders) { 1173 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1174 String name = entry.getKey(); 1175 if (name == null) 1176 continue; // http/1.1 line 1177 1178 List<String> values = entry.getValue(); 1179 for (String value : values) { 1180 addHeader(name, fixHeaderEncoding(value)); 1181 } 1182 } 1183 } 1184 1185 /** 1186 Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that 1187 and re-decode the string as UTF-8. 1188 * @param val a header value string that may have been incorrectly decoded as 8859. 1189 * @return a potentially re-decoded string. 1190 */ 1191 @Nullable 1192 private static String fixHeaderEncoding(@Nullable String val) { 1193 if (val == null) return val; 1194 byte[] bytes = val.getBytes(ISO_8859_1); 1195 if (looksLikeUtf8(bytes)) 1196 return new String(bytes, UTF_8); 1197 else 1198 return val; 1199 } 1200 1201 private static boolean looksLikeUtf8(byte[] input) { 1202 int i = 0; 1203 // BOM: 1204 if (input.length >= 3 1205 && (input[0] & 0xFF) == 0xEF 1206 && (input[1] & 0xFF) == 0xBB 1207 && (input[2] & 0xFF) == 0xBF) { 1208 i = 3; 1209 } 1210 1211 int end; 1212 boolean foundNonAscii = false; 1213 for (int j = input.length; i < j; ++i) { 1214 int o = input[i]; 1215 if ((o & 0x80) == 0) { 1216 continue; // ASCII 1217 } 1218 foundNonAscii = true; 1219 1220 // UTF-8 leading: 1221 if ((o & 0xE0) == 0xC0) { 1222 end = i + 1; 1223 } else if ((o & 0xF0) == 0xE0) { 1224 end = i + 2; 1225 } else if ((o & 0xF8) == 0xF0) { 1226 end = i + 3; 1227 } else { 1228 return false; 1229 } 1230 1231 if (end >= input.length) 1232 return false; 1233 1234 while (i < end) { 1235 i++; 1236 o = input[i]; 1237 if ((o & 0xC0) != 0x80) { 1238 return false; 1239 } 1240 } 1241 } 1242 return foundNonAscii; 1243 } 1244 1245 private @Nullable static String setOutputContentType(final Connection.Request req) { 1246 final String contentType = req.header(CONTENT_TYPE); 1247 String bound = null; 1248 if (contentType != null) { 1249 // no-op; don't add content type as already set (e.g. for requestBody()) 1250 // todo - if content type already set, we could add charset 1251 1252 // if user has set content type to multipart/form-data, auto add boundary. 1253 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1254 bound = DataUtil.mimeBoundary(); 1255 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1256 } 1257 1258 } 1259 else if (needsMultipart(req)) { 1260 bound = DataUtil.mimeBoundary(); 1261 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1262 } else { 1263 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1264 } 1265 return bound; 1266 } 1267 1268 private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException { 1269 final Collection<Connection.KeyVal> data = req.data(); 1270 final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset()))); 1271 1272 if (boundary != null) { 1273 // boundary will be set if we're in multipart mode 1274 for (Connection.KeyVal keyVal : data) { 1275 w.write("--"); 1276 w.write(boundary); 1277 w.write("\r\n"); 1278 w.write("Content-Disposition: form-data; name=\""); 1279 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1280 w.write("\""); 1281 final InputStream input = keyVal.inputStream(); 1282 if (input != null) { 1283 w.write("; filename=\""); 1284 w.write(encodeMimeName(keyVal.value())); 1285 w.write("\"\r\nContent-Type: "); 1286 String contentType = keyVal.contentType(); 1287 w.write(contentType != null ? contentType : DefaultUploadType); 1288 w.write("\r\n\r\n"); 1289 w.flush(); // flush 1290 DataUtil.crossStreams(input, outputStream); 1291 outputStream.flush(); 1292 } else { 1293 w.write("\r\n\r\n"); 1294 w.write(keyVal.value()); 1295 } 1296 w.write("\r\n"); 1297 } 1298 w.write("--"); 1299 w.write(boundary); 1300 w.write("--"); 1301 } else { 1302 String body = req.requestBody(); 1303 if (body != null) { 1304 // data will be in query string, we're sending a plaintext body 1305 w.write(body); 1306 } 1307 else { 1308 // regular form data (application/x-www-form-urlencoded) 1309 boolean first = true; 1310 for (Connection.KeyVal keyVal : data) { 1311 if (!first) 1312 w.append('&'); 1313 else 1314 first = false; 1315 1316 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1317 w.write('='); 1318 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1319 } 1320 } 1321 } 1322 w.close(); 1323 } 1324 1325 // for get url reqs, serialise the data map into the url 1326 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1327 UrlBuilder in = new UrlBuilder(req.url()); 1328 1329 for (Connection.KeyVal keyVal : req.data()) { 1330 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1331 in.appendKeyVal(keyVal); 1332 } 1333 req.url(in.build()); 1334 req.data().clear(); // moved into url as get params 1335 } 1336 } 1337 1338 private static boolean needsMultipart(Connection.Request req) { 1339 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1340 for (Connection.KeyVal keyVal : req.data()) { 1341 if (keyVal.hasInputStream()) 1342 return true; 1343 } 1344 return false; 1345 } 1346 1347 public static class KeyVal implements Connection.KeyVal { 1348 private String key; 1349 private String value; 1350 private @Nullable InputStream stream; 1351 private @Nullable String contentType; 1352 1353 public static KeyVal create(String key, String value) { 1354 return new KeyVal(key, value); 1355 } 1356 1357 public static KeyVal create(String key, String filename, InputStream stream) { 1358 return new KeyVal(key, filename) 1359 .inputStream(stream); 1360 } 1361 1362 private KeyVal(String key, String value) { 1363 Validate.notEmptyParam(key, "key"); 1364 Validate.notNullParam(value, "value"); 1365 this.key = key; 1366 this.value = value; 1367 } 1368 1369 @Override 1370 public KeyVal key(String key) { 1371 Validate.notEmptyParam(key, "key"); 1372 this.key = key; 1373 return this; 1374 } 1375 1376 @Override 1377 public String key() { 1378 return key; 1379 } 1380 1381 @Override 1382 public KeyVal value(String value) { 1383 Validate.notNullParam(value, "value"); 1384 this.value = value; 1385 return this; 1386 } 1387 1388 @Override 1389 public String value() { 1390 return value; 1391 } 1392 1393 public KeyVal inputStream(InputStream inputStream) { 1394 Validate.notNullParam(value, "inputStream"); 1395 this.stream = inputStream; 1396 return this; 1397 } 1398 1399 @Override 1400 public InputStream inputStream() { 1401 return stream; 1402 } 1403 1404 @Override 1405 public boolean hasInputStream() { 1406 return stream != null; 1407 } 1408 1409 @Override 1410 public Connection.KeyVal contentType(String contentType) { 1411 Validate.notEmpty(contentType); 1412 this.contentType = contentType; 1413 return this; 1414 } 1415 1416 @Override 1417 public String contentType() { 1418 return contentType; 1419 } 1420 1421 @Override 1422 public String toString() { 1423 return key + "=" + value; 1424 } 1425 } 1426}