001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.Progress; 006import org.jsoup.UnsupportedMimeTypeException; 007import org.jsoup.internal.ControllableInputStream; 008import org.jsoup.internal.StringUtil; 009import org.jsoup.nodes.Document; 010import org.jsoup.parser.Parser; 011import org.jsoup.parser.StreamParser; 012import org.jspecify.annotations.Nullable; 013 014import javax.net.ssl.SSLSocketFactory; 015import java.io.BufferedInputStream; 016import java.io.BufferedReader; 017import java.io.BufferedWriter; 018import java.io.ByteArrayInputStream; 019import java.io.IOException; 020import java.io.InputStream; 021import java.io.InputStreamReader; 022import java.io.OutputStream; 023import java.io.OutputStreamWriter; 024import java.io.UncheckedIOException; 025import java.net.CookieManager; 026import java.net.CookieStore; 027import java.net.InetSocketAddress; 028import java.net.MalformedURLException; 029import java.net.Proxy; 030import java.net.URL; 031import java.net.URLEncoder; 032import java.nio.Buffer; 033import java.nio.ByteBuffer; 034import java.nio.charset.Charset; 035import java.nio.charset.IllegalCharsetNameException; 036import java.nio.charset.StandardCharsets; 037import java.util.ArrayList; 038import java.util.Collection; 039import java.util.Collections; 040import java.util.LinkedHashMap; 041import java.util.List; 042import java.util.Map; 043import java.util.concurrent.locks.ReentrantLock; 044import java.util.regex.Pattern; 045import java.util.zip.GZIPInputStream; 046import java.util.zip.Inflater; 047import java.util.zip.InflaterInputStream; 048 049import static org.jsoup.Connection.Method.HEAD; 050import static org.jsoup.helper.DataUtil.UTF_8; 051import static org.jsoup.internal.Normalizer.lowerCase; 052import static org.jsoup.internal.SharedConstants.DefaultBufferSize; 053 054/** 055 * Implementation of {@link Connection}. 056 * @see org.jsoup.Jsoup#connect(String) 057 */ 058@SuppressWarnings("CharsetObjectCanBeUsed") 059public class HttpConnection implements Connection { 060 public static final String CONTENT_ENCODING = "Content-Encoding"; 061 /** 062 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 063 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 064 */ 065 public static final String DEFAULT_UA = 066 "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"; 067 private static final String USER_AGENT = "User-Agent"; 068 public static final String CONTENT_TYPE = "Content-Type"; 069 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 070 public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 071 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 072 static final String DefaultUploadType = "application/octet-stream"; 073 private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); 074 075 private HttpConnection.Request req; 076 private Connection.@Nullable Response res; 077 @Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor 078 @Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor 079 080 /** 081 Create a new Connection, with the request URL specified. 082 @param url the URL to fetch from 083 @return a new Connection object 084 */ 085 public static Connection connect(String url) { 086 Connection con = new HttpConnection(); 087 con.url(url); 088 return con; 089 } 090 091 /** 092 Create a new Connection, with the request URL specified. 093 @param url the URL to fetch from 094 @return a new Connection object 095 */ 096 public static Connection connect(URL url) { 097 Connection con = new HttpConnection(); 098 con.url(url); 099 return con; 100 } 101 102 /** 103 Create a new, empty HttpConnection. 104 */ 105 public HttpConnection() { 106 req = new Request(); 107 req.connection = this; 108 } 109 110 /** 111 Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not 112 copied. All other settings (proxy, parser, cookies, etc) are copied. 113 @param copy the request to copy 114 */ 115 HttpConnection(Request copy) { 116 req = new Request(copy); 117 } 118 119 static String encodeMimeName(String val) { 120 return val.replace("\"", "%22"); 121 } 122 123 @Override 124 public Connection newRequest() { 125 // copy the prototype request for the different settings, cookie manager, etc 126 return new HttpConnection(req); 127 } 128 129 /** Create a new Connection that just wraps the provided Request and Response */ 130 private HttpConnection(Request req, Response res) { 131 this.req = req; 132 this.res = res; 133 } 134 135 @Override 136 public Connection url(URL url) { 137 req.url(url); 138 return this; 139 } 140 141 @Override 142 public Connection url(String url) { 143 Validate.notEmptyParam(url, "url"); 144 try { 145 req.url(new URL(url)); 146 } catch (MalformedURLException e) { 147 throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e); 148 } 149 return this; 150 } 151 152 @Override 153 public Connection proxy(@Nullable Proxy proxy) { 154 req.proxy(proxy); 155 return this; 156 } 157 158 @Override 159 public Connection proxy(String host, int port) { 160 req.proxy(host, port); 161 return this; 162 } 163 164 @Override 165 public Connection userAgent(String userAgent) { 166 Validate.notNullParam(userAgent, "userAgent"); 167 req.header(USER_AGENT, userAgent); 168 return this; 169 } 170 171 @Override 172 public Connection timeout(int millis) { 173 req.timeout(millis); 174 return this; 175 } 176 177 @Override 178 public Connection maxBodySize(int bytes) { 179 req.maxBodySize(bytes); 180 return this; 181 } 182 183 @Override 184 public Connection followRedirects(boolean followRedirects) { 185 req.followRedirects(followRedirects); 186 return this; 187 } 188 189 @Override 190 public Connection referrer(String referrer) { 191 Validate.notNullParam(referrer, "referrer"); 192 req.header("Referer", referrer); 193 return this; 194 } 195 196 @Override 197 public Connection method(Method method) { 198 req.method(method); 199 return this; 200 } 201 202 @Override 203 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 204 req.ignoreHttpErrors(ignoreHttpErrors); 205 return this; 206 } 207 208 @Override 209 public Connection ignoreContentType(boolean ignoreContentType) { 210 req.ignoreContentType(ignoreContentType); 211 return this; 212 } 213 214 @Override 215 public Connection data(String key, String value) { 216 req.data(KeyVal.create(key, value)); 217 return this; 218 } 219 220 @Override 221 public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) { 222 req.sslSocketFactory(sslSocketFactory); 223 return this; 224 } 225 226 @Override 227 public Connection data(String key, String filename, InputStream inputStream) { 228 req.data(KeyVal.create(key, filename, inputStream)); 229 return this; 230 } 231 232 @Override 233 public Connection data(String key, String filename, InputStream inputStream, String contentType) { 234 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 235 return this; 236 } 237 238 @Override 239 public Connection data(Map<String, String> data) { 240 Validate.notNullParam(data, "data"); 241 for (Map.Entry<String, String> entry : data.entrySet()) { 242 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 243 } 244 return this; 245 } 246 247 @Override 248 public Connection data(String... keyvals) { 249 Validate.notNullParam(keyvals, "keyvals"); 250 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 251 for (int i = 0; i < keyvals.length; i += 2) { 252 String key = keyvals[i]; 253 String value = keyvals[i+1]; 254 Validate.notEmpty(key, "Data key must not be empty"); 255 Validate.notNull(value, "Data value must not be null"); 256 req.data(KeyVal.create(key, value)); 257 } 258 return this; 259 } 260 261 @Override 262 public Connection data(Collection<Connection.KeyVal> data) { 263 Validate.notNullParam(data, "data"); 264 for (Connection.KeyVal entry: data) { 265 req.data(entry); 266 } 267 return this; 268 } 269 270 @Override 271 public Connection.@Nullable KeyVal data(String key) { 272 Validate.notEmptyParam(key, "key"); 273 for (Connection.KeyVal keyVal : request().data()) { 274 if (keyVal.key().equals(key)) 275 return keyVal; 276 } 277 return null; 278 } 279 280 @Override 281 public Connection requestBody(String body) { 282 req.requestBody(body); 283 return this; 284 } 285 286 @Override 287 public Connection requestBodyStream(InputStream stream) { 288 req.requestBodyStream(stream); 289 return this; 290 } 291 292 @Override 293 public Connection header(String name, String value) { 294 req.header(name, value); 295 return this; 296 } 297 298 @Override 299 public Connection headers(Map<String,String> headers) { 300 Validate.notNullParam(headers, "headers"); 301 for (Map.Entry<String,String> entry : headers.entrySet()) { 302 req.header(entry.getKey(),entry.getValue()); 303 } 304 return this; 305 } 306 307 @Override 308 public Connection cookie(String name, String value) { 309 req.cookie(name, value); 310 return this; 311 } 312 313 @Override 314 public Connection cookies(Map<String, String> cookies) { 315 Validate.notNullParam(cookies, "cookies"); 316 for (Map.Entry<String, String> entry : cookies.entrySet()) { 317 req.cookie(entry.getKey(), entry.getValue()); 318 } 319 return this; 320 } 321 322 @Override 323 public Connection cookieStore(CookieStore cookieStore) { 324 // create a new cookie manager using the new store 325 req.cookieManager = new CookieManager(cookieStore, null); 326 return this; 327 } 328 329 @Override 330 public CookieStore cookieStore() { 331 return req.cookieManager.getCookieStore(); 332 } 333 334 @Override 335 public Connection parser(Parser parser) { 336 req.parser(parser); 337 return this; 338 } 339 340 @Override 341 public Document get() throws IOException { 342 req.method(Method.GET); 343 execute(); 344 Validate.notNull(res); 345 return res.parse(); 346 } 347 348 @Override 349 public Document post() throws IOException { 350 req.method(Method.POST); 351 execute(); 352 Validate.notNull(res); 353 return res.parse(); 354 } 355 356 @Override 357 public Connection.Response execute() throws IOException { 358 res = Response.execute(req); 359 return res; 360 } 361 362 @Override 363 public Connection.Request request() { 364 return req; 365 } 366 367 @Override 368 public Connection request(Connection.Request request) { 369 req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired 370 return this; 371 } 372 373 @Override 374 public Connection.Response response() { 375 if (res == null) { 376 throw new IllegalArgumentException("You must execute the request before getting a response."); 377 } 378 return res; 379 } 380 381 @Override 382 public Connection response(Connection.Response response) { 383 res = response; 384 return this; 385 } 386 387 @Override 388 public Connection postDataCharset(String charset) { 389 req.postDataCharset(charset); 390 return this; 391 } 392 393 @Override public Connection auth(@Nullable RequestAuthenticator authenticator) { 394 req.auth(authenticator); 395 return this; 396 } 397 398 @Override public Connection onResponseProgress(Progress<Connection.Response> handler) { 399 req.responseProgress = handler; 400 return this; 401 } 402 403 @SuppressWarnings("unchecked") 404 private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> { 405 private static final URL UnsetUrl; // only used if you created a new Request() 406 static { 407 try { 408 UnsetUrl = new URL("http://undefined/"); 409 } catch (MalformedURLException e) { 410 throw new IllegalStateException(e); 411 } 412 } 413 414 URL url = UnsetUrl; 415 Method method = Method.GET; 416 Map<String, List<String>> headers; 417 Map<String, String> cookies; 418 419 private Base() { 420 headers = new LinkedHashMap<>(); 421 cookies = new LinkedHashMap<>(); 422 } 423 424 private Base(Base<T> copy) { 425 url = copy.url; // unmodifiable object 426 method = copy.method; 427 headers = new LinkedHashMap<>(); 428 for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) { 429 headers.put(entry.getKey(), new ArrayList<>(entry.getValue())); 430 } 431 cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings 432 } 433 434 @Override 435 public URL url() { 436 if (url == UnsetUrl) 437 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request."); 438 return url; 439 } 440 441 @Override 442 public T url(URL url) { 443 Validate.notNullParam(url, "url"); 444 this.url = new UrlBuilder(url).build(); 445 return (T) this; 446 } 447 448 @Override 449 public Method method() { 450 return method; 451 } 452 453 @Override 454 public T method(Method method) { 455 Validate.notNullParam(method, "method"); 456 this.method = method; 457 return (T) this; 458 } 459 460 @Override @Nullable 461 public String header(String name) { 462 Validate.notNullParam(name, "name"); 463 List<String> vals = getHeadersCaseInsensitive(name); 464 if (!vals.isEmpty()) { 465 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 466 return StringUtil.join(vals, ", "); 467 } 468 469 return null; 470 } 471 472 @Override 473 public T addHeader(String name, @Nullable String value) { 474 Validate.notEmptyParam(name, "name"); 475 value = value == null ? "" : value; 476 477 List<String> values = headers(name); 478 if (values.isEmpty()) { 479 values = new ArrayList<>(); 480 headers.put(name, values); 481 } 482 values.add(value); 483 484 return (T) this; 485 } 486 487 @Override 488 public List<String> headers(String name) { 489 Validate.notEmptyParam(name, "name"); 490 return getHeadersCaseInsensitive(name); 491 } 492 493 @Override 494 public T header(String name, String value) { 495 Validate.notEmptyParam(name, "name"); 496 removeHeader(name); // ensures we don't get an "accept-encoding" and an "Accept-Encoding" 497 addHeader(name, value); 498 return (T) this; 499 } 500 501 @Override 502 public boolean hasHeader(String name) { 503 Validate.notEmptyParam(name, "name"); 504 return !getHeadersCaseInsensitive(name).isEmpty(); 505 } 506 507 /** 508 * Test if the request has a header with this value (case-insensitive). 509 */ 510 @Override 511 public boolean hasHeaderWithValue(String name, String value) { 512 Validate.notEmpty(name); 513 Validate.notEmpty(value); 514 List<String> values = headers(name); 515 for (String candidate : values) { 516 if (value.equalsIgnoreCase(candidate)) 517 return true; 518 } 519 return false; 520 } 521 522 @Override 523 public T removeHeader(String name) { 524 Validate.notEmptyParam(name, "name"); 525 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too 526 if (entry != null) 527 headers.remove(entry.getKey()); // ensures correct case 528 return (T) this; 529 } 530 531 @Override 532 public Map<String, String> headers() { 533 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 534 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 535 String header = entry.getKey(); 536 List<String> values = entry.getValue(); 537 if (!values.isEmpty()) 538 map.put(header, values.get(0)); 539 } 540 return map; 541 } 542 543 @Override 544 public Map<String, List<String>> multiHeaders() { 545 return headers; 546 } 547 548 private List<String> getHeadersCaseInsensitive(String name) { 549 Validate.notNull(name); 550 551 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 552 if (name.equalsIgnoreCase(entry.getKey())) 553 return entry.getValue(); 554 } 555 556 return Collections.emptyList(); 557 } 558 559 private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) { 560 String lc = lowerCase(name); 561 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 562 if (lowerCase(entry.getKey()).equals(lc)) 563 return entry; 564 } 565 return null; 566 } 567 568 @Override 569 public String cookie(String name) { 570 Validate.notEmptyParam(name, "name"); 571 return cookies.get(name); 572 } 573 574 @Override 575 public T cookie(String name, String value) { 576 Validate.notEmptyParam(name, "name"); 577 Validate.notNullParam(value, "value"); 578 cookies.put(name, value); 579 return (T) this; 580 } 581 582 @Override 583 public boolean hasCookie(String name) { 584 Validate.notEmptyParam(name, "name"); 585 return cookies.containsKey(name); 586 } 587 588 @Override 589 public T removeCookie(String name) { 590 Validate.notEmptyParam(name, "name"); 591 cookies.remove(name); 592 return (T) this; 593 } 594 595 @Override 596 public Map<String, String> cookies() { 597 return cookies; 598 } 599 } 600 601 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 602 static { 603 System.setProperty("sun.net.http.allowRestrictedHeaders", "true"); 604 // make sure that we can send Sec-Fetch-Site headers etc. 605 } 606 607 HttpConnection connection; 608 private @Nullable Proxy proxy; 609 private int timeoutMilliseconds; 610 private int maxBodySizeBytes; 611 private boolean followRedirects; 612 private final Collection<Connection.KeyVal> data; 613 private @Nullable Object body = null; // String or InputStream 614 @Nullable String mimeBoundary; 615 private boolean ignoreHttpErrors = false; 616 private boolean ignoreContentType = false; 617 private Parser parser; 618 private boolean parserDefined = false; // called parser(...) vs initialized in ctor 619 private String postDataCharset = DataUtil.defaultCharsetName; 620 private @Nullable SSLSocketFactory sslSocketFactory; 621 private CookieManager cookieManager; 622 @Nullable RequestAuthenticator authenticator; 623 private @Nullable Progress<Connection.Response> responseProgress; 624 625 private final ReentrantLock executing = new ReentrantLock(); // detects and warns if same request used concurrently 626 627 Request() { 628 super(); 629 timeoutMilliseconds = 30000; // 30 seconds 630 maxBodySizeBytes = 1024 * 1024 * 2; // 2MB 631 followRedirects = true; 632 data = new ArrayList<>(); 633 method = Method.GET; 634 addHeader("Accept-Encoding", "gzip"); 635 addHeader(USER_AGENT, DEFAULT_UA); 636 parser = Parser.htmlParser(); 637 cookieManager = new CookieManager(); // creates a default InMemoryCookieStore 638 } 639 640 Request(Request copy) { 641 super(copy); 642 connection = copy.connection; 643 proxy = copy.proxy; 644 postDataCharset = copy.postDataCharset; 645 timeoutMilliseconds = copy.timeoutMilliseconds; 646 maxBodySizeBytes = copy.maxBodySizeBytes; 647 followRedirects = copy.followRedirects; 648 data = new ArrayList<>(); // data not copied 649 //body not copied 650 ignoreHttpErrors = copy.ignoreHttpErrors; 651 ignoreContentType = copy.ignoreContentType; 652 parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy 653 parserDefined = copy.parserDefined; 654 sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share 655 cookieManager = copy.cookieManager; 656 authenticator = copy.authenticator; 657 responseProgress = copy.responseProgress; 658 } 659 660 @Override @Nullable 661 public Proxy proxy() { 662 return proxy; 663 } 664 665 @Override 666 public Request proxy(@Nullable Proxy proxy) { 667 this.proxy = proxy; 668 return this; 669 } 670 671 @Override 672 public Request proxy(String host, int port) { 673 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 674 return this; 675 } 676 677 @Override 678 public int timeout() { 679 return timeoutMilliseconds; 680 } 681 682 @Override 683 public Request timeout(int millis) { 684 Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 685 timeoutMilliseconds = millis; 686 return this; 687 } 688 689 @Override 690 public int maxBodySize() { 691 return maxBodySizeBytes; 692 } 693 694 @Override 695 public Connection.Request maxBodySize(int bytes) { 696 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 697 maxBodySizeBytes = bytes; 698 return this; 699 } 700 701 @Override 702 public boolean followRedirects() { 703 return followRedirects; 704 } 705 706 @Override 707 public Connection.Request followRedirects(boolean followRedirects) { 708 this.followRedirects = followRedirects; 709 return this; 710 } 711 712 @Override 713 public boolean ignoreHttpErrors() { 714 return ignoreHttpErrors; 715 } 716 717 @Override @Nullable 718 public SSLSocketFactory sslSocketFactory() { 719 return sslSocketFactory; 720 } 721 722 @Override 723 public void sslSocketFactory(SSLSocketFactory sslSocketFactory) { 724 this.sslSocketFactory = sslSocketFactory; 725 } 726 727 @Override 728 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 729 this.ignoreHttpErrors = ignoreHttpErrors; 730 return this; 731 } 732 733 @Override 734 public boolean ignoreContentType() { 735 return ignoreContentType; 736 } 737 738 @Override 739 public Connection.Request ignoreContentType(boolean ignoreContentType) { 740 this.ignoreContentType = ignoreContentType; 741 return this; 742 } 743 744 @Override 745 public Request data(Connection.KeyVal keyval) { 746 Validate.notNullParam(keyval, "keyval"); 747 data.add(keyval); 748 return this; 749 } 750 751 @Override 752 public Collection<Connection.KeyVal> data() { 753 return data; 754 } 755 756 @Override 757 public Connection.Request requestBody(@Nullable String body) { 758 this.body = body; 759 return this; 760 } 761 762 @Override @Nullable 763 public String requestBody() { 764 return body instanceof String ? (String) body : null; 765 } 766 767 @Override 768 public Connection.Request requestBodyStream(InputStream stream) { 769 body = stream; 770 return this; 771 } 772 773 @Override 774 public Request parser(Parser parser) { 775 this.parser = parser; 776 parserDefined = true; 777 return this; 778 } 779 780 @Override 781 public Parser parser() { 782 return parser; 783 } 784 785 @Override 786 public Connection.Request postDataCharset(String charset) { 787 Validate.notNullParam(charset, "charset"); 788 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 789 this.postDataCharset = charset; 790 return this; 791 } 792 793 @Override 794 public String postDataCharset() { 795 return postDataCharset; 796 } 797 798 CookieManager cookieManager() { 799 return cookieManager; 800 } 801 802 @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { 803 this.authenticator = authenticator; 804 return this; 805 } 806 807 @Override @Nullable public RequestAuthenticator auth() { 808 return authenticator; 809 } 810 } 811 812 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 813 private static final int MAX_REDIRECTS = 20; 814 private static final String LOCATION = "Location"; 815 int statusCode; 816 @Nullable String statusMessage; 817 private @Nullable ByteBuffer byteData; 818 private @Nullable ControllableInputStream bodyStream; 819 @Nullable RequestExecutor executor; 820 private @Nullable String charset; 821 @Nullable String contentType; 822 int contentLength; 823 private boolean executed = false; 824 private boolean inputStreamRead = false; 825 private int numRedirects = 0; 826 private final HttpConnection.Request req; 827 828 /* 829 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 830 */ 831 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 832 833 /** 834 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 835 are created from the HttpURLConnection and fields defined. 836 */ 837 Response() { 838 super(); 839 statusCode = 400; 840 statusMessage = "Request not made"; 841 req = new Request(); 842 contentType = null; 843 } 844 845 static Response execute(HttpConnection.Request req) throws IOException { 846 return execute(req, null); 847 } 848 849 static Response execute(HttpConnection.Request req, @Nullable Response prevRes) throws IOException { 850 Validate.isTrue(req.executing.tryLock(), "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads."); 851 Validate.notNullParam(req, "req"); 852 URL url = req.url(); 853 Validate.notNull(url, "URL must be specified to connect"); 854 String protocol = url.getProtocol(); 855 if (!protocol.equals("http") && !protocol.equals("https")) 856 throw new MalformedURLException("Only http & https protocols supported"); 857 final boolean supportsBody = req.method().hasBody(); 858 final boolean hasBody = req.body != null; 859 if (!supportsBody) 860 Validate.isFalse(hasBody, "Cannot set a request body for HTTP method " + req.method()); 861 862 // set up the request for execution 863 if (!req.data().isEmpty() && (!supportsBody || hasBody)) 864 serialiseRequestUrl(req); 865 else if (supportsBody) 866 setOutputContentType(req); 867 868 long startTime = System.nanoTime(); 869 RequestExecutor executor = RequestDispatch.get(req, prevRes); 870 Response res = null; 871 try { 872 res = executor.execute(); 873 874 // redirect if there's a location header (from 3xx, or 201 etc) 875 if (res.hasHeader(LOCATION) && req.followRedirects()) { 876 if (res.statusCode != HTTP_TEMP_REDIR) { 877 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 878 req.data().clear(); 879 req.requestBody(null); 880 req.removeHeader(CONTENT_TYPE); 881 } 882 883 String location = res.header(LOCATION); 884 Validate.notNull(location); 885 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 886 location = location.substring(6); 887 URL redir = StringUtil.resolve(req.url(), location); 888 req.url(redir); 889 890 return execute(req, res); 891 } 892 if ((res.statusCode < 200 || res.statusCode >= 400) && !req.ignoreHttpErrors()) 893 throw new HttpStatusException("HTTP error fetching URL", res.statusCode, req.url().toString()); 894 895 // check that we can handle the returned content type; if not, abort before fetching it 896 String contentType = res.contentType(); 897 if (contentType != null 898 && !req.ignoreContentType() 899 && !contentType.startsWith("text/") 900 && !xmlContentTypeRxp.matcher(contentType).matches() 901 ) 902 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 903 contentType, req.url().toString()); 904 905 // switch to the XML parser if content type is xml and not parser not explicitly set 906 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 907 if (!req.parserDefined) req.parser(Parser.xmlParser()); 908 } 909 910 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 911 if (res.contentLength != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 912 InputStream stream = executor.responseBody(); 913 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 914 stream = new GZIPInputStream(stream); 915 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 916 stream = new InflaterInputStream(stream, new Inflater(true)); 917 918 res.bodyStream = ControllableInputStream.wrap( 919 stream, DefaultBufferSize, req.maxBodySize()) 920 .timeout(startTime, req.timeout()); 921 922 if (req.responseProgress != null) // set response progress listener 923 res.bodyStream.onProgress(res.contentLength, req.responseProgress, res); 924 } else { 925 res.byteData = DataUtil.emptyByteBuffer(); 926 } 927 } catch (IOException e) { 928 if (res != null) res.safeClose(); // will be non-null if got to conn 929 throw e; 930 } finally { 931 req.executing.unlock(); 932 933 // detach any thread local auth delegate 934 if (req.authenticator != null) 935 AuthenticationHandler.handler.remove(); 936 } 937 938 res.executed = true; 939 return res; 940 } 941 942 @Override 943 public int statusCode() { 944 return statusCode; 945 } 946 947 @Override 948 public String statusMessage() { 949 return statusMessage; 950 } 951 952 @Override @Nullable 953 public String charset() { 954 return charset; 955 } 956 957 @Override 958 public Response charset(String charset) { 959 this.charset = charset; 960 return this; 961 } 962 963 @Override @Nullable 964 public String contentType() { 965 return contentType; 966 } 967 968 /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */ 969 private ControllableInputStream prepareParse() { 970 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 971 ControllableInputStream stream = bodyStream; 972 if (byteData != null) { // bytes have been read in to the buffer, parse that 973 ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit()); 974 stream = ControllableInputStream.wrap(bytes, 0); // no max 975 inputStreamRead = false; // ok to reparse if in bytes 976 } 977 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 978 Validate.notNull(stream); 979 inputStreamRead = true; 980 return stream; 981 } 982 983 @Override public Document parse() throws IOException { 984 ControllableInputStream stream = prepareParse(); 985 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 986 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req? 987 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 988 safeClose(); 989 return doc; 990 } 991 992 @Override public StreamParser streamParser() throws IOException { 993 ControllableInputStream stream = prepareParse(); 994 String baseUri = url.toExternalForm(); 995 DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser()); 996 // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit. 997 998 // set up the stream parser and rig this connection up to the parsed doc: 999 StreamParser streamer = new StreamParser(req.parser()); 1000 BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset)); 1001 streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it 1002 streamer.document().connection(new HttpConnection(req, this)); 1003 charset = charsetDoc.charset.name(); 1004 1005 // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream 1006 return streamer; 1007 } 1008 1009 private void prepareByteData() { 1010 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1011 if (bodyStream != null && byteData == null) { 1012 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 1013 try { 1014 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 1015 } catch (IOException e) { 1016 throw new UncheckedIOException(e); 1017 } finally { 1018 inputStreamRead = true; 1019 safeClose(); 1020 } 1021 } 1022 } 1023 1024 @Override 1025 public String body() { 1026 prepareByteData(); 1027 Validate.notNull(byteData); 1028 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 1029 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 1030 .decode(byteData).toString(); 1031 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 1032 return body; 1033 } 1034 1035 @Override 1036 public byte[] bodyAsBytes() { 1037 prepareByteData(); 1038 Validate.notNull(byteData); 1039 Validate.isTrue(byteData.hasArray()); // we made it, so it should 1040 1041 byte[] array = byteData.array(); 1042 int offset = byteData.arrayOffset(); 1043 int length = byteData.limit(); 1044 1045 if (offset == 0 && length == array.length) { // exact, just return it 1046 return array; 1047 } else { // trim to size 1048 byte[] exactArray = new byte[length]; 1049 System.arraycopy(array, offset, exactArray, 0, length); 1050 return exactArray; 1051 } 1052 } 1053 1054 @Override 1055 public Connection.Response bufferUp() { 1056 prepareByteData(); 1057 return this; 1058 } 1059 1060 @Override 1061 public BufferedInputStream bodyStream() { 1062 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1063 1064 // if we have read to bytes (via buffer up), return those as a stream. 1065 if (byteData != null) { 1066 return new BufferedInputStream( 1067 new ByteArrayInputStream(byteData.array(), 0, byteData.limit()), 1068 DefaultBufferSize); 1069 } 1070 1071 Validate.isFalse(inputStreamRead, "Request has already been read"); 1072 Validate.notNull(bodyStream); 1073 inputStreamRead = true; 1074 return bodyStream.inputStream(); 1075 } 1076 1077 /** 1078 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1079 * keep-alives to work (as the underlying connection is actually held open, despite the name). 1080 */ 1081 private void safeClose() { 1082 if (bodyStream != null) { 1083 try { 1084 bodyStream.close(); 1085 } catch (IOException e) { 1086 // no-op 1087 } finally { 1088 bodyStream = null; 1089 } 1090 } 1091 1092 if (executor != null) executor.safeClose(); // disconnect 1093 } 1094 1095 Response(HttpConnection.Request request) { 1096 this.req = request; 1097 } 1098 1099 // set up url, method, header, cookies 1100 void prepareResponse(Map<String, List<String>> resHeaders, HttpConnection.@Nullable Response previousResponse) throws IOException { 1101 processResponseHeaders(resHeaders); // includes cookie key/val read during header scan 1102 CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store 1103 1104 if (previousResponse != null) { // was redirected 1105 // map previous response cookies into this response cookies() object 1106 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1107 if (!hasCookie(prevCookie.getKey())) 1108 cookie(prevCookie.getKey(), prevCookie.getValue()); 1109 } 1110 previousResponse.safeClose(); 1111 1112 // enforce too many redirects: 1113 numRedirects = previousResponse.numRedirects + 1; 1114 if (numRedirects >= MAX_REDIRECTS) 1115 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 1116 } 1117 } 1118 1119 void processResponseHeaders(Map<String, List<String>> resHeaders) { 1120 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1121 String name = entry.getKey(); 1122 if (name == null) 1123 continue; // http/1.1 line 1124 1125 List<String> values = entry.getValue(); 1126 for (String value : values) { 1127 addHeader(name, fixHeaderEncoding(value)); 1128 } 1129 } 1130 } 1131 1132 /** 1133 Servers may encode response headers in UTF-8 instead of RFC defined 8859. The JVM decodes the headers (before we see them) as 8859, which can lead to mojibake data. 1134 <p>This method attempts to detect that and re-decode the string as UTF-8.</p> 1135 <p>However on Android, the headers will be decoded as UTF8, so we can detect and pass those directly.</p> 1136 * @param val a header value string that may have been incorrectly decoded as 8859. 1137 * @return a potentially re-decoded string. 1138 */ 1139 @Nullable 1140 static String fixHeaderEncoding(@Nullable String val) { 1141 if (val == null) return val; 1142 // If we can't encode the string as 8859, then it couldn't have been decoded as 8859 1143 if (!StandardCharsets.ISO_8859_1.newEncoder().canEncode(val)) 1144 return val; 1145 byte[] bytes = val.getBytes(ISO_8859_1); 1146 if (looksLikeUtf8(bytes)) 1147 return new String(bytes, UTF_8); 1148 else 1149 return val; 1150 } 1151 1152 private static boolean looksLikeUtf8(byte[] input) { 1153 int i = 0; 1154 // BOM: 1155 if (input.length >= 3 1156 && (input[0] & 0xFF) == 0xEF 1157 && (input[1] & 0xFF) == 0xBB 1158 && (input[2] & 0xFF) == 0xBF) { 1159 i = 3; 1160 } 1161 1162 int end; 1163 boolean foundNonAscii = false; 1164 for (int j = input.length; i < j; ++i) { 1165 int o = input[i]; 1166 if ((o & 0x80) == 0) { 1167 continue; // ASCII 1168 } 1169 foundNonAscii = true; 1170 1171 // UTF-8 leading: 1172 if ((o & 0xE0) == 0xC0) { 1173 end = i + 1; 1174 } else if ((o & 0xF0) == 0xE0) { 1175 end = i + 2; 1176 } else if ((o & 0xF8) == 0xF0) { 1177 end = i + 3; 1178 } else { 1179 return false; 1180 } 1181 1182 if (end >= input.length) 1183 return false; 1184 1185 while (i < end) { 1186 i++; 1187 o = input[i]; 1188 if ((o & 0xC0) != 0x80) { 1189 return false; 1190 } 1191 } 1192 } 1193 return foundNonAscii; 1194 } 1195 1196 private static void setOutputContentType(final HttpConnection.Request req) { 1197 final String contentType = req.header(CONTENT_TYPE); 1198 String bound = null; 1199 if (contentType != null) { 1200 // no-op; don't add content type as already set (e.g. for requestBody()) 1201 // todo - if content type already set, we could add charset 1202 1203 // if user has set content type to multipart/form-data, auto add boundary. 1204 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1205 bound = DataUtil.mimeBoundary(); 1206 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1207 } 1208 1209 } 1210 else if (needsMultipart(req)) { 1211 bound = DataUtil.mimeBoundary(); 1212 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1213 } else { 1214 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1215 } 1216 req.mimeBoundary = bound; 1217 } 1218 1219 static void writePost(final HttpConnection.Request req, final OutputStream outputStream) throws IOException { 1220 final Collection<Connection.KeyVal> data = req.data(); 1221 final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, req.postDataCharset())); 1222 final String boundary = req.mimeBoundary; 1223 1224 if (boundary != null) { // a multipart post 1225 for (Connection.KeyVal keyVal : data) { 1226 w.write("--"); 1227 w.write(boundary); 1228 w.write("\r\n"); 1229 w.write("Content-Disposition: form-data; name=\""); 1230 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1231 w.write("\""); 1232 final InputStream input = keyVal.inputStream(); 1233 if (input != null) { 1234 w.write("; filename=\""); 1235 w.write(encodeMimeName(keyVal.value())); 1236 w.write("\"\r\nContent-Type: "); 1237 String contentType = keyVal.contentType(); 1238 w.write(contentType != null ? contentType : DefaultUploadType); 1239 w.write("\r\n\r\n"); 1240 w.flush(); 1241 DataUtil.crossStreams(input, outputStream); 1242 outputStream.flush(); 1243 } else { 1244 w.write("\r\n\r\n"); 1245 w.write(keyVal.value()); 1246 } 1247 w.write("\r\n"); 1248 } 1249 w.write("--"); 1250 w.write(boundary); 1251 w.write("--"); 1252 } else if (req.body != null) { // a single body (bytes or plain text); data will be in query string 1253 if (req.body instanceof String) { 1254 w.write((String) req.body); 1255 } else if (req.body instanceof InputStream) { 1256 DataUtil.crossStreams((InputStream) req.body, outputStream); 1257 outputStream.flush(); 1258 } else { 1259 throw new IllegalStateException(); 1260 } 1261 } else { // regular form data (application/x-www-form-urlencoded) 1262 boolean first = true; 1263 for (Connection.KeyVal keyVal : data) { 1264 if (!first) w.append('&'); 1265 else first = false; 1266 1267 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1268 w.write('='); 1269 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1270 } 1271 } 1272 w.close(); 1273 } 1274 1275 // for get url reqs, serialise the data map into the url 1276 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1277 UrlBuilder in = new UrlBuilder(req.url()); 1278 1279 for (Connection.KeyVal keyVal : req.data()) { 1280 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1281 in.appendKeyVal(keyVal); 1282 } 1283 req.url(in.build()); 1284 req.data().clear(); // moved into url as get params 1285 } 1286 } 1287 1288 private static boolean needsMultipart(Connection.Request req) { 1289 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1290 for (Connection.KeyVal keyVal : req.data()) { 1291 if (keyVal.hasInputStream()) 1292 return true; 1293 } 1294 return false; 1295 } 1296 1297 public static class KeyVal implements Connection.KeyVal { 1298 private String key; 1299 private String value; 1300 private @Nullable InputStream stream; 1301 private @Nullable String contentType; 1302 1303 public static KeyVal create(String key, String value) { 1304 return new KeyVal(key, value); 1305 } 1306 1307 public static KeyVal create(String key, String filename, InputStream stream) { 1308 return new KeyVal(key, filename) 1309 .inputStream(stream); 1310 } 1311 1312 private KeyVal(String key, String value) { 1313 Validate.notEmptyParam(key, "key"); 1314 Validate.notNullParam(value, "value"); 1315 this.key = key; 1316 this.value = value; 1317 } 1318 1319 @Override 1320 public KeyVal key(String key) { 1321 Validate.notEmptyParam(key, "key"); 1322 this.key = key; 1323 return this; 1324 } 1325 1326 @Override 1327 public String key() { 1328 return key; 1329 } 1330 1331 @Override 1332 public KeyVal value(String value) { 1333 Validate.notNullParam(value, "value"); 1334 this.value = value; 1335 return this; 1336 } 1337 1338 @Override 1339 public String value() { 1340 return value; 1341 } 1342 1343 @Override 1344 public KeyVal inputStream(InputStream inputStream) { 1345 Validate.notNullParam(value, "inputStream"); 1346 this.stream = inputStream; 1347 return this; 1348 } 1349 1350 @Override @Nullable 1351 public InputStream inputStream() { 1352 return stream; 1353 } 1354 1355 @Override 1356 public boolean hasInputStream() { 1357 return stream != null; 1358 } 1359 1360 @Override 1361 public Connection.KeyVal contentType(String contentType) { 1362 Validate.notEmpty(contentType); 1363 this.contentType = contentType; 1364 return this; 1365 } 1366 1367 @Override @Nullable 1368 public String contentType() { 1369 return contentType; 1370 } 1371 1372 @Override 1373 public String toString() { 1374 return key + "=" + value; 1375 } 1376 } 1377}