001package org.jsoup; 002 003import org.jsoup.helper.RequestAuthenticator; 004import org.jsoup.nodes.Document; 005import org.jsoup.parser.Parser; 006import org.jsoup.parser.StreamParser; 007import org.jspecify.annotations.Nullable; 008 009import javax.net.ssl.SSLSocketFactory; 010import java.io.BufferedInputStream; 011import java.io.IOException; 012import java.io.InputStream; 013import java.io.UncheckedIOException; 014import java.net.Authenticator; 015import java.net.CookieStore; 016import java.net.Proxy; 017import java.net.URL; 018import java.util.Collection; 019import java.util.List; 020import java.util.Map; 021 022/** 023 The Connection interface is a convenient HTTP client and session object to fetch content from the web, and parse it 024 into Documents. 025 <p>To start a new session, use either {@link org.jsoup.Jsoup#newSession()} or {@link org.jsoup.Jsoup#connect(String)}. 026 Connections contain {@link Connection.Request} and {@link Connection.Response} objects (once executed). Configuration 027 settings (URL, timeout, useragent, etc) set on a session will be applied by default to each subsequent request.</p> 028 <p>To start a new request from the session, use {@link #newRequest()}.</p> 029 <p>Cookies are stored in memory for the duration of the session. For that reason, do not use one single session for all 030 requests in a long-lived application, or you are likely to run out of memory, unless care is taken to clean up the 031 cookie store. The cookie store for the session is available via {@link #cookieStore()}. You may provide your own 032 implementation via {@link #cookieStore(java.net.CookieStore)} before making requests.</p> 033 <p>Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}), 034 or by methods in the {@link Connection.Request} object directly. All request configuration must be made before the request is 035 executed. 
When used as an ongoing session, initialize all defaults prior to making multi-threaded {@link 036#newRequest()}s.</p> 037 <p>Note that the term "Connection" used here does not mean that a long-lived connection is held against a server for 038 the lifetime of the Connection object. A socket connection is only made at the point of request execution ({@link 039#execute()}, {@link #get()}, or {@link #post()}), and the server's response consumed.</p> 040 <p>For multi-threaded implementations, it is important to use a {@link #newRequest()} for each request. The session may 041 be shared across concurrent threads, but not a specific request.</p> 042 */ 043@SuppressWarnings("unused") 044public interface Connection { 045 046 /** 047 * The HTTP request methods that may be used (GET, POST, PUT, DELETE, PATCH, HEAD, OPTIONS, TRACE), and whether each has a request body. 048 */ 049 enum Method { 050 GET(false), 051 POST(true), 052 PUT(true), 053 DELETE(true), 054 /** 055 Note that unfortunately, PATCH is not supported in many JDKs. 056 */ 057 PATCH(true), 058 HEAD(false), 059 OPTIONS(false), 060 TRACE(false); 061 062 private final boolean hasBody; 063 064 Method(boolean hasBody) { 065 this.hasBody = hasBody; 066 } 067 068 /** 069 * Check if this HTTP method has/needs a request body 070 * @return true if this method has/needs a request body 071 */ 072 public final boolean hasBody() { 073 return hasBody; 074 } 075 } 076 077 /** 078 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 079 may then be independently changed on the returned {@link Connection.Request} object). 080 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 081 @since 1.14.1 082 */ 083 Connection newRequest(); 084 085 /** 086 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 087 may then be independently changed on the returned {@link Connection.Request} object). 
088 @param url URL for the new request 089 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 090 @since 1.17.1 091 */ 092 default Connection newRequest(String url) { 093 return newRequest().url(url); 094 } 095 096 /** 097 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 098 may then be independently changed on the returned {@link Connection.Request} object). 099 @param url URL for the new request 100 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 101 @since 1.17.1 102 */ 103 default Connection newRequest(URL url) { 104 return newRequest().url(url); 105 } 106 107 /** 108 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 109 * @param url URL to connect to 110 * @return this Connection, for chaining 111 */ 112 Connection url(URL url); 113 114 /** 115 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 116 * @param url URL to connect to 117 * @return this Connection, for chaining 118 */ 119 Connection url(String url); 120 121 /** 122 * Set the proxy to use for this request. Set to <code>null</code> to disable a previously set proxy. 123 * @param proxy proxy to use 124 * @return this Connection, for chaining 125 */ 126 Connection proxy(@Nullable Proxy proxy); 127 128 /** 129 * Set the HTTP proxy to use for this request. 130 * @param host the proxy hostname 131 * @param port the proxy port 132 * @return this Connection, for chaining 133 */ 134 Connection proxy(String host, int port); 135 136 /** 137 * Set the request user-agent header. 138 * @param userAgent user-agent to use 139 * @return this Connection, for chaining 140 * @see org.jsoup.helper.HttpConnection#DEFAULT_UA 141 */ 142 Connection userAgent(String userAgent); 143 144 /** 145 * Set the total request timeout duration. 
If a timeout occurs, an {@link java.net.SocketTimeoutException} will be thrown. 146 * <p>The default timeout is <b>30 seconds</b> (30,000 millis). A timeout of zero is treated as an infinite timeout. 147 * <p>Note that this timeout specifies the combined maximum duration of the connection time and the time to read 148 * the full response. 149 * @param millis number of milliseconds (thousandths of a second) before timing out connects or reads. 150 * @return this Connection, for chaining 151 * @see #maxBodySize(int) 152 */ 153 Connection timeout(int millis); 154 155 /** 156 * Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed, 157 * and the input truncated (i.e. the body content will be trimmed). <b>The default maximum is 2MB</b>. A max size of 158 * <code>0</code> is treated as an infinite amount (bounded only by your patience and the memory available on your 159 * machine). 160 * 161 * @param bytes number of bytes to read from the input before truncating 162 * @return this Connection, for chaining 163 */ 164 Connection maxBodySize(int bytes); 165 166 /** 167 * Set the request referrer (aka "referer") header. 168 * @param referrer referrer to use 169 * @return this Connection, for chaining 170 */ 171 Connection referrer(String referrer); 172 173 /** 174 * Configures the connection to (not) follow server redirects. By default, this is <b>true</b>. 175 * @param followRedirects true if server redirects should be followed. 176 * @return this Connection, for chaining 177 */ 178 Connection followRedirects(boolean followRedirects); 179 180 /** 181 * Set the HTTP request method to use (any {@link Method}, e.g. GET or POST). Default is GET. 182 * @param method HTTP request method 183 * @return this Connection, for chaining 184 */ 185 Connection method(Method method); 186 187 /** 188 * Configures the connection to not throw exceptions when an HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). 
By 189 * default, this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the 190 * response is populated with the error body, and the status message will reflect the error. 191 * @param ignoreHttpErrors true if HTTP errors should be ignored; false (default) if they should cause an IOException to be thrown. 192 * @return this Connection, for chaining 193 */ 194 Connection ignoreHttpErrors(boolean ignoreHttpErrors); 195 196 /** 197 * Ignore the document's Content-Type when parsing the response. By default, this is <b>false</b>, an unrecognised 198 * content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse 199 * a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type. 200 * @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a 201 * Document. 202 * @return this Connection, for chaining 203 */ 204 Connection ignoreContentType(boolean ignoreContentType); 205 206 /** 207 * Set custom SSL socket factory 208 * @param sslSocketFactory custom SSL socket factory 209 * @return this Connection, for chaining 210 */ 211 Connection sslSocketFactory(SSLSocketFactory sslSocketFactory); 212 213 /** 214 * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the 215 * request body for POSTs. A request may have multiple values of the same name. 216 * @param key data key 217 * @param value data value 218 * @return this Connection, for chaining 219 */ 220 Connection data(String key, String value); 221 222 /** 223 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 224 * input stream. 225 * <p>Use the {@link #data(String, String, InputStream, String)} method to set the uploaded file's mimetype.</p> 226 * @param key data key (form item name) 227 * @param filename the name of the file to present to the remote server. 
Typically just the name, not path, 228 * component. 229 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 230 * You must close the InputStream in a {@code finally} block. 231 * @return this Connection, for chaining 232 * @see #data(String, String, InputStream, String) 233 */ 234 Connection data(String key, String filename, InputStream inputStream); 235 236 /** 237 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 238 * input stream. 239 * @param key data key (form item name) 240 * @param filename the name of the file to present to the remote server. Typically just the name, not path, 241 * component. 242 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 243 * You must close the InputStream in a {@code finally} block. 244 * @param contentType the Content Type (aka mimetype) to specify for this file. 245 * @return this Connection, for chaining 246 */ 247 Connection data(String key, String filename, InputStream inputStream, String contentType); 248 249 /** 250 * Adds all of the supplied data to the request data parameters 251 * @param data collection of data parameters 252 * @return this Connection, for chaining 253 */ 254 Connection data(Collection<KeyVal> data); 255 256 /** 257 * Adds all of the supplied data to the request data parameters 258 * @param data map of data parameters 259 * @return this Connection, for chaining 260 */ 261 Connection data(Map<String, String> data); 262 263 /** 264 Add one or more request {@code key, val} data parameter pairs. 265 <p>Multiple parameters may be set at once, e.g.: 266 <code>.data("name", "jsoup", "language", "Java", "language", "English");</code> creates a query string like: 267 <code>{@literal ?name=jsoup&language=Java&language=English}</code></p> 268 <p>For GET requests, data parameters will be sent on the request query string. 
For POST (and other methods that 269 contain a body), they will be sent as body form parameters, unless the body is explicitly set by 270 {@link #requestBody(String)}, in which case they will be query string parameters.</p> 271 272 @param keyvals a set of key value pairs. 273 @return this Connection, for chaining 274 */ 275 Connection data(String... keyvals); 276 277 /** 278 * Get the data KeyVal for this key, if any 279 * @param key the data key 280 * @return the KeyVal for this key, or null if not set 281 */ 282 @Nullable KeyVal data(String key); 283 284 /** 285 * Set a POST (or PUT) request body. Useful when a server expects a plain request body (such as JSON), and not a set 286 * of URL encoded form key/value pairs. E.g.: 287 * <code><pre>Jsoup.connect(url) 288 * .requestBody(json) 289 * .header("Content-Type", "application/json") 290 * .post();</pre></code> 291 * If any data key/vals are supplied, they will be sent as URL query params. * @param body to use as the request body 292 * @return this Connection, for chaining 293 */ 294 Connection requestBody(String body); 295 296 /** 297 * Set a request header. Replaces any existing header with the same case-insensitive name. 298 * @param name header name 299 * @param value header value 300 * @return this Connection, for chaining 301 * @see org.jsoup.Connection.Request#header(String, String) 302 * @see org.jsoup.Connection.Request#headers() 303 */ 304 Connection header(String name, String value); 305 306 /** 307 * Sets each of the supplied headers on the request. Existing headers with the same case-insensitive name will be 308 * replaced with the new value. 309 * @param headers map of headers name {@literal ->} value pairs 310 * @return this Connection, for chaining 311 * @see org.jsoup.Connection.Request#headers() 312 */ 313 Connection headers(Map<String,String> headers); 314 315 /** 316 * Set a cookie to be sent in the request. 
317 * @param name name of cookie 318 * @param value value of cookie 319 * @return this Connection, for chaining 320 */ 321 Connection cookie(String name, String value); 322 323 /** 324 * Adds each of the supplied cookies to the request. 325 * @param cookies map of cookie name {@literal ->} value pairs 326 * @return this Connection, for chaining 327 */ 328 Connection cookies(Map<String, String> cookies); 329 330 /** 331 Provide a custom or pre-filled CookieStore to be used on requests made by this Connection. 332 @param cookieStore a cookie store to use for subsequent requests 333 @return this Connection, for chaining 334 @since 1.14.1 335 */ 336 Connection cookieStore(CookieStore cookieStore); 337 338 /** 339 Get the cookie store used by this Connection. 340 @return the cookie store 341 @since 1.14.1 342 */ 343 CookieStore cookieStore(); 344 345 /** 346 * Provide a specific parser to use when parsing the response to a Document. If not set, jsoup defaults to the 347 * {@link Parser#htmlParser() HTML parser}, unless the response content-type is XML, in which case the 348 * {@link Parser#xmlParser() XML parser} is used. 349 * @param parser alternate parser 350 * @return this Connection, for chaining 351 */ 352 Connection parser(Parser parser); 353 354 /** 355 * Set the character-set used to encode for x-www-form-urlencoded post data. Defaults to {@code UTF-8}. 356 * @param charset character set to encode post data 357 * @return this Connection, for chaining 358 */ 359 Connection postDataCharset(String charset); 360 361 /** 362 Set the authenticator to use for this connection, enabling requests to URLs, and via proxies, that require 363 authentication credentials. 364 <p>The authentication scheme used is automatically detected during the request execution. 
365 Supported schemes (subject to the platform) are {@code basic}, {@code digest}, {@code NTLM}, 366 and {@code Kerberos}.</p> 367 368 <p>To use, supply a {@link RequestAuthenticator} function that: 369 <ol> 370 <li>validates the URL that is requesting authentication, and</li> 371 <li>returns the appropriate credentials (username and password)</li> 372 </ol> 373 </p> 374 375 <p>For example, to authenticate both to a proxy and a downstream web server: 376 <code><pre> 377 Connection session = Jsoup.newSession() 378 .proxy("proxy.example.com", 8080) 379 .auth(auth -> { 380 if (auth.isServer()) { // provide credentials for the request url 381 Validate.isTrue(auth.url().getHost().equals("example.com")); 382 // check that we're sending credentials where we expect, and not redirected out 383 return auth.credentials("username", "password"); 384 } else { // auth.isProxy() 385 return auth.credentials("proxy-user", "proxy-password"); 386 } 387 }); 388 389 Connection.Response response = session.newRequest("https://example.com/adminzone/").execute(); 390 </pre></code> 391 </p> 392 393 <p>The system may cache the authentication and use it for subsequent requests to the same resource.</p> 394 395 <p><b>Implementation notes</b></p> 396 <p>For compatibility, on a Java 8 platform, authentication is set up via the system-wide default 397 {@link java.net.Authenticator#setDefault(Authenticator)} method via a ThreadLocal delegator. Whilst the 398 authenticator used is request specific and thread-safe, if you have other calls to {@code setDefault}, they will be 399 incompatible with this implementation.</p> 400 <p>On Java 9 and above, the preceding note does not apply; authenticators are directly set on the request. 
</p> 401 <p>If you are attempting to authenticate to a proxy that uses the {@code basic} scheme and will be fetching HTTPS 402 URLs, you need to configure your Java platform to enable that, by setting the 403 {@code jdk.http.auth.tunneling.disabledSchemes} system property to {@code ""}. 404 This must be executed prior to any authorization attempts. E.g.: 405 <code><pre> 406 static { 407 System.setProperty("jdk.http.auth.tunneling.disabledSchemes", ""); 408 // removes Basic, which is otherwise excluded from auth for CONNECT tunnels 409 }</pre></code> 410 </p> 411 * @param authenticator the authenticator to use in this connection 412 * @return this Connection, for chaining 413 * @since 1.17.1 414 */ 415 default Connection auth(@Nullable RequestAuthenticator authenticator) { 416 throw new UnsupportedOperationException(); 417 } 418 419 /** 420 * Execute the request as a GET, and parse the result. 421 * @return parsed Document 422 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 423 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 424 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 425 * @throws java.net.SocketTimeoutException if the connection times out 426 * @throws IOException on error 427 */ 428 Document get() throws IOException; 429 430 /** 431 * Execute the request as a POST, and parse the result. 
432 * @return parsed Document 433 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 434 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 435 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 436 * @throws java.net.SocketTimeoutException if the connection times out 437 * @throws IOException on error 438 */ 439 Document post() throws IOException; 440 441 /** 442 * Execute the request. 443 * @return the executed {@link Response} 444 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 445 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 446 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 447 * @throws java.net.SocketTimeoutException if the connection times out 448 * @throws IOException on error 449 */ 450 Response execute() throws IOException; 451 452 /** 453 * Get the request object associated with this connection 454 * @return request 455 */ 456 Request request(); 457 458 /** 459 * Set the connection's request 460 * @param request new request object 461 * @return this Connection, for chaining 462 */ 463 Connection request(Request request); 464 465 /** 466 * Get the response, once the request has been executed. 467 * @return response 468 * @throws IllegalArgumentException if called before the request has been executed. 469 */ 470 Response response(); 471 472 /** 473 * Set the connection's response 474 * @param response new response 475 * @return this Connection, for chaining 476 */ 477 Connection response(Response response); 478 479 /** 480 Set the response progress handler, which will be called periodically as the response body is downloaded. 
Since 481 documents are parsed as they are downloaded, this is also a good proxy for the parse progress. 482 <p>The Response object is supplied as the progress context, and may be read from to obtain headers etc.</p> 483 @param handler the progress handler 484 @return this Connection, for chaining 485 @since 1.18.1 486 */ 487 default Connection onResponseProgress(Progress<Response> handler) { 488 throw new UnsupportedOperationException(); 489 } 490 491 /** 492 * Common methods for Requests and Responses 493 * @param <T> Type of Base, either Request or Response 494 */ 495 @SuppressWarnings("UnusedReturnValue") 496 interface Base<T extends Base<T>> { 497 /** 498 * Get the URL of this Request or Response. For redirected responses, this will be the final destination URL. 499 * @return URL 500 * @throws IllegalArgumentException if called on a Request that was created without a URL. 501 */ 502 URL url(); 503 504 /** 505 * Set the URL 506 * @param url new URL 507 * @return this, for chaining 508 */ 509 T url(URL url); 510 511 /** 512 * Get the request method, which defaults to <code>GET</code> 513 * @return method 514 */ 515 Method method(); 516 517 /** 518 * Set the request method 519 * @param method new method 520 * @return this, for chaining 521 */ 522 T method(Method method); 523 524 /** 525 * Get the value of a header. If there is more than one header value with the same name, the headers are returned 526 * comma separated, per <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2">rfc2616-sec4</a>. 527 * <p> 528 * Header names are case-insensitive. 529 * </p> 530 * @param name name of header (case-insensitive) 531 * @return value of header, or null if not set. 532 * @see #hasHeader(String) 533 * @see #cookie(String) 534 */ 535 @Nullable String header(String name); 536 537 /** 538 * Get the values of a header. 539 * @param name header name, case-insensitive. 540 * @return a list of values for this header, or an empty list if not set. 
541 */ 542 List<String> headers(String name); 543 544 /** 545 * Set a header. This method will overwrite any existing header with the same case-insensitive name. If there 546 * is more than one value for this header, this method will update the first matching header. 547 * <p>For compatibility, if the content of the header includes text that cannot be represented by ISO-8859-1, 548 * then it should be encoded first per <a href="https://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.</p> 549 * @param name Name of header 550 * @param value Value of header 551 * @return this, for chaining 552 * @see #addHeader(String, String) 553 */ 554 T header(String name, String value); 555 556 /** 557 * Add a header. The header will be added regardless of whether a header with the same name already exists. 558 * <p>For compatibility, if the content of the header includes text that cannot be represented by ISO-8859-1, 559 * then it should be encoded first per <a href="https://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.</p> 560 * @param name Name of new header 561 * @param value Value of new header 562 * @return this, for chaining 563 */ 564 T addHeader(String name, String value); 565 566 /** 567 * Check if a header is present 568 * @param name name of header (case-insensitive) 569 * @return if the header is present in this request/response 570 */ 571 boolean hasHeader(String name); 572 573 /** 574 * Check if a header is present, with the given value 575 * @param name header name (case-insensitive) 576 * @param value value (case-insensitive) 577 * @return if the header and value pair are set in this req/res 578 */ 579 boolean hasHeaderWithValue(String name, String value); 580 581 /** 582 * Remove headers by name. If there is more than one header with this name, they will all be removed. 
583 * @param name name of header to remove (case-insensitive) 584 * @return this, for chaining 585 */ 586 T removeHeader(String name); 587 588 /** 589 * Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple 590 * values, only the first header is returned. 591 * <p>Note that this is a view of the headers only, and changes made to this map will not be reflected in the 592 * request/response object.</p> 593 * @return headers 594 * @see #multiHeaders() 595 596 */ 597 Map<String, String> headers(); 598 599 /** 600 * Retrieve all of the headers, keyed by the header name, and with a list of values per header. 601 * @return a map of header name to the list of values for that header. 602 */ 603 Map<String, List<String>> multiHeaders(); 604 605 /** 606 * Get a cookie value by name from this request/response. 607 * @param name name of cookie to retrieve. 608 * @return value of cookie, or null if not set 609 */ 610 @Nullable String cookie(String name); 611 612 /** 613 * Set a cookie in this request/response. 614 * @param name name of cookie 615 * @param value value of cookie 616 * @return this, for chaining 617 */ 618 T cookie(String name, String value); 619 620 /** 621 * Check if a cookie is present 622 * @param name name of cookie 623 * @return if the cookie is present in this request/response 624 */ 625 boolean hasCookie(String name); 626 627 /** 628 * Remove a cookie by name 629 * @param name name of cookie to remove 630 * @return this, for chaining 631 */ 632 T removeCookie(String name); 633 634 /** 635 Retrieve the request/response cookies as a map. For response cookies, if duplicate cookie names were sent, the 636 last one set will be the one included. For session management, rather than using these response cookies, prefer 637 to use {@link Jsoup#newSession()} and related methods. 638 639 @return simple cookie map 640 @see #cookieStore() 641 */ 642 Map<String, String> cookies(); 643 } 644 645 /** 646 * Represents an HTTP request. 
647 */ 648 @SuppressWarnings("UnusedReturnValue") 649 interface Request extends Base<Request> { 650 /** 651 * Get the proxy used for this request. 652 * @return the proxy; <code>null</code> if not enabled. 653 */ 654 @Nullable Proxy proxy(); 655 656 /** 657 * Update the proxy for this request. 658 * @param proxy the proxy to use; <code>null</code> to disable. 659 * @return this Request, for chaining 660 */ 661 Request proxy(@Nullable Proxy proxy); 662 663 /** 664 * Set the HTTP proxy to use for this request. 665 * @param host the proxy hostname 666 * @param port the proxy port 667 * @return this Request, for chaining 668 */ 669 Request proxy(String host, int port); 670 671 /** 672 * Get the request timeout, in milliseconds. 673 * @return the timeout in milliseconds. 674 */ 675 int timeout(); 676 677 /** 678 * Update the request timeout. 679 * @param millis timeout, in milliseconds 680 * @return this Request, for chaining 681 */ 682 Request timeout(int millis); 683 684 /** 685 * Get the maximum body size, in bytes. 686 * @return the maximum body size, in bytes. 687 */ 688 int maxBodySize(); 689 690 /** 691 * Update the maximum body size, in bytes. 692 * @param bytes maximum body size, in bytes. 693 * @return this Request, for chaining 694 */ 695 Request maxBodySize(int bytes); 696 697 /** 698 * Get the current followRedirects configuration. 699 * @return true if followRedirects is enabled. 700 */ 701 boolean followRedirects(); 702 703 /** 704 * Configures the request to (not) follow server redirects. By default this is <b>true</b>. 705 * @param followRedirects true if server redirects should be followed. 706 * @return this Request, for chaining 707 */ 708 Request followRedirects(boolean followRedirects); 709 710 /** 711 * Get the current ignoreHttpErrors configuration. 712 * @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be 713 * thrown. 
714 */ 715 boolean ignoreHttpErrors(); 716 717 /** 718 * Configures the request to ignore HTTP errors in the response. 719 * @param ignoreHttpErrors set to true to ignore HTTP errors. 720 * @return this Request, for chaining 721 */ 722 Request ignoreHttpErrors(boolean ignoreHttpErrors); 723 724 /** 725 * Get the current ignoreContentType configuration. 726 * @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to 727 * be thrown. 728 */ 729 boolean ignoreContentType(); 730 731 /** 732 * Configures the request to ignore the Content-Type of the response. 733 * @param ignoreContentType set to true to ignore the content type. 734 * @return this Request, for chaining 735 */ 736 Request ignoreContentType(boolean ignoreContentType); 737 738 /** 739 * Get the current custom SSL socket factory, if any. 740 * @return custom SSL socket factory if set, null otherwise 741 */ 742 @Nullable SSLSocketFactory sslSocketFactory(); 743 744 /** 745 * Set a custom SSL socket factory. 746 * @param sslSocketFactory SSL socket factory 747 */ 748 void sslSocketFactory(SSLSocketFactory sslSocketFactory); 749 750 /** 751 * Add a data parameter to the request 752 * @param keyval data to add. 753 * @return this Request, for chaining 754 */ 755 Request data(KeyVal keyval); 756 757 /** 758 * Get all of the request's data parameters 759 * @return collection of keyvals 760 */ 761 Collection<KeyVal> data(); 762 763 /** 764 * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set of URL 765 * encoded form key/value pairs. E.g.: 766 * <code><pre>Jsoup.connect(url) 767 * .requestBody(json) 768 * .header("Content-Type", "application/json") 769 * .post();</pre></code> 770 * If any data key/vals are supplied, they will be sent as URL query params. 771 * @param body to use as the request body. Set to null to clear a previously set body. 
772 * @return this Request, for chaining 773 */ 774 Request requestBody(@Nullable String body); 775 776 /** 777 * Get the current request body. 778 * @return null if not set. 779 */ 780 @Nullable String requestBody(); 781 782 /** 783 * Specify the parser to use when parsing the document. 784 * @param parser parser to use. 785 * @return this Request, for chaining 786 */ 787 Request parser(Parser parser); 788 789 /** 790 * Get the current parser to use when parsing the document. 791 * @return current Parser 792 */ 793 Parser parser(); 794 795 /** 796 * Sets the post data character set for x-www-form-urlencoded post data 797 * @param charset character set to encode post data 798 * @return this Request, for chaining 799 */ 800 Request postDataCharset(String charset); 801 802 /** 803 * Gets the post data character set for x-www-form-urlencoded post data 804 * @return character set to encode post data 805 */ 806 String postDataCharset(); 807 808 /** 809 Set the authenticator to use for this request. 810 See {@link Connection#auth(RequestAuthenticator) Connection.auth(authenticator)} for examples and 811 implementation notes. 812 * @param authenticator the authenticator 813 * @return this Request, for chaining. 814 * @since 1.17.1 815 */ 816 default Request auth(@Nullable RequestAuthenticator authenticator) { 817 throw new UnsupportedOperationException(); 818 } 819 820 /** 821 Get the RequestAuthenticator, if any, that will be used on this request. 822 * @return the RequestAuthenticator, or {@code null} if not set 823 * @since 1.17.1 824 */ 825 @Nullable 826 default RequestAuthenticator auth() { 827 throw new UnsupportedOperationException(); 828 } 829 } 830 831 /** 832 * Represents a HTTP response. 833 */ 834 interface Response extends Base<Response> { 835 836 /** 837 * Get the status code of the response. 838 * @return status code 839 */ 840 int statusCode(); 841 842 /** 843 * Get the status message of the response. 
844 * @return status message 845 */ 846 String statusMessage(); 847 848 /** 849 * Get the character set name of the response, derived from the content-type header. 850 * @return character set name if set, <b>null</b> if not 851 */ 852 @Nullable String charset(); 853 854 /** 855 * Set / override the response character set. When the document body is parsed it will be with this charset. 856 * @param charset to decode body as 857 * @return this Response, for chaining 858 */ 859 Response charset(String charset); 860 861 /** 862 * Get the response content type (e.g. "text/html"); 863 * @return the response content type, or <b>null</b> if one was not set 864 */ 865 @Nullable String contentType(); 866 867 /** 868 * Read and parse the body of the response as a Document. If you intend to parse the same response multiple 869 * times, you should {@link #bufferUp()} first. 870 * @return a parsed Document 871 * @throws IOException on error 872 */ 873 Document parse() throws IOException; 874 875 /** 876 * Get the body of the response as a plain string. 877 * @return body 878 */ 879 String body(); 880 881 /** 882 * Get the body of the response as an array of bytes. 883 * @return body bytes 884 */ 885 byte[] bodyAsBytes(); 886 887 /** 888 * Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the 889 * same connection response. Otherwise, once the response is read, its InputStream will have been drained and 890 * may not be re-read. 891 * <p>Calling {@link #body() } or {@link #bodyAsBytes()} has the same effect.</p> 892 * @return this response, for chaining 893 * @throws UncheckedIOException if an IO exception occurs during buffering. 894 */ 895 Response bufferUp(); 896 897 /** 898 Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done 899 with it. 
900 <p>Other body methods (like bufferUp, body, parse, etc) will generally not work in conjunction with this method, 901 as it consumes the InputStream.</p> 902 <p>Any configured max size or maximum read timeout applied to the connection will not be applied to this stream, 903 unless {@link #bufferUp()} is called prior.</p> 904 <p>This method is useful for writing large responses to disk, without buffering them completely into memory 905 first.</p> 906 @return the response body input stream 907 */ 908 BufferedInputStream bodyStream(); 909 910 /** 911 Returns a {@link StreamParser} that will parse the Response progressively. 912 * @return a StreamParser, prepared to parse this response. 913 * @throws IOException if an IO exception occurs preparing the parser. 914 */ 915 default StreamParser streamParser() throws IOException { 916 throw new UnsupportedOperationException(); 917 } 918 } 919 920 /** 921 * A Key:Value tuple(+), used for form data. 922 */ 923 interface KeyVal { 924 925 /** 926 * Update the key of a keyval 927 * @param key new key 928 * @return this KeyVal, for chaining 929 */ 930 KeyVal key(String key); 931 932 /** 933 * Get the key of a keyval 934 * @return the key 935 */ 936 String key(); 937 938 /** 939 * Update the value of a keyval 940 * @param value the new value 941 * @return this KeyVal, for chaining 942 */ 943 KeyVal value(String value); 944 945 /** 946 * Get the value of a keyval 947 * @return the value 948 */ 949 String value(); 950 951 /** 952 * Add or update an input stream to this keyVal 953 * @param inputStream new input stream 954 * @return this KeyVal, for chaining 955 */ 956 KeyVal inputStream(InputStream inputStream); 957 958 /** 959 * Get the input stream associated with this keyval, if any 960 * @return input stream if set, or null 961 */ 962 @Nullable InputStream inputStream(); 963 964 /** 965 * Does this keyval have an input stream? 
* @return true if this keyval does indeed have an input stream
         */
        boolean hasInputStream();

        /**
         * Sets the Content Type header (aka mimetype) that is used in the MIME body when uploading files. This is
         * only useful when {@link #inputStream(InputStream)} is set.
         * <p>Will default to {@code application/octet-stream}.</p>
         * @param contentType the new content type
         * @return this KeyVal
         */
        KeyVal contentType(String contentType);

        /**
         * Returns the current Content Type.
         * @return the current Content Type, or {@code null} if not set
         */
        @Nullable String contentType();
    }
}