001package org.jsoup; 002 003import org.jsoup.nodes.Document; 004import org.jsoup.parser.Parser; 005 006import java.io.BufferedInputStream; 007import java.io.IOException; 008import java.io.InputStream; 009import java.net.Proxy; 010import java.net.URL; 011import java.util.Collection; 012import java.util.List; 013import java.util.Map; 014 015/** 016 * A Connection provides a convenient interface to fetch content from the web, and parse them into Documents. 017 * <p> 018 * To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}. Connections contain {@link Connection.Request} 019 * and {@link Connection.Response} objects. The request objects are reusable as prototype requests. 020 * </p> 021 * <p> 022 * Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}), 023 * or by methods in the Connection.Request object directly. All request configuration must be made before the request is 024 * executed. 025 * </p> 026 */ 027public interface Connection { 028 029 /** 030 * GET and POST http methods. 031 */ 032 enum Method { 033 GET(false), POST(true), PUT(true), DELETE(false), PATCH(true), HEAD(false), OPTIONS(false), TRACE(false); 034 035 private final boolean hasBody; 036 037 Method(boolean hasBody) { 038 this.hasBody = hasBody; 039 } 040 041 /** 042 * Check if this HTTP method has/needs a request body 043 * @return if body needed 044 */ 045 public final boolean hasBody() { 046 return hasBody; 047 } 048 } 049 050 /** 051 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 052 * @param url URL to connect to 053 * @return this Connection, for chaining 054 */ 055 Connection url(URL url); 056 057 /** 058 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 059 * @param url URL to connect to 060 * @return this Connection, for chaining 061 */ 062 Connection url(String url); 063 064 /** 065 * Set the proxy to use for this request. Set to <code>null</code> to disable. 066 * @param proxy proxy to use 067 * @return this Connection, for chaining 068 */ 069 Connection proxy(Proxy proxy); 070 071 /** 072 * Set the HTTP proxy to use for this request. 073 * @param host the proxy hostname 074 * @param port the proxy port 075 * @return this Connection, for chaining 076 */ 077 Connection proxy(String host, int port); 078 079 /** 080 * Set the request user-agent header. 081 * @param userAgent user-agent to use 082 * @return this Connection, for chaining 083 * @see org.jsoup.helper.HttpConnection#DEFAULT_UA 084 */ 085 Connection userAgent(String userAgent); 086 087 /** 088 * Set the total request timeout duration. If a timeout occurs, an {@link java.net.SocketTimeoutException} will be thrown. 089 * <p>The default timeout is <b>30 seconds</b> (30,000 millis). A timeout of zero is treated as an infinite timeout. 090 * <p>Note that this timeout specifies the combined maximum duration of the connection time and the time to read 091 * the full response. 092 * @param millis number of milliseconds (thousandths of a second) before timing out connects or reads. 093 * @return this Connection, for chaining 094 * @see #maxBodySize(int) 095 */ 096 Connection timeout(int millis); 097 098 /** 099 * Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed, 100 * and the input truncated. The default maximum is 1MB. A max size of zero is treated as an infinite amount (bounded 101 * only by your patience and the memory available on your machine). 102 * @param bytes number of bytes to read from the input before truncating 103 * @return this Connection, for chaining 104 */ 105 Connection maxBodySize(int bytes); 106 107 /** 108 * Set the request referrer (aka "referer") header. 109 * @param referrer referrer to use 110 * @return this Connection, for chaining 111 */ 112 Connection referrer(String referrer); 113 114 /** 115 * Configures the connection to (not) follow server redirects. By default this is <b>true</b>. 116 * @param followRedirects true if server redirects should be followed. 117 * @return this Connection, for chaining 118 */ 119 Connection followRedirects(boolean followRedirects); 120 121 /** 122 * Set the request method to use, GET or POST. Default is GET. 123 * @param method HTTP request method 124 * @return this Connection, for chaining 125 */ 126 Connection method(Method method); 127 128 /** 129 * Configures the connection to not throw exceptions when a HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By 130 * default this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the 131 * response is populated with the error body, and the status message will reflect the error. 132 * @param ignoreHttpErrors - false (default) if HTTP errors should be ignored. 133 * @return this Connection, for chaining 134 */ 135 Connection ignoreHttpErrors(boolean ignoreHttpErrors); 136 137 /** 138 * Ignore the document's Content-Type when parsing the response. By default this is <b>false</b>, an unrecognised 139 * content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse 140 * a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type. 141 * @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a 142 * Document. 143 * @return this Connection, for chaining 144 */ 145 Connection ignoreContentType(boolean ignoreContentType); 146 147 /** 148 * Disable/enable TLS certificates validation for HTTPS requests. 149 * <p> 150 * By default this is <b>true</b>; all 151 * connections over HTTPS perform normal validation of certificates, and will abort requests if the provided 152 * certificate does not validate. 153 * </p> 154 * <p> 155 * Some servers use expired, self-generated certificates; or your JDK may not 156 * support SNI hosts. In which case, you may want to enable this setting. 157 * </p> 158 * <p> 159 * <b>Be careful</b> and understand why you need to disable these validations. 160 * </p> 161 * @param value if should validate TLS (SSL) certificates. <b>true</b> by default. 162 * @return this Connection, for chaining 163 */ 164 Connection validateTLSCertificates(boolean value); 165 166 /** 167 * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the 168 * request body for POSTs. A request may have multiple values of the same name. 169 * @param key data key 170 * @param value data value 171 * @return this Connection, for chaining 172 */ 173 Connection data(String key, String value); 174 175 /** 176 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 177 * input stream. 178 * @param key data key (form item name) 179 * @param filename the name of the file to present to the remove server. Typically just the name, not path, 180 * component. 181 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 182 * You must close the InputStream in a {@code finally} block. 183 * @return this Connections, for chaining 184 * @see #data(String, String, InputStream, String) if you want to set the uploaded file's mimetype. 185 */ 186 Connection data(String key, String filename, InputStream inputStream); 187 188 /** 189 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 190 * input stream. 191 * @param key data key (form item name) 192 * @param filename the name of the file to present to the remove server. Typically just the name, not path, 193 * component. 194 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 195 * @param contentType the Content Type (aka mimetype) to specify for this file. 196 * You must close the InputStream in a {@code finally} block. 197 * @return this Connections, for chaining 198 */ 199 Connection data(String key, String filename, InputStream inputStream, String contentType); 200 201 /** 202 * Adds all of the supplied data to the request data parameters 203 * @param data collection of data parameters 204 * @return this Connection, for chaining 205 */ 206 Connection data(Collection<KeyVal> data); 207 208 /** 209 * Adds all of the supplied data to the request data parameters 210 * @param data map of data parameters 211 * @return this Connection, for chaining 212 */ 213 Connection data(Map<String, String> data); 214 215 /** 216 * Add a number of request data parameters. Multiple parameters may be set at once, e.g.: <code>.data("name", 217 * "jsoup", "language", "Java", "language", "English");</code> creates a query string like: 218 * <code>{@literal ?name=jsoup&language=Java&language=English}</code> 219 * @param keyvals a set of key value pairs. 220 * @return this Connection, for chaining 221 */ 222 Connection data(String... keyvals); 223 224 /** 225 * Get the data KeyVal for this key, if any 226 * @param key the data key 227 * @return null if not set 228 */ 229 KeyVal data(String key); 230 231 /** 232 * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set for URL 233 * encoded form key/value pairs. E.g.: 234 * <code><pre>Jsoup.connect(url) 235 * .requestBody(json) 236 * .header("Content-Type", "application/json") 237 * .post();</pre></code> 238 * If any data key/vals are supplied, they will be sent as URL query params. 239 * @return this Request, for chaining 240 */ 241 Connection requestBody(String body); 242 243 /** 244 * Set a request header. 245 * @param name header name 246 * @param value header value 247 * @return this Connection, for chaining 248 * @see org.jsoup.Connection.Request#headers() 249 */ 250 Connection header(String name, String value); 251 252 /** 253 * Adds each of the supplied headers to the request. 254 * @param headers map of headers name {@literal ->} value pairs 255 * @return this Connection, for chaining 256 * @see org.jsoup.Connection.Request#headers() 257 */ 258 Connection headers(Map<String,String> headers); 259 260 /** 261 * Set a cookie to be sent in the request. 262 * @param name name of cookie 263 * @param value value of cookie 264 * @return this Connection, for chaining 265 */ 266 Connection cookie(String name, String value); 267 268 /** 269 * Adds each of the supplied cookies to the request. 270 * @param cookies map of cookie name {@literal ->} value pairs 271 * @return this Connection, for chaining 272 */ 273 Connection cookies(Map<String, String> cookies); 274 275 /** 276 * Provide an alternate parser to use when parsing the response to a Document. If not set, defaults to the HTML 277 * parser, unless the response content-type is XML, in which case the XML parser is used. 278 * @param parser alternate parser 279 * @return this Connection, for chaining 280 */ 281 Connection parser(Parser parser); 282 283 /** 284 * Sets the default post data character set for x-www-form-urlencoded post data 285 * @param charset character set to encode post data 286 * @return this Connection, for chaining 287 */ 288 Connection postDataCharset(String charset); 289 290 /** 291 * Execute the request as a GET, and parse the result. 292 * @return parsed Document 293 * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed 294 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 295 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 296 * @throws java.net.SocketTimeoutException if the connection times out 297 * @throws IOException on error 298 */ 299 Document get() throws IOException; 300 301 /** 302 * Execute the request as a POST, and parse the result. 303 * @return parsed Document 304 * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed 305 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 306 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 307 * @throws java.net.SocketTimeoutException if the connection times out 308 * @throws IOException on error 309 */ 310 Document post() throws IOException; 311 312 /** 313 * Execute the request. 314 * @return a response object 315 * @throws java.net.MalformedURLException if the request URL is not a HTTP or HTTPS URL, or is otherwise malformed 316 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 317 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 318 * @throws java.net.SocketTimeoutException if the connection times out 319 * @throws IOException on error 320 */ 321 Response execute() throws IOException; 322 323 /** 324 * Get the request object associated with this connection 325 * @return request 326 */ 327 Request request(); 328 329 /** 330 * Set the connection's request 331 * @param request new request object 332 * @return this Connection, for chaining 333 */ 334 Connection request(Request request); 335 336 /** 337 * Get the response, once the request has been executed 338 * @return response 339 */ 340 Response response(); 341 342 /** 343 * Set the connection's response 344 * @param response new response 345 * @return this Connection, for chaining 346 */ 347 Connection response(Response response); 348 349 /** 350 * Common methods for Requests and Responses 351 * @param <T> Type of Base, either Request or Response 352 */ 353 interface Base<T extends Base> { 354 355 /** 356 * Get the URL 357 * @return URL 358 */ 359 URL url(); 360 361 /** 362 * Set the URL 363 * @param url new URL 364 * @return this, for chaining 365 */ 366 T url(URL url); 367 368 /** 369 * Get the request method 370 * @return method 371 */ 372 Method method(); 373 374 /** 375 * Set the request method 376 * @param method new method 377 * @return this, for chaining 378 */ 379 T method(Method method); 380 381 /** 382 * Get the value of a header. If there is more than one header value with the same name, the headers are returned 383 * comma seperated, per <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2">rfc2616-sec4</a>. 384 * <p> 385 * Header names are case insensitive. 386 * </p> 387 * @param name name of header (case insensitive) 388 * @return value of header, or null if not set. 389 * @see #hasHeader(String) 390 * @see #cookie(String) 391 */ 392 String header(String name); 393 394 /** 395 * Get the values of a header. 396 * @param name header name, case insensitive. 397 * @return a list of values for this header, or an empty list if not set. 398 */ 399 List<String> headers(String name); 400 401 /** 402 * Set a header. This method will overwrite any existing header with the same case insensitive name. (If there 403 * is more than one value for this header, this method will update the first matching header. 404 * @param name Name of header 405 * @param value Value of header 406 * @return this, for chaining 407 * @see #addHeader(String, String) 408 */ 409 T header(String name, String value); 410 411 /** 412 * Add a header. The header will be added regardless of whether a header with the same name already exists. 413 * @param name Name of new header 414 * @param value Value of new header 415 * @return this, for chaining 416 */ 417 T addHeader(String name, String value); 418 419 /** 420 * Check if a header is present 421 * @param name name of header (case insensitive) 422 * @return if the header is present in this request/response 423 */ 424 boolean hasHeader(String name); 425 426 /** 427 * Check if a header is present, with the given value 428 * @param name header name (case insensitive) 429 * @param value value (case insensitive) 430 * @return if the header and value pair are set in this req/res 431 */ 432 boolean hasHeaderWithValue(String name, String value); 433 434 /** 435 * Remove headers by name. If there is more than one header with this name, they will all be removed. 436 * @param name name of header to remove (case insensitive) 437 * @return this, for chaining 438 */ 439 T removeHeader(String name); 440 441 /** 442 * Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple 443 * values, only the first header is returned. 444 * <p>Note that this is a view of the headers only, and changes made to this map will not be reflected in the 445 * request/response object.</p> 446 * @return headers 447 * @see #multiHeaders() 448 449 */ 450 Map<String, String> headers(); 451 452 /** 453 * Retreive all of the headers, keyed by the header name, and with a list of values per header. 454 * @return a list of multiple values per header. 455 */ 456 Map<String, List<String>> multiHeaders(); 457 458 /** 459 * Get a cookie value by name from this request/response. 460 * <p> 461 * Response objects have a simplified cookie model. Each cookie set in the response is added to the response 462 * object's cookie key=value map. The cookie's path, domain, and expiry date are ignored. 463 * </p> 464 * @param name name of cookie to retrieve. 465 * @return value of cookie, or null if not set 466 */ 467 String cookie(String name); 468 469 /** 470 * Set a cookie in this request/response. 471 * @param name name of cookie 472 * @param value value of cookie 473 * @return this, for chaining 474 */ 475 T cookie(String name, String value); 476 477 /** 478 * Check if a cookie is present 479 * @param name name of cookie 480 * @return if the cookie is present in this request/response 481 */ 482 boolean hasCookie(String name); 483 484 /** 485 * Remove a cookie by name 486 * @param name name of cookie to remove 487 * @return this, for chaining 488 */ 489 T removeCookie(String name); 490 491 /** 492 * Retrieve all of the request/response cookies as a map 493 * @return cookies 494 */ 495 Map<String, String> cookies(); 496 } 497 498 /** 499 * Represents a HTTP request. 500 */ 501 interface Request extends Base<Request> { 502 /** 503 * Get the proxy used for this request. 504 * @return the proxy; <code>null</code> if not enabled. 505 */ 506 Proxy proxy(); 507 508 /** 509 * Update the proxy for this request. 510 * @param proxy the proxy ot use; <code>null</code> to disable. 511 * @return this Request, for chaining 512 */ 513 Request proxy(Proxy proxy); 514 515 /** 516 * Set the HTTP proxy to use for this request. 517 * @param host the proxy hostname 518 * @param port the proxy port 519 * @return this Connection, for chaining 520 */ 521 Request proxy(String host, int port); 522 523 /** 524 * Get the request timeout, in milliseconds. 525 * @return the timeout in milliseconds. 526 */ 527 int timeout(); 528 529 /** 530 * Update the request timeout. 531 * @param millis timeout, in milliseconds 532 * @return this Request, for chaining 533 */ 534 Request timeout(int millis); 535 536 /** 537 * Get the maximum body size, in bytes. 538 * @return the maximum body size, in bytes. 539 */ 540 int maxBodySize(); 541 542 /** 543 * Update the maximum body size, in bytes. 544 * @param bytes maximum body size, in bytes. 545 * @return this Request, for chaining 546 */ 547 Request maxBodySize(int bytes); 548 549 /** 550 * Get the current followRedirects configuration. 551 * @return true if followRedirects is enabled. 552 */ 553 boolean followRedirects(); 554 555 /** 556 * Configures the request to (not) follow server redirects. By default this is <b>true</b>. 557 * @param followRedirects true if server redirects should be followed. 558 * @return this Request, for chaining 559 */ 560 Request followRedirects(boolean followRedirects); 561 562 /** 563 * Get the current ignoreHttpErrors configuration. 564 * @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be 565 * thrown. 566 */ 567 boolean ignoreHttpErrors(); 568 569 /** 570 * Configures the request to ignore HTTP errors in the response. 571 * @param ignoreHttpErrors set to true to ignore HTTP errors. 572 * @return this Request, for chaining 573 */ 574 Request ignoreHttpErrors(boolean ignoreHttpErrors); 575 576 /** 577 * Get the current ignoreContentType configuration. 578 * @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to 579 * be thrown. 580 */ 581 boolean ignoreContentType(); 582 583 /** 584 * Configures the request to ignore the Content-Type of the response. 585 * @param ignoreContentType set to true to ignore the content type. 586 * @return this Request, for chaining 587 */ 588 Request ignoreContentType(boolean ignoreContentType); 589 590 /** 591 * Get the current state of TLS (SSL) certificate validation. 592 * @return true if TLS cert validation enabled 593 */ 594 boolean validateTLSCertificates(); 595 596 /** 597 * Set TLS certificate validation. 598 * @param value set false to ignore TLS (SSL) certificates 599 */ 600 void validateTLSCertificates(boolean value); 601 602 /** 603 * Add a data parameter to the request 604 * @param keyval data to add. 605 * @return this Request, for chaining 606 */ 607 Request data(KeyVal keyval); 608 609 /** 610 * Get all of the request's data parameters 611 * @return collection of keyvals 612 */ 613 Collection<KeyVal> data(); 614 615 /** 616 * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set for URL 617 * encoded form key/value pairs. E.g.: 618 * <code><pre>Jsoup.connect(url) 619 * .requestBody(json) 620 * .header("Content-Type", "application/json") 621 * .post();</pre></code> 622 * If any data key/vals are supplied, they will be sent as URL query params. 623 * @return this Request, for chaining 624 */ 625 Request requestBody(String body); 626 627 /** 628 * Get the current request body. 629 * @return null if not set. 630 */ 631 String requestBody(); 632 633 /** 634 * Specify the parser to use when parsing the document. 635 * @param parser parser to use. 636 * @return this Request, for chaining 637 */ 638 Request parser(Parser parser); 639 640 /** 641 * Get the current parser to use when parsing the document. 642 * @return current Parser 643 */ 644 Parser parser(); 645 646 /** 647 * Sets the post data character set for x-www-form-urlencoded post data 648 * @param charset character set to encode post data 649 * @return this Request, for chaining 650 */ 651 Request postDataCharset(String charset); 652 653 /** 654 * Gets the post data character set for x-www-form-urlencoded post data 655 * @return character set to encode post data 656 */ 657 String postDataCharset(); 658 659 } 660 661 /** 662 * Represents a HTTP response. 663 */ 664 interface Response extends Base<Response> { 665 666 /** 667 * Get the status code of the response. 668 * @return status code 669 */ 670 int statusCode(); 671 672 /** 673 * Get the status message of the response. 674 * @return status message 675 */ 676 String statusMessage(); 677 678 /** 679 * Get the character set name of the response, derived from the content-type header. 680 * @return character set name 681 */ 682 String charset(); 683 684 /** 685 * Set / override the response character set. When the document body is parsed it will be with this charset. 686 * @param charset to decode body as 687 * @return this Response, for chaining 688 */ 689 Response charset(String charset); 690 691 /** 692 * Get the response content type (e.g. "text/html"); 693 * @return the response content type 694 */ 695 String contentType(); 696 697 /** 698 * Read and parse the body of the response as a Document. If you intend to parse the same response multiple 699 * times, you should {@link #bufferUp()} first. 700 * @return a parsed Document 701 * @throws IOException on error 702 */ 703 Document parse() throws IOException; 704 705 /** 706 * Get the body of the response as a plain string. 707 * @return body 708 */ 709 String body(); 710 711 /** 712 * Get the body of the response as an array of bytes. 713 * @return body bytes 714 */ 715 byte[] bodyAsBytes(); 716 717 /** 718 * Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the 719 * same connection response (otherwise, once the response is read, its InputStream will have been drained and 720 * may not be re-read). Calling {@link #body() } or {@link #bodyAsBytes()} has the same effect. 721 * @return this response, for chaining 722 */ 723 Response bufferUp(); 724 725 /** 726 * Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done with it. 727 * Other body methods (like bufferUp, body, parse, etc) will not work in conjunction with this method. 728 * <p>This method is useful for writing large responses to disk, without buffering them completely into memory first.</p> 729 * @return the response body input stream 730 */ 731 BufferedInputStream bodyStream(); 732 } 733 734 /** 735 * A Key:Value tuple(+), used for form data. 736 */ 737 interface KeyVal { 738 739 /** 740 * Update the key of a keyval 741 * @param key new key 742 * @return this KeyVal, for chaining 743 */ 744 KeyVal key(String key); 745 746 /** 747 * Get the key of a keyval 748 * @return the key 749 */ 750 String key(); 751 752 /** 753 * Update the value of a keyval 754 * @param value the new value 755 * @return this KeyVal, for chaining 756 */ 757 KeyVal value(String value); 758 759 /** 760 * Get the value of a keyval 761 * @return the value 762 */ 763 String value(); 764 765 /** 766 * Add or update an input stream to this keyVal 767 * @param inputStream new input stream 768 * @return this KeyVal, for chaining 769 */ 770 KeyVal inputStream(InputStream inputStream); 771 772 /** 773 * Get the input stream associated with this keyval, if any 774 * @return input stream if set, or null 775 */ 776 InputStream inputStream(); 777 778 /** 779 * Does this keyval have an input stream? 780 * @return true if this keyval does indeed have an input stream 781 */ 782 boolean hasInputStream(); 783 784 /** 785 * Set the Content Type header used in the MIME body (aka mimetype) when uploading files. 786 * Only useful if {@link #inputStream(InputStream)} is set. 787 * <p>Will default to {@code application/octet-stream}.</p> 788 * @param contentType the new content type 789 * @return this KeyVal 790 */ 791 KeyVal contentType(String contentType); 792 793 /** 794 * Get the current Content Type, or {@code null} if not set. 795 * @return the current Content Type. 796 */ 797 String contentType(); 798 } 799}