View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit;
16  
17  import static java.nio.charset.StandardCharsets.ISO_8859_1;
18  
19  import java.io.IOException;
20  import java.io.InputStream;
21  import java.io.Serializable;
22  import java.util.Collections;
23  import java.util.List;
24  import java.util.zip.GZIPInputStream;
25  import java.util.zip.Inflater;
26  import java.util.zip.InflaterInputStream;
27  
28  import org.apache.commons.io.ByteOrderMark;
29  import org.apache.commons.io.IOUtils;
30  import org.apache.commons.io.input.BOMInputStream;
31  import org.apache.commons.lang3.ArrayUtils;
32  import org.apache.commons.lang3.StringUtils;
33  import org.apache.commons.logging.Log;
34  import org.apache.commons.logging.LogFactory;
35  import org.brotli.dec.BrotliInputStream;
36  import org.htmlunit.util.MimeType;
37  import org.htmlunit.util.NameValuePair;
38  
39  /**
40   * Simple data object to simplify WebResponse creation.
41   *
42   * @author Brad Clarke
43   * @author Daniel Gredler
44   * @author Ahmed Ashour
45   * @author Ronald Brill
46   */
47  public class WebResponseData implements Serializable {
48      private static final Log LOG = LogFactory.getLog(WebResponseData.class);
49  
50      private final int statusCode_;
51      private final String statusMessage_;
52      private final List<NameValuePair> responseHeaders_;
53      private final DownloadedContent downloadedContent_;
54  
55      /**
56       * Constructs with a raw byte[] (mostly for testing).
57       *
58       * @param body              Body of this response
59       * @param statusCode        Status code from the server
60       * @param statusMessage     Status message from the server
61       * @param responseHeaders   Headers in this response
62       */
63      public WebResponseData(final byte[] body, final int statusCode, final String statusMessage,
64              final List<NameValuePair> responseHeaders) {
65          this(new DownloadedContent.InMemory(body), statusCode, statusMessage, responseHeaders);
66      }
67  
68      /**
69       * Constructs without data stream for subclasses that override getBody().
70       *
71       * @param statusCode        Status code from the server
72       * @param statusMessage     Status message from the server
73       * @param responseHeaders   Headers in this response
74       */
75      protected WebResponseData(final int statusCode,
76              final String statusMessage, final List<NameValuePair> responseHeaders) {
77          this(ArrayUtils.EMPTY_BYTE_ARRAY, statusCode, statusMessage, responseHeaders);
78      }
79  
80      /**
81       * Constructor.
82       * @param downloadedContent the downloaded content
83       * @param statusCode        Status code from the server
84       * @param statusMessage     Status message from the server
85       * @param responseHeaders   Headers in this response
86       */
87      public WebResponseData(final DownloadedContent downloadedContent, final int statusCode, final String statusMessage,
88              final List<NameValuePair> responseHeaders) {
89          statusCode_ = statusCode;
90          statusMessage_ = statusMessage;
91          responseHeaders_ = Collections.unmodifiableList(responseHeaders);
92          downloadedContent_ = downloadedContent;
93      }
94  
95      private InputStream getStream(final ByteOrderMark... bomHeaders) throws IOException {
96          InputStream stream = downloadedContent_.getInputStream();
97          if (downloadedContent_.isEmpty()) {
98              return stream;
99          }
100 
101         final List<NameValuePair> headers = getResponseHeaders();
102         final String encoding = getHeader(headers, "content-encoding");
103         if (encoding != null) {
104             boolean isGzip = StringUtils.contains(encoding, "gzip") && !"no-gzip".equals(encoding);
105             if ("gzip-only-text/html".equals(encoding)) {
106                 isGzip = MimeType.TEXT_HTML.equals(getHeader(headers, "content-type"));
107             }
108             if (isGzip) {
109                 try {
110                     stream = new GZIPInputStream(stream);
111                 }
112                 catch (final IOException e) {
113                     LOG.error("Reading gzip encodec content failed.", e);
114                     stream.close();
115                     stream = IOUtils.toInputStream(
116                                 "<!DOCTYPE html><html>\n"
117                                  + "<head><title>Problem loading page</title></head>\n"
118                                  + "<body>\n"
119                                  + "<h1>Content Encoding Error</h1>\n"
120                                  + "<p>The page you are trying to view cannot be shown because"
121                                  + " it uses an invalid or unsupported form of compression.</p>\n"
122                                  + "</body>\n"
123                                  + "</html>", ISO_8859_1);
124                 }
125                 if (stream != null && bomHeaders != null) {
126                     stream = new BOMInputStream(stream, bomHeaders);
127                 }
128                 return stream;
129             }
130 
131             if ("br".equals(encoding)) {
132                 try {
133                     stream = new BrotliInputStream(stream);
134                 }
135                 catch (final IOException e) {
136                     LOG.error("Reading Brotli encodec content failed.", e);
137                     stream.close();
138                     stream = IOUtils.toInputStream(
139                                 "<!DOCTYPE html><html>\n"
140                                  + "<head><title>Problem loading page</title></head>\n"
141                                  + "<body>\n"
142                                  + "<h1>Content Encoding Error</h1>\n"
143                                  + "<p>The page you are trying to view cannot be shown because"
144                                  + " it uses an invalid or unsupported form of compression.</p>\n"
145                                  + "</body>\n"
146                                  + "</html>", ISO_8859_1);
147                 }
148                 return stream;
149             }
150 
151             if (StringUtils.contains(encoding, "deflate")) {
152                 boolean zlibHeader = false;
153                 if (stream.markSupported()) { // should be always the case as the content is in a byte[] or in a file
154                     stream.mark(2);
155                     final byte[] buffer = new byte[2];
156                     final int byteCount = IOUtils.read(stream, buffer, 0, 2);
157                     zlibHeader = byteCount == 2 && (((buffer[0] & 0xff) << 8) | (buffer[1] & 0xff)) == 0x789c;
158                     stream.reset();
159                 }
160                 if (zlibHeader) {
161                     stream = new InflaterInputStream(stream);
162                 }
163                 else {
164                     stream = new InflaterInputStream(stream, new Inflater(true));
165                 }
166                 return stream;
167             }
168         }
169 
170         if (stream != null && bomHeaders != null) {
171             stream = new BOMInputStream(stream, bomHeaders);
172         }
173         return stream;
174     }
175 
176     private static String getHeader(final List<NameValuePair> headers, final String name) {
177         for (final NameValuePair header : headers) {
178             final String headerName = header.getName().trim();
179             if (name.equalsIgnoreCase(headerName)) {
180                 return header.getValue();
181             }
182         }
183 
184         return null;
185     }
186 
187     /**
188      * Returns the response body.
189      * This may cause memory problem for very large responses.
190      * @return response body
191      */
192     public byte[] getBody() {
193         try (InputStream is = getInputStream()) {
194             return IOUtils.toByteArray(is);
195         }
196         catch (final IOException e) {
197             throw new RuntimeException(e); // shouldn't we allow the method to throw IOException?
198         }
199     }
200 
201     /**
202      * Returns a new {@link InputStream} allowing to read the downloaded content.
203      * @return the associated InputStream
204      * @throws IOException in case of IO problems
205      */
206     public InputStream getInputStream() throws IOException {
207         return getStream((ByteOrderMark[]) null);
208     }
209 
210     /**
211      * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br>
212      *
213      * @param bomHeaders the supported bomHeaders
214      * @return the associated InputStream wrapped with a bom input stream if applicable
215      * @throws IOException in case of IO problems
216      */
217     public InputStream getInputStreamWithBomIfApplicable(final ByteOrderMark... bomHeaders) throws IOException {
218         return getStream(bomHeaders);
219     }
220 
221     /**
222      * @return response headers
223      */
224     public List<NameValuePair> getResponseHeaders() {
225         return responseHeaders_;
226     }
227 
228     /**
229      * @return response status code
230      */
231     public int getStatusCode() {
232         return statusCode_;
233     }
234 
235     /**
236      * @return response status message
237      */
238     public String getStatusMessage() {
239         return statusMessage_;
240     }
241 
242     /**
243      * Returns length of the content data.
244      * @return the length
245      */
246     public long getContentLength() {
247         return downloadedContent_.length();
248     }
249 
250     /**
251      * Clean up the downloaded content.
252      */
253     public void cleanUp() {
254         downloadedContent_.cleanUp();
255     }
256 }