1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit;
16
17 import static java.nio.charset.StandardCharsets.UTF_16BE;
18 import static java.nio.charset.StandardCharsets.UTF_16LE;
19 import static java.nio.charset.StandardCharsets.UTF_8;
20
21 import java.io.IOException;
22 import java.io.InputStream;
23 import java.io.Serializable;
24 import java.net.URL;
25 import java.nio.charset.Charset;
26 import java.util.List;
27
28 import org.apache.commons.io.ByteOrderMark;
29 import org.apache.commons.io.IOUtils;
30 import org.apache.commons.io.input.BOMInputStream;
31 import org.apache.commons.lang3.StringUtils;
32 import org.apache.commons.logging.Log;
33 import org.apache.commons.logging.LogFactory;
34 import org.htmlunit.http.HttpStatus;
35 import org.htmlunit.util.EncodingSniffer;
36 import org.htmlunit.util.MimeType;
37 import org.htmlunit.util.NameValuePair;
38
39
40
41
42
43
44
45
46
47
48
49
50 public class WebResponse implements Serializable {
51
52 private static final Log LOG = LogFactory.getLog(WebResponse.class);
53 private static final ByteOrderMark[] BOM_HEADERS = {
54 ByteOrderMark.UTF_8,
55 ByteOrderMark.UTF_16LE,
56 ByteOrderMark.UTF_16BE};
57
58 private final long loadTime_;
59 private final WebResponseData responseData_;
60 private final WebRequest request_;
61 private boolean wasContentCharsetTentative_;
62 private boolean wasBlocked_;
63 private String blockReason_;
64
65
66
67
68
69
70
71
72
73 public WebResponse(final WebResponseData responseData, final URL url,
74 final HttpMethod requestMethod, final long loadTime) {
75 this(responseData, new WebRequest(url, requestMethod), loadTime);
76 }
77
78
79
80
81
82
83
84
85 public WebResponse(final WebResponseData responseData,
86 final WebRequest request, final long loadTime) {
87 responseData_ = responseData;
88 request_ = request;
89 loadTime_ = loadTime;
90 }
91
92
93
94
95
96 public WebRequest getWebRequest() {
97 return request_;
98 }
99
100
101
102
103
104 public List<NameValuePair> getResponseHeaders() {
105 return responseData_.getResponseHeaders();
106 }
107
108
109
110
111
112
113 public String getResponseHeaderValue(final String headerName) {
114 for (final NameValuePair pair : responseData_.getResponseHeaders()) {
115 if (pair.getName().equalsIgnoreCase(headerName)) {
116 return pair.getValue();
117 }
118 }
119 return null;
120 }
121
122
123
124
125
126 public int getStatusCode() {
127 return responseData_.getStatusCode();
128 }
129
130
131
132
133
134 public String getStatusMessage() {
135 return responseData_.getStatusMessage();
136 }
137
138
139
140
141
142 public String getContentType() {
143 final String contentTypeHeader = getResponseHeaderValue(HttpHeader.CONTENT_TYPE_LC);
144 if (contentTypeHeader == null) {
145
146 return "";
147 }
148 final int index = contentTypeHeader.indexOf(';');
149 if (index == -1) {
150 return contentTypeHeader;
151 }
152 return contentTypeHeader.substring(0, index);
153 }
154
155
156
157
158
159
160 public Charset getHeaderContentCharset() {
161 final String contentType = getResponseHeaderValue(HttpHeader.CONTENT_TYPE_LC);
162 if (contentType == null) {
163 return null;
164 }
165
166 final int index = contentType.indexOf(';');
167 if (index == -1 || index == 0) {
168 return null;
169 }
170 if (StringUtils.isBlank(contentType.substring(0, index))) {
171 return null;
172 }
173
174 return EncodingSniffer.extractEncodingFromContentType(contentType);
175 }
176
177
178
179
180
181
182
183
184
185
186
187 public Charset getContentCharset() {
188 wasContentCharsetTentative_ = false;
189
190 try (InputStream is = getContentAsStreamWithBomIfApplicable()) {
191 if (is instanceof BOMInputStream) {
192 final String bomCharsetName = ((BOMInputStream) is).getBOMCharsetName();
193 if (bomCharsetName != null) {
194 return Charset.forName(bomCharsetName);
195 }
196 }
197
198 Charset charset = getHeaderContentCharset();
199 if (charset != null) {
200 return charset;
201 }
202
203 final String contentType = getContentType();
204 switch (DefaultPageCreator.determinePageType(contentType)) {
205 case HTML:
206 charset = EncodingSniffer.sniffEncodingFromMetaTag(is);
207 wasContentCharsetTentative_ = true;
208 break;
209 case XML:
210 charset = EncodingSniffer.sniffEncodingFromXmlDeclaration(is);
211 if (charset == null) {
212 charset = UTF_8;
213 }
214 break;
215 default:
216 if (MimeType.TEXT_CSS.equals(contentType)) {
217 charset = EncodingSniffer.sniffEncodingFromCssDeclaration(is);
218 }
219 break;
220 }
221
222 if (charset != null) {
223 return charset;
224 }
225 }
226 catch (final IOException e) {
227 LOG.warn("Error trying to sniff encoding.", e);
228 wasContentCharsetTentative_ = true;
229 }
230 return getWebRequest().getDefaultResponseContentCharset();
231 }
232
233
234
235
236
237
238
239
240
241
242
243
244
245 public boolean wasContentCharsetTentative() {
246 return wasContentCharsetTentative_;
247 }
248
249
250
251
252
253
254 public String getContentAsString() {
255 return getContentAsString(getContentCharset());
256 }
257
258
259
260
261
262
263
264
265 public String getContentAsString(final Charset encoding) {
266 if (responseData_ != null) {
267 try (InputStream in = responseData_.getInputStreamWithBomIfApplicable(BOM_HEADERS)) {
268 if (in instanceof BOMInputStream) {
269 try (BOMInputStream bomIn = (BOMInputStream) in) {
270
271
272 if (bomIn.hasBOM()) {
273 if (bomIn.hasBOM(ByteOrderMark.UTF_8)) {
274 return IOUtils.toString(bomIn, UTF_8);
275 }
276 if (bomIn.hasBOM(ByteOrderMark.UTF_16BE)) {
277 return IOUtils.toString(bomIn, UTF_16BE);
278 }
279 if (bomIn.hasBOM(ByteOrderMark.UTF_16LE)) {
280 return IOUtils.toString(bomIn, UTF_16LE);
281 }
282 }
283 return IOUtils.toString(bomIn, encoding);
284 }
285 }
286
287 return IOUtils.toString(in, encoding);
288 }
289 catch (final IOException e) {
290 LOG.warn(e.getMessage(), e);
291 }
292 }
293 return null;
294 }
295
296
297
298
299
300 public long getContentLength() {
301 if (responseData_ == null) {
302 return 0;
303 }
304 return responseData_.getContentLength();
305 }
306
307
308
309
310
311
312 public InputStream getContentAsStream() throws IOException {
313 return responseData_.getInputStream();
314 }
315
316
317
318
319
320
321
322 public InputStream getContentAsStreamWithBomIfApplicable() throws IOException {
323 if (responseData_ != null) {
324 return responseData_.getInputStreamWithBomIfApplicable(BOM_HEADERS);
325 }
326 return null;
327 }
328
329
330
331
332
333 public long getLoadTime() {
334 return loadTime_;
335 }
336
337
338
339
340 public void cleanUp() {
341 if (responseData_ != null) {
342 responseData_.cleanUp();
343 }
344 }
345
346
347
348
349 public boolean isSuccess() {
350 final int statusCode = getStatusCode();
351 return statusCode >= HttpStatus.OK_200 && statusCode < HttpStatus.MULTIPLE_CHOICES_300;
352 }
353
354
355
356
357 public boolean isSuccessOrUseProxy() {
358 final int statusCode = getStatusCode();
359 return (statusCode >= HttpStatus.OK_200 && statusCode < HttpStatus.MULTIPLE_CHOICES_300)
360 || statusCode == HttpStatus.USE_PROXY_305;
361 }
362
363
364
365
366 public boolean isSuccessOrUseProxyOrNotModified() {
367 final int statusCode = getStatusCode();
368 return (statusCode >= HttpStatus.OK_200 && statusCode < HttpStatus.MULTIPLE_CHOICES_300)
369 || statusCode == HttpStatus.USE_PROXY_305
370 || statusCode == HttpStatus.NOT_MODIFIED_304;
371 }
372
373
374
375
376 public boolean wasBlocked() {
377 return wasBlocked_;
378 }
379
380
381
382
383 public String getBlockReason() {
384 return blockReason_;
385 }
386
387
388
389
390
391
392 public void markAsBlocked(final String blockReason) {
393 wasBlocked_ = true;
394 blockReason_ = blockReason;
395 }
396 }