1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.protocol.data;
16
17 import static java.nio.charset.StandardCharsets.US_ASCII;
18 import static org.htmlunit.protocol.data.DataURLConnection.DATA_PREFIX;
19
20 import java.io.UnsupportedEncodingException;
21 import java.net.URL;
22 import java.nio.charset.Charset;
23 import java.nio.charset.IllegalCharsetNameException;
24 import java.nio.charset.UnsupportedCharsetException;
25 import java.util.Base64;
26
27 import org.apache.commons.lang3.StringUtils;
28 import org.htmlunit.util.MimeType;
29 import org.htmlunit.util.UrlUtils;
30
31
32
33
34
35
36
37
38 public class DataUrlDecoder {
39 private static final Charset DEFAULT_CHARSET = US_ASCII;
40 private static final String DEFAULT_MEDIA_TYPE = MimeType.TEXT_PLAIN;
41 private final String mediaType_;
42 private final Charset charset_;
43 private final byte[] content_;
44
45
46
47
48
49
50
51 protected DataUrlDecoder(final byte[] data, final String mediaType, final Charset charset) {
52 content_ = data;
53 mediaType_ = mediaType;
54 charset_ = charset;
55 }
56
57
58
59
60
61
62
63 public static DataUrlDecoder decode(final URL url) throws UnsupportedEncodingException {
64 return decodeDataURL(url.toExternalForm());
65 }
66
67
68
69
70
71
72
73
74 public static DataUrlDecoder decodeDataURL(final String url) throws UnsupportedEncodingException {
75 if (!url.startsWith(DATA_PREFIX)) {
76 throw new UnsupportedEncodingException("Invalid data url: '" + url + "' (wrong prefix)");
77 }
78 final int comma = url.indexOf(',');
79 if (comma < 0) {
80 throw new UnsupportedEncodingException("Invalid data url: '" + url + "' (no data)");
81 }
82
83 String beforeData = url.substring(DATA_PREFIX.length(), comma);
84 final boolean base64 = beforeData.endsWith(";base64");
85 if (base64) {
86 beforeData = beforeData.substring(0, beforeData.length() - 7);
87 }
88 final String mediaType = extractMediaType(beforeData);
89 final Charset charset = extractCharset(beforeData);
90
91 try {
92 byte[] data = url.substring(comma + 1).getBytes(charset);
93 data = UrlUtils.decodeDataUrl(data, base64);
94 if (base64) {
95 data = Base64.getDecoder().decode(data);
96 }
97 return new DataUrlDecoder(data, mediaType, charset);
98 }
99 catch (final IllegalArgumentException e) {
100 final UnsupportedEncodingException ex =
101 new UnsupportedEncodingException("Invalid data url: '" + url + "' (data decoding failed)");
102 ex.initCause(e);
103 throw ex;
104 }
105 }
106
107 private static Charset extractCharset(final String beforeData) {
108 if (beforeData.contains(";")) {
109 String charsetName = StringUtils.substringAfter(beforeData, ";");
110 charsetName = charsetName.trim();
111 if (charsetName.startsWith("charset=")) {
112 charsetName = charsetName.substring(8);
113 }
114 try {
115 return Charset.forName(charsetName);
116 }
117 catch (final UnsupportedCharsetException | IllegalCharsetNameException e) {
118 return DEFAULT_CHARSET;
119 }
120 }
121 return DEFAULT_CHARSET;
122 }
123
124 private static String extractMediaType(final String beforeData) {
125 if (beforeData.contains("/")) {
126 if (beforeData.contains(";")) {
127 return StringUtils.substringBefore(beforeData, ";");
128 }
129 return beforeData;
130 }
131 return DEFAULT_MEDIA_TYPE;
132 }
133
134
135
136
137
138 public String getMediaType() {
139 return mediaType_;
140 }
141
142
143
144
145
146 public String getCharset() {
147 return charset_.name();
148 }
149
150
151
152
153
154 public byte[] getBytes() {
155 return content_;
156 }
157
158
159
160
161
162
163
164 public String getDataAsString() throws UnsupportedEncodingException {
165 return new String(content_, charset_);
166 }
167 }