1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 package org.htmlunit.util;
16
17 import static java.nio.charset.StandardCharsets.UTF_8;
18 import static org.htmlunit.util.EncodingSniffer.extractEncodingFromContentType;
19 import static org.htmlunit.util.EncodingSniffer.sniffEncodingFromCssDeclaration;
20 import static org.htmlunit.util.EncodingSniffer.sniffEncodingFromMetaTag;
21 import static org.htmlunit.util.EncodingSniffer.sniffEncodingFromXmlDeclaration;
22 import static org.junit.Assert.assertSame;
23
24 import java.io.ByteArrayInputStream;
25 import java.nio.charset.Charset;
26
27 import org.htmlunit.HttpHeader;
28 import org.junit.Test;
29
30
31
32
33
34
35
36
37
38 public class EncodingSnifferTest {
39
40
41
42
43 @Test
44 public void fromHttpHeaders() throws Exception {
45 header(null, null, null);
46 header(null, "foo", "bar");
47 header(null, HttpHeader.CONTENT_TYPE, "blah");
48
49 header(null, HttpHeader.CONTENT_TYPE, "text/html;charset=blah");
50 header(UTF_8, HttpHeader.CONTENT_TYPE, "text/html;charset=utf-8");
51 header(UTF_8, HttpHeader.CONTENT_TYPE, "text/html;charset=utf-8;");
52
53 header(UTF_8, HttpHeader.CONTENT_TYPE, "text/xml;charset=UTF-8");
54 }
55
56
57
58
59 @Test
60 public void fromHttpHeadersNoContentType() throws Exception {
61 header(null, HttpHeader.CONTENT_TYPE, "charset=blah");
62 header(UTF_8, HttpHeader.CONTENT_TYPE, "charset=utf-8");
63 header(UTF_8, HttpHeader.CONTENT_TYPE, ";charset=utf-8;");
64 }
65
66 private static void header(final Charset expectedEncoding, final String headerName, final String headerValue) {
67 assertSame(expectedEncoding, extractEncodingFromContentType(headerValue));
68 }
69
70
71
72
73 @Test
74 public void fromMetaTag() throws Exception {
75 meta(null, "");
76 meta(null, "foo");
77 meta(null, "<!--");
78 meta(null, " <!-- blah");
79 meta(null, " <!-- blah --> ");
80 meta(null, "<");
81 meta(null, "</");
82 meta(null, "<meta/>");
83 meta(null, "<meta />");
84 meta(null, "<meta blah />");
85 meta(null, "<meta");
86 meta(null, "<meta ");
87 meta(null, "<meta blah");
88 meta(null, "<meta blah ");
89 meta(null, "<meta a='b'");
90 meta(null, "<meta a='b' c=d e=\"f\"/>");
91 meta(null, "<meta a='b' c=d e=\"f\" content='text/html; charset=blah' />");
92 meta(UTF_8, "<meta a='b' c=d e=\"f\" content='text/html; charset=utf-8' />");
93 meta(UTF_8, "abc <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>");
94 meta(UTF_8, "abc <meta http-equiv='Content-Type' content='text/html; CHARSET=UTF-8'/>");
95 meta(UTF_8, "abc <meta http-equiv='Content-Type' content='text/html; chArsEt=UtF-8'/>");
96 meta(UTF_8, "<meta a='b' c=d e=\"f\" CONTENT='text/html; CHARSET=utf-8' />");
97 }
98
99 private static void meta(final Charset expectedEncoding, final String content) throws Exception {
100 assertSame(expectedEncoding, sniffEncodingFromMetaTag(new ByteArrayInputStream(content.getBytes())));
101 }
102
103
104
105
106 @Test
107 public void fromXmlDeclaration() throws Exception {
108 xmlDeclaration(null, "");
109 xmlDeclaration(null, "foo");
110 xmlDeclaration(null, "<?");
111 xmlDeclaration(null, "<?xml");
112 xmlDeclaration(null, "<?xml ");
113 xmlDeclaration(null, "<?xml encoding");
114 xmlDeclaration(null, "<?xml encoding=");
115 xmlDeclaration(null, "<?xml encoding='utf-8");
116 xmlDeclaration(null, "<?xml encoding='utf-8'");
117 xmlDeclaration(null, "<?xml encoding='blah'?>");
118 xmlDeclaration(UTF_8, "<?xml encoding='utf-8'?>");
119 xmlDeclaration(null, "<?xml encoding=\"utf-8");
120 xmlDeclaration(null, "<?xml encoding=\"utf-8\"");
121 xmlDeclaration(UTF_8, "<?xml encoding=\"utf-8\"?>");
122 }
123
124 private static void xmlDeclaration(final Charset expectedEncoding, final String content) throws Exception {
125 assertSame(expectedEncoding, sniffEncodingFromXmlDeclaration(new ByteArrayInputStream(content.getBytes())));
126 }
127
128
129
130
131 @Test
132 public void fromCssDeclaration() throws Exception {
133 cssDeclaration(null, "");
134 cssDeclaration(null, "foo");
135 cssDeclaration(null, "@charset");
136 cssDeclaration(null, "@charset \"utf-8");
137 cssDeclaration(null, "@charset \"utf-8\"");
138 cssDeclaration(null, "@charset\"utf-8\";");
139 cssDeclaration(null, "@charset 'utf-8';");
140 cssDeclaration(UTF_8, "@charset \"utf-8\";");
141 cssDeclaration(null, " @charset \"utf-8\";");
142 cssDeclaration(null, "@charset \"blah\";");
143 }
144
145 private static void cssDeclaration(final Charset expectedEncoding, final String content) throws Exception {
146 assertSame(expectedEncoding, sniffEncodingFromCssDeclaration(new ByteArrayInputStream(content.getBytes())));
147 }
148
149
150
151
152 @Test
153 public void fromContentType() throws Exception {
154 contentType(null, null);
155 contentType(null, "");
156 contentType(null, " \t \n ");
157 contentType(null, "foo");
158 contentType(null, MimeType.TEXT_HTML);
159 contentType(null, "\n text/html \t");
160 contentType(null, "\n text/html ; char \t");
161 contentType(null, "\n text/html ; charset \t");
162 contentType(null, "\n text/html ; charset=");
163 contentType(null, "\n text/html ; charset= \t");
164 contentType(null, "\n text/html ; charset =");
165 contentType(null, "\n text/html ; charset = \n");
166 contentType(null, "\n text/html ; charset=blah");
167 contentType(UTF_8, "\n text/html ; charset=utf-8");
168 contentType(UTF_8, "\n text/html ; charset=utf-8;");
169 contentType(UTF_8, "\n text/html ; charset = \n utf-8 ");
170 contentType(UTF_8, "\n text/html ; charset = \n utf-8 ; ");
171 contentType(null, "\n text/html ; charset = \n'");
172 contentType(null, "\n text/html ; charset = \n' ");
173 contentType(null, "\n text/html ; charset = \n' utf-8");
174 contentType(UTF_8, "\n text/html ; charset = \n'utf-8'");
175 contentType(null, "\n text/html ; charset = \n\"");
176 contentType(null, "\n text/html ; charset = \n\" ");
177 contentType(null, "\n text/html ; charset = \n\" utf-8");
178 contentType(UTF_8, "\n text/html ; charset = \n\"utf-8\"");
179 }
180
181 private static void contentType(final Charset expectedEncoding, final String contentType) {
182 assertSame(expectedEncoding, extractEncodingFromContentType(contentType));
183 }
184
185 }