View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.html.parser;
16  
17  import static java.nio.charset.StandardCharsets.ISO_8859_1;
18  import static java.nio.charset.StandardCharsets.UTF_8;
19  
20  import java.io.IOException;
21  import java.io.OutputStreamWriter;
22  import java.io.Writer;
23  import java.nio.charset.Charset;
24  import java.util.HashMap;
25  import java.util.Map;
26  
27  import javax.servlet.Servlet;
28  import javax.servlet.http.HttpServlet;
29  import javax.servlet.http.HttpServletRequest;
30  import javax.servlet.http.HttpServletResponse;
31  
32  import org.htmlunit.WebClient;
33  import org.htmlunit.WebServerTestCase;
34  import org.htmlunit.html.HtmlPage;
35  import org.htmlunit.junit.BrowserRunner;
36  import org.htmlunit.util.MimeType;
37  import org.junit.Test;
38  import org.junit.runner.RunWith;
39  
40  /**
41   * Test class for {@link HTMLParser}.
42   *
43   * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
44   * @author Marc Guillemot
45   * @author Ahmed Ashour
46   * @author Sudhan Moghe
47   */
48  @RunWith(BrowserRunner.class)
49  public class HTMLParser3Test extends WebServerTestCase {
50  
51      /**
52       * @throws Exception if the test fails
53       */
54      @Test
55      public void headerVsMetaTagContentType_both() throws Exception {
56          HeaderVsMetaTagContentTypeServlet.setEncoding(UTF_8, ISO_8859_1);
57          headerVsMetaTagContentType(true);
58      }
59  
60      /**
61       * @throws Exception if the test fails
62       */
63      @Test
64      public void headerVsMetaTagContentType_bothReversed() throws Exception {
65          HeaderVsMetaTagContentTypeServlet.setEncoding(ISO_8859_1, UTF_8);
66          headerVsMetaTagContentType(false);
67      }
68  
69      /**
70       * @throws Exception if the test fails
71       */
72      @Test
73      public void headerVsMetaTagContentType4_headerOnly() throws Exception {
74          HeaderVsMetaTagContentTypeServlet.setEncoding(UTF_8, null);
75          headerVsMetaTagContentType(true);
76      }
77  
78      /**
79       * @throws Exception if the test fails
80       */
81      @Test
82      public void headerVsMetaTagContentType_metaOnly() throws Exception {
83          HeaderVsMetaTagContentTypeServlet.setEncoding(null, UTF_8);
84          headerVsMetaTagContentType(true);
85      }
86  
87      private void headerVsMetaTagContentType(final boolean utf8Encoded) throws Exception {
88          final Map<String, Class<? extends Servlet>> servlets = new HashMap<>();
89          servlets.put("/test", HeaderVsMetaTagContentTypeServlet.class);
90          startWebServer("./", null, servlets);
91  
92          final WebClient client = getWebClient();
93          final HtmlPage page = client.getPage(URL_FIRST + "test");
94          assertEquals(utf8Encoded, HeaderVsMetaTagContentTypeServlet.UTF8_STRING.equals(page.asNormalizedText()));
95      }
96  
97      /**
98       * Servlet for headerVsMetaTagContentType(boolean).
99       */
100     public static class HeaderVsMetaTagContentTypeServlet extends HttpServlet {
101         private static final String UTF8_STRING = "\u064A\u0627 \u0644\u064A\u064A\u064A\u064A\u0644";
102         private static Charset HEADER_ENCODING_;
103         private static Charset META_TAG_ENCODING_;
104 
105         private static void setEncoding(final Charset headerEncoding, final Charset metaTagEncoding) {
106             HEADER_ENCODING_ = headerEncoding;
107             META_TAG_ENCODING_ = metaTagEncoding;
108         }
109 
110         /**
111          * {@inheritDoc}
112          */
113         @Override
114         protected void doGet(final HttpServletRequest request, final HttpServletResponse response) throws IOException {
115             response.setContentType(MimeType.TEXT_HTML);
116             if (HEADER_ENCODING_ != null) {
117                 response.setCharacterEncoding(HEADER_ENCODING_.name());
118             }
119             try (Writer writer = new OutputStreamWriter(response.getOutputStream(), UTF_8)) {
120                 String html = "<html><head>";
121                 if (META_TAG_ENCODING_ != null) {
122                     html += "<META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=" + META_TAG_ENCODING_ + "'>";
123                 }
124                 html += "</head><body>" + UTF8_STRING + "</body></html>";
125                 writer.write(html);
126             }
127         }
128     }
129 
130 }