/*
 * Copyright (c) 2002-2025 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15  package org.htmlunit.html.parser;
16  
17  import static java.nio.charset.StandardCharsets.ISO_8859_1;
18  import static java.nio.charset.StandardCharsets.UTF_8;
19  
20  import java.io.IOException;
21  import java.io.OutputStreamWriter;
22  import java.io.Writer;
23  import java.nio.charset.Charset;
24  import java.util.HashMap;
25  import java.util.Map;
26  
27  import javax.servlet.Servlet;
28  import javax.servlet.http.HttpServlet;
29  import javax.servlet.http.HttpServletRequest;
30  import javax.servlet.http.HttpServletResponse;
31  
32  import org.htmlunit.WebClient;
33  import org.htmlunit.WebServerTestCase;
34  import org.htmlunit.html.HtmlPage;
35  import org.htmlunit.util.MimeType;
36  import org.junit.jupiter.api.Test;
37  
38  /**
39   * Test class for {@link HTMLParser}.
40   *
41   * @author Christian Sell
42   * @author Marc Guillemot
43   * @author Ahmed Ashour
44   * @author Sudhan Moghe
45   */
46  public class HTMLParser3Test extends WebServerTestCase {
47  
48      /**
49       * @throws Exception if the test fails
50       */
51      @Test
52      public void headerVsMetaTagContentType_both() throws Exception {
53          HeaderVsMetaTagContentTypeServlet.setEncoding(UTF_8, ISO_8859_1);
54          headerVsMetaTagContentType(true);
55      }
56  
57      /**
58       * @throws Exception if the test fails
59       */
60      @Test
61      public void headerVsMetaTagContentType_bothReversed() throws Exception {
62          HeaderVsMetaTagContentTypeServlet.setEncoding(ISO_8859_1, UTF_8);
63          headerVsMetaTagContentType(false);
64      }
65  
66      /**
67       * @throws Exception if the test fails
68       */
69      @Test
70      public void headerVsMetaTagContentType4_headerOnly() throws Exception {
71          HeaderVsMetaTagContentTypeServlet.setEncoding(UTF_8, null);
72          headerVsMetaTagContentType(true);
73      }
74  
75      /**
76       * @throws Exception if the test fails
77       */
78      @Test
79      public void headerVsMetaTagContentType_metaOnly() throws Exception {
80          HeaderVsMetaTagContentTypeServlet.setEncoding(null, UTF_8);
81          headerVsMetaTagContentType(true);
82      }
83  
84      private void headerVsMetaTagContentType(final boolean utf8Encoded) throws Exception {
85          final Map<String, Class<? extends Servlet>> servlets = new HashMap<>();
86          servlets.put("/test", HeaderVsMetaTagContentTypeServlet.class);
87          startWebServer("./", null, servlets);
88  
89          final WebClient client = getWebClient();
90          final HtmlPage page = client.getPage(URL_FIRST + "test");
91          assertEquals(utf8Encoded, HeaderVsMetaTagContentTypeServlet.UTF8_STRING.equals(page.asNormalizedText()));
92      }
93  
94      /**
95       * Servlet for headerVsMetaTagContentType(boolean).
96       */
97      public static class HeaderVsMetaTagContentTypeServlet extends HttpServlet {
98          private static final String UTF8_STRING = "\u064A\u0627 \u0644\u064A\u064A\u064A\u064A\u0644";
99          private static Charset HEADER_ENCODING_;
100         private static Charset META_TAG_ENCODING_;
101 
102         private static void setEncoding(final Charset headerEncoding, final Charset metaTagEncoding) {
103             HEADER_ENCODING_ = headerEncoding;
104             META_TAG_ENCODING_ = metaTagEncoding;
105         }
106 
107         /**
108          * {@inheritDoc}
109          */
110         @Override
111         protected void doGet(final HttpServletRequest request, final HttpServletResponse response) throws IOException {
112             response.setContentType(MimeType.TEXT_HTML);
113             if (HEADER_ENCODING_ != null) {
114                 response.setCharacterEncoding(HEADER_ENCODING_.name());
115             }
116             try (Writer writer = new OutputStreamWriter(response.getOutputStream(), UTF_8)) {
117                 String html = "<html><head>";
118                 if (META_TAG_ENCODING_ != null) {
119                     html += "<META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=" + META_TAG_ENCODING_ + "'>";
120                 }
121                 html += "</head><body>" + UTF8_STRING + "</body></html>";
122                 writer.write(html);
123             }
124         }
125     }
126 
127 }