/*
 * Copyright (c) 2002-2025 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
15  package org.htmlunit.javascript.host;
16  
17  import org.htmlunit.WebDriverTestCase;
18  import org.htmlunit.junit.BrowserRunner;
19  import org.htmlunit.junit.annotation.Alerts;
20  import org.junit.Test;
21  import org.junit.runner.RunWith;
22  
23  /**
24   * Tests for {@link TextDecoder}.
25   *
26   * @author Ronald Brill
27   * @see <a href="https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder">TextDecoder() - Web APIs | MDN</a>
28   */
29  @RunWith(BrowserRunner.class)
30  public class TextDecoderTest extends WebDriverTestCase {
31  
32      /**
33       * @throws Exception on test failure
34       */
35      @Test
36      @Alerts({"utf-8", "utf-8", "utf-8", "utf-8"})
37      public void encoding() throws Exception {
38          final String html = DOCTYPE_HTML
39              + "<html>\n"
40              + "<head>\n"
41              + "  <script>\n"
42              + LOG_TITLE_FUNCTION
43              + "    function doTest() {\n"
44              + "      var enc = new TextDecoder();\n"
45              + "      log(enc.encoding);\n"
46  
47              + "      enc = new TextDecoder(undefined);\n"
48              + "      log(enc.encoding);\n"
49  
50              + "      enc = new TextDecoder('utf-8');\n"
51              + "      log(enc.encoding);\n"
52  
53              + "      enc = new TextDecoder('utf8');\n"
54              + "      log(enc.encoding);\n"
55  
56              + "    }\n"
57              + "  </script>\n"
58              + "</head>\n"
59              + "<body onload='doTest()'>\n"
60              + "</body></html>";
61  
62          loadPageVerifyTitle2(html);
63      }
64  
65      /**
66       * @throws Exception on test failure
67       */
68      @Test
69      @Alerts("utf-8")
70      public void encoding_utf8() throws Exception {
71          encoding("unicode-1-1-utf-8");
72          encoding("utf-8");
73          encoding("utf8");
74      }
75  
76      /**
77       * @throws Exception on test failure
78       */
79      @Test
80      @Alerts("ibm866")
81      public void encoding_ibm866() throws Exception {
82          encoding("866");
83          encoding("cp866");
84          encoding("csibm866");
85          encoding("ibm866");
86      }
87  
88      /**
89       * @throws Exception on test failure
90       */
91      @Test
92      @Alerts("iso-8859-2")
93      public void encoding_iso_8859_2() throws Exception {
94          encoding("csisolatin2");
95          encoding("iso-8859-2");
96          encoding("iso-ir-101");
97          encoding("iso8859-2");
98          encoding("iso88592");
99          encoding("iso_8859-2");
100         encoding("iso_8859-2:1987");
101         encoding("l2");
102         encoding("latin2");
103     }
104 
105     /**
106      * @throws Exception on test failure
107      */
108     @Test
109     @Alerts("iso-8859-3")
110     public void encoding_iso_8859_3() throws Exception {
111         encoding("csisolatin3");
112         encoding("iso-8859-3");
113         encoding("iso-ir-109");
114         encoding("iso8859-3");
115         encoding("iso88593");
116         encoding("iso_8859-3");
117         encoding("iso_8859-3:1988");
118         encoding("l3");
119         encoding("latin3");
120     }
121 
122     /**
123      * @throws Exception on test failure
124      */
125     @Test
126     @Alerts("iso-8859-4")
127     public void encoding_iso_8859_4() throws Exception {
128         encoding("csisolatin4");
129         encoding("iso-8859-4");
130         encoding("iso-ir-110");
131         encoding("iso8859-4");
132         encoding("iso88594");
133         encoding("iso_8859-4");
134         encoding("iso_8859-4:1988");
135         encoding("l4");
136         encoding("latin4");
137     }
138 
139     /**
140      * @throws Exception on test failure
141      */
142     @Test
143     @Alerts("iso-8859-5")
144     public void encoding_iso_8859_5() throws Exception {
145         encoding("csisolatincyrillic");
146         encoding("cyrillic");
147         encoding("iso-8859-5");
148         encoding("iso-ir-144");
149         encoding("iso88595");
150         encoding("iso_8859-5");
151         encoding("iso_8859-5:1988");
152     }
153 
154     /**
155      * @throws Exception on test failure
156      */
157     @Test
158     @Alerts("iso-8859-6")
159     public void encoding_iso_8859_6() throws Exception {
160         encoding("arabic");
161         encoding("asmo-708");
162         encoding("csiso88596e");
163         encoding("csiso88596i");
164         encoding("csisolatinarabic");
165         encoding("ecma-114");
166         encoding("iso-8859-6");
167         encoding("iso-8859-6-e");
168         encoding("iso-8859-6-i");
169         encoding("iso-ir-127");
170         encoding("iso8859-6");
171         encoding("iso88596");
172         encoding("iso_8859-6");
173         encoding("iso_8859-6:1987");
174     }
175 
176     /**
177      * @throws Exception on test failure
178      */
179     @Test
180     @Alerts("iso-8859-7")
181     public void encoding_iso_8859_7() throws Exception {
182         encoding("csisolatingreek");
183         encoding("ecma-118");
184         encoding("elot_928");
185         encoding("greek");
186         encoding("greek8");
187         encoding("iso-8859-7");
188         encoding("iso-ir-126");
189         encoding("iso8859-7");
190         encoding("iso88597");
191         encoding("iso_8859-7");
192         encoding("iso_8859-7:1987");
193         encoding("sun_eu_greek");
194     }
195 
196     /**
197      * @throws Exception on test failure
198      */
199     @Test
200     @Alerts("iso-8859-8")
201     public void encoding_iso_8859_8() throws Exception {
202         encoding("csiso88598e");
203         encoding("csisolatinhebrew");
204         encoding("hebrew");
205         encoding("iso-8859-8");
206         encoding("iso-8859-8-e");
207         encoding("iso-ir-138");
208         encoding("iso8859-8");
209         encoding("iso88598");
210         encoding("iso_8859-8");
211         encoding("iso_8859-8:1988");
212         encoding("visual");
213     }
214 
215     /**
216      * @throws Exception on test failure
217      */
218     @Test
219     @Alerts("iso-8859-8-i")
220     public void encoding_iso_8859_8i() throws Exception {
221         encoding("csiso88598i");
222         encoding("iso-8859-8-i");
223         encoding("logical");
224     }
225 
226     /**
227      * @throws Exception on test failure
228      */
229     @Test
230     @Alerts("iso-8859-10")
231     public void encoding_iso_8859_10() throws Exception {
232         encoding("csisolatin6");
233         encoding("iso-8859-10");
234         encoding("iso-ir-157");
235         encoding("iso8859-10");
236         encoding("iso885910");
237         encoding("l6");
238         encoding("latin6");
239     }
240 
241     /**
242      * @throws Exception on test failure
243      */
244     @Test
245     @Alerts("iso-8859-13")
246     public void encoding_iso_8859_13() throws Exception {
247         encoding("iso-8859-13");
248         encoding("iso8859-13");
249         encoding("iso885913");
250     }
251 
252     /**
253      * @throws Exception on test failure
254      */
255     @Test
256     @Alerts("iso-8859-14")
257     public void encoding_iso_8859_14() throws Exception {
258         encoding("iso-8859-14");
259         encoding("iso8859-14");
260         encoding("iso885914");
261     }
262 
263     /**
264      * @throws Exception on test failure
265      */
266     @Test
267     @Alerts("iso-8859-15")
268     public void encoding_iso_8859_15() throws Exception {
269         encoding("csisolatin9");
270         encoding("iso-8859-15");
271         encoding("iso8859-15");
272         encoding("iso885915");
273         encoding("l9");
274         // encoding("latin9");
275     }
276 
277     /**
278      * @throws Exception on test failure
279      */
280     @Test
281     @Alerts("RangeError")
282     public void encoding_iso_8859_15_ex() throws Exception {
283         encoding("latin9");
284     }
285 
286     /**
287      * @throws Exception on test failure
288      */
289     @Test
290     @Alerts("iso-8859-16")
291     public void encoding_iso_8859_16() throws Exception {
292         encoding("iso-8859-16");
293     }
294 
295     /**
296      * @throws Exception on test failure
297      */
298     @Test
299     @Alerts("koi8-r")
300     public void encoding_koi8_r() throws Exception {
301         encoding("cskoi8r");
302         encoding("koi");
303         encoding("koi8");
304         encoding("koi8-r");
305         encoding("koi8_r");
306     }
307 
308     /**
309      * @throws Exception on test failure
310      */
311     @Test
312     @Alerts("koi8-u")
313     public void encoding_koi8_u() throws Exception {
314         encoding("koi8-u");
315     }
316 
317     /**
318      * @throws Exception on test failure
319      */
320     @Test
321     @Alerts("macintosh")
322     public void encoding_macintosh() throws Exception {
323         encoding("csmacintosh");
324         encoding("mac");
325         encoding("macintosh");
326         encoding("x-mac-roman");
327     }
328 
329     /**
330      * @throws Exception on test failure
331      */
332     @Test
333     @Alerts("windows-874")
334     public void encoding_windows_874() throws Exception {
335         encoding("dos-874");
336         encoding("iso-8859-11");
337         encoding("iso8859-11");
338         encoding("iso885911");
339         encoding("tis-620");
340         encoding("windows-874");
341     }
342 
343     /**
344      * @throws Exception on test failure
345      */
346     @Test
347     @Alerts("windows-1250")
348     public void encoding_windows_1250() throws Exception {
349         encoding("cp1250");
350         encoding("windows-1250");
351         encoding("x-cp1250");
352     }
353 
354     /**
355      * @throws Exception on test failure
356      */
357     @Test
358     @Alerts("windows-1251")
359     public void encoding_windows_1251() throws Exception {
360         encoding("cp1251");
361         encoding("windows-1251");
362         encoding("x-cp1251");
363     }
364 
365     /**
366      * @throws Exception on test failure
367      */
368     @Test
369     @Alerts("windows-1252")
370     public void encoding_windows_1252() throws Exception {
371         encoding("ansi_x3.4-1968");
372         encoding("ascii");
373         encoding("cp1252");
374         encoding("cp819");
375         encoding("csisolatin1");
376         encoding("ibm819");
377         encoding("iso-8859-1");
378         encoding("iso-ir-100");
379         encoding("iso8859-1");
380         encoding("iso88591");
381         encoding("iso_8859-1");
382         encoding("iso_8859-1:1987");
383         encoding("l1");
384         encoding("latin1");
385         encoding("us-ascii");
386         encoding("windows-1252");
387         encoding("x-cp1252");
388     }
389 
390     /**
391      * @throws Exception on test failure
392      */
393     @Test
394     @Alerts("windows-1253")
395     public void encoding_windows_1253() throws Exception {
396         encoding("cp1253");
397         encoding("windows-1253");
398         encoding("x-cp1253");
399     }
400 
401     /**
402      * @throws Exception on test failure
403      */
404     @Test
405     @Alerts("windows-1254")
406     public void encoding_windows_1254() throws Exception {
407         encoding("cp1254");
408         encoding("csisolatin5");
409         encoding("iso-8859-9");
410         encoding("iso-ir-148");
411         encoding("iso8859-9");
412         encoding("iso88599");
413         encoding("iso_8859-9");
414         encoding("iso_8859-9:1989");
415         encoding("l5");
416         encoding("latin5");
417         encoding("windows-1254");
418         encoding("x-cp1254");
419     }
420 
421     /**
422      * @throws Exception on test failure
423      */
424     @Test
425     @Alerts("windows-1255")
426     public void encoding_windows_1255() throws Exception {
427         encoding("cp1255");
428         encoding("windows-1255");
429         encoding("x-cp1255");
430     }
431 
432     /**
433      * @throws Exception on test failure
434      */
435     @Test
436     @Alerts("windows-1256")
437     public void encoding_windows_1256() throws Exception {
438         encoding("cp1256");
439         encoding("windows-1256");
440         encoding("x-cp1256");
441     }
442 
443     /**
444      * @throws Exception on test failure
445      */
446     @Test
447     @Alerts("windows-1257")
448     public void encoding_windows_1257() throws Exception {
449         encoding("cp1257");
450         encoding("windows-1257");
451         encoding("x-cp1257");
452     }
453 
454     /**
455      * @throws Exception on test failure
456      */
457     @Test
458     @Alerts("windows-1258")
459     public void encoding_windows_1258() throws Exception {
460         encoding("cp1258");
461         encoding("windows-1258");
462         encoding("x-cp1258");
463     }
464 
465     /**
466      * @throws Exception on test failure
467      */
468     @Test
469     @Alerts("x-mac-cyrillic")
470     public void encoding_x_mac_cyrillic() throws Exception {
471         encoding("x-mac-cyrillic");
472         encoding("x-mac-ukrainian");
473     }
474 
475     /**
476      * @throws Exception on test failure
477      */
478     @Test
479     @Alerts("gbk")
480     public void encoding_gbk() throws Exception {
481         encoding("chinese");
482         encoding("csgb2312");
483         encoding("csiso58gb231280");
484         encoding("gb2312");
485         encoding("gb_2312");
486         encoding("gb_2312-80");
487         encoding("gbk");
488         encoding("iso-ir-58");
489         encoding("x-gbk");
490     }
491 
492     /**
493      * @throws Exception on test failure
494      */
495     @Test
496     @Alerts("gb18030")
497     public void encoding_gb18030() throws Exception {
498         encoding("gb18030");
499     }
500 
501     /**
502      * @throws Exception on test failure
503      */
504     @Test
505     @Alerts("RangeError")
506     public void encoding_hz_gb_2312() throws Exception {
507         encoding("hz-gb-2312");
508     }
509 
510     /**
511      * @throws Exception on test failure
512      */
513     @Test
514     @Alerts("big5")
515     public void encoding_big5() throws Exception {
516         encoding("big5");
517         encoding("big5-hkscs");
518         encoding("cn-big5");
519         encoding("csbig5");
520         encoding("x-x-big5");
521     }
522 
523     /**
524      * @throws Exception on test failure
525      */
526     @Test
527     @Alerts("euc-jp")
528     public void encoding_euc_jp() throws Exception {
529         encoding("cseucpkdfmtjapanese");
530         encoding("euc-jp");
531         encoding("x-euc-jp");
532     }
533 
534     /**
535      * @throws Exception on test failure
536      */
537     @Test
538     @Alerts("iso-2022-jp")
539     public void encoding_iso_2022_jp() throws Exception {
540         encoding("csiso2022jp");
541         encoding("iso-2022-jp");
542     }
543 
544     /**
545      * @throws Exception on test failure
546      */
547     @Test
548     @Alerts("shift_jis")
549     public void encoding_shift_jis() throws Exception {
550         encoding("csshiftjis");
551         encoding("ms_kanji");
552         encoding("shift-jis");
553         encoding("shift_jis");
554         encoding("sjis");
555         encoding("windows-31j");
556         encoding("x-sjis");
557     }
558 
559     /**
560      * @throws Exception on test failure
561      */
562     @Test
563     @Alerts("euc-kr")
564     public void encoding_euc_kr() throws Exception {
565         encoding("cseuckr");
566         encoding("csksc56011987");
567         encoding("euc-kr");
568         encoding("iso-ir-149");
569         encoding("korean");
570         encoding("ks_c_5601-1987");
571         encoding("ks_c_5601-1989");
572         encoding("ksc5601");
573         encoding("ksc_5601");
574         encoding("windows-949");
575     }
576 
577     /**
578      * @throws Exception on test failure
579      */
580     @Test
581     @Alerts("RangeError")
582     public void encoding_iso_2022_kr() throws Exception {
583         encoding("csiso2022kr");
584         encoding("iso-2022-kr");
585     }
586 
587     /**
588      * @throws Exception on test failure
589      */
590     @Test
591     @Alerts("utf-16be")
592     public void encoding_utf_16be() throws Exception {
593         encoding("utf-16be");
594     }
595 
596     /**
597      * @throws Exception on test failure
598      */
599     @Test
600     @Alerts("utf-16le")
601     public void encoding_utf_16le() throws Exception {
602         encoding("utf-16");
603         encoding("utf-16le");
604     }
605 
606     /**
607      * @throws Exception on test failure
608      */
609     @Test
610     @Alerts("x-user-defined")
611     public void encoding_x_user_defined() throws Exception {
612         encoding("x-user-defined");
613     }
614 
615     /**
616      * @throws Exception on test failure
617      */
618     @Test
619     @Alerts("RangeError")
620     public void encoding_replacement() throws Exception {
621         encoding("iso-2022-cn");
622         encoding("iso-2022-cn-ext");
623     }
624 
625     private void encoding(final String encoding) throws Exception {
626         final String html = DOCTYPE_HTML
627             + "<html>\n"
628             + "<head>\n"
629             + "  <script>\n"
630             + LOG_TITLE_FUNCTION
631             + "    function doTest() {\n"
632             + "      try {\n"
633             + "        enc = new TextDecoder('" + encoding + "');\n"
634             + "        log(enc.encoding);\n"
635             + "      } catch(e) { logEx(e); }\n"
636             + "    }\n"
637             + "  </script>\n"
638             + "</head>\n"
639             + "<body onload='doTest()'>\n"
640             + "</body></html>";
641 
642         loadPageVerifyTitle2(html);
643     }
644 
645     /**
646      * @throws Exception on test failure
647      */
648     @Test
649     @Alerts({"0", "8", "72", "116"})
650     public void encode() throws Exception {
651         final String html = DOCTYPE_HTML
652             + "<html>\n"
653             + "<head>\n"
654             + "  <script>\n"
655             + LOG_TITLE_FUNCTION
656             + "    function doTest() {\n"
657             + "      var enc = new TextEncoder();\n"
658 
659             + "      var encoded = enc.encode('');\n"
660             + "      log(encoded.length);\n"
661 
662             + "      encoded = enc.encode('HtmlUnit');\n"
663             + "      log(encoded.length);\n"
664             + "      log(encoded[0]);\n"
665             + "      log(encoded[encoded.length - 1]);\n"
666             + "    }\n"
667             + "  </script>\n"
668             + "</head>\n"
669             + "<body onload='doTest()'>\n"
670             + "</body></html>";
671 
672         loadPageVerifyTitle2(html);
673     }
674 
675     /**
676      * @throws Exception on test failure
677      */
678     @Test
679     @Alerts({"HtmlUnit", "mlU"})
680     public void decode() throws Exception {
681         final String html = DOCTYPE_HTML
682             + "<html>\n"
683             + "<head>\n"
684             + "  <script>\n"
685             + LOG_TITLE_FUNCTION
686             + "    function doTest() {\n"
687             + "      var enc = new TextEncoder();\n"
688             + "      var encoded = enc.encode('HtmlUnit');\n"
689 
690             + "      var dec = new TextDecoder('utf-8');\n"
691             + "      var decoded = dec.decode(encoded);\n"
692             + "      log(decoded);\n"
693 
694             + "      var arrayBuffer = encoded.buffer;\n"
695             + "      var typedArray = new Uint8Array(arrayBuffer, 2, 3);\n"
696             + "      log(dec.decode(typedArray));\n"
697             + "    }\n"
698             + "  </script>\n"
699             + "</head>\n"
700             + "<body onload='doTest()'>\n"
701             + "</body></html>";
702 
703         loadPageVerifyTitle2(html);
704     }
705 
706     /**
707      * @throws Exception on test failure
708      */
709     @Test
710     @Alerts({"", "TypeError"})
711     public void decode2() throws Exception {
712         final String html = DOCTYPE_HTML
713             + "<html>\n"
714             + "<head>\n"
715             + "  <script>\n"
716             + LOG_TITLE_FUNCTION
717             + "    function doTest() {\n"
718             + "      var dec = new TextDecoder('utf-8');\n"
719             + "      try {\n"
720             + "        log(dec.decode(undefined));\n"
721             + "      } catch(e) { logEx(e); }\n"
722 
723             + "      try {\n"
724             + "        log(dec.decode(null));\n"
725             + "      } catch(e) { logEx(e); }\n"
726             + "    }\n"
727             + "  </script>\n"
728             + "</head>\n"
729             + "<body onload='doTest()'>\n"
730             + "</body></html>";
731 
732         loadPageVerifyTitle2(html);
733     }
734 
735     /**
736      * @throws Exception on test failure
737      */
738     @Test
739     @Alerts("RangeError")
740     public void decodeReplacement() throws Exception {
741         final String html = DOCTYPE_HTML
742             + "<html>\n"
743             + "<head>\n"
744             + "  <script>\n"
745             + LOG_TITLE_FUNCTION
746             + "    function doTest() {\n"
747             + "      try {\n"
748             + "        var dec = new TextDecoder('iso-2022-kr');\n"
749             + "      } catch(e) { logEx(e); }\n"
750             + "    }\n"
751             + "  </script>\n"
752             + "</head>\n"
753             + "<body onload='doTest()'>\n"
754             + "</body></html>";
755 
756         loadPageVerifyTitle2(html);
757     }
758 
759     /**
760      * @throws Exception on test failure
761      */
762     @Test
763     @Alerts({"", "ex-null", "TypeError", "\uf7cf!"})
764     public void decodeXuserDefined() throws Exception {
765         final String html = DOCTYPE_HTML
766             + "<html>\n"
767             + "<head>\n"
768             + "  <script>\n"
769             + LOG_TITLE_FUNCTION
770             + "    function doTest() {\n"
771             + "      var dec = new TextDecoder('x-user-defined');\n"
772 
773             + "      try {\n"
774             + "        log(dec.decode(undefined));\n"
775             + "      } catch(e) { log('ex-undefined'); logEx(e); }\n"
776 
777             + "      try {\n"
778             + "        log(dec.decode(null));\n"
779             + "      } catch(e) { log('ex-null'); logEx(e); }\n"
780 
781             + "      try {\n"
782             + "        var bytes = new Uint8Array([ 207, 33]);"
783             + "        log(dec.decode(bytes));\n"
784             + "      } catch(e) { logEx(e); }\n"
785             + "    }\n"
786             + "  </script>\n"
787             + "</head>\n"
788             + "<body onload='doTest()'>\n"
789             + "</body></html>";
790 
791         loadPageVerifyTitle2(html);
792     }
793 }