View Javadoc
1   /*
2    * Copyright (c) 2002-2025 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * https://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package org.htmlunit.javascript.host;
16  
17  import org.htmlunit.WebDriverTestCase;
18  import org.htmlunit.junit.annotation.Alerts;
19  import org.junit.jupiter.api.Test;
20  
21  /**
22   * Tests for {@link TextDecoder}.
23   *
24   * @author Ronald Brill
25   * @see <a href="https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder">TextDecoder() - Web APIs | MDN</a>
26   */
27  public class TextDecoderTest extends WebDriverTestCase {
28  
29      /**
30       * @throws Exception on test failure
31       */
32      @Test
33      @Alerts({"utf-8", "utf-8", "utf-8", "utf-8"})
34      public void encoding() throws Exception {
35          final String html = DOCTYPE_HTML
36              + "<html>\n"
37              + "<head>\n"
38              + "  <script>\n"
39              + LOG_TITLE_FUNCTION
40              + "    function doTest() {\n"
41              + "      var enc = new TextDecoder();\n"
42              + "      log(enc.encoding);\n"
43  
44              + "      enc = new TextDecoder(undefined);\n"
45              + "      log(enc.encoding);\n"
46  
47              + "      enc = new TextDecoder('utf-8');\n"
48              + "      log(enc.encoding);\n"
49  
50              + "      enc = new TextDecoder('utf8');\n"
51              + "      log(enc.encoding);\n"
52  
53              + "    }\n"
54              + "  </script>\n"
55              + "</head>\n"
56              + "<body onload='doTest()'>\n"
57              + "</body></html>";
58  
59          loadPageVerifyTitle2(html);
60      }
61  
62      /**
63       * @throws Exception on test failure
64       */
65      @Test
66      @Alerts("utf-8")
67      public void encoding_utf8() throws Exception {
68          encoding("unicode-1-1-utf-8");
69          encoding("utf-8");
70          encoding("utf8");
71      }
72  
73      /**
74       * @throws Exception on test failure
75       */
76      @Test
77      @Alerts("ibm866")
78      public void encoding_ibm866() throws Exception {
79          encoding("866");
80          encoding("cp866");
81          encoding("csibm866");
82          encoding("ibm866");
83      }
84  
85      /**
86       * @throws Exception on test failure
87       */
88      @Test
89      @Alerts("iso-8859-2")
90      public void encoding_iso_8859_2() throws Exception {
91          encoding("csisolatin2");
92          encoding("iso-8859-2");
93          encoding("iso-ir-101");
94          encoding("iso8859-2");
95          encoding("iso88592");
96          encoding("iso_8859-2");
97          encoding("iso_8859-2:1987");
98          encoding("l2");
99          encoding("latin2");
100     }
101 
102     /**
103      * @throws Exception on test failure
104      */
105     @Test
106     @Alerts("iso-8859-3")
107     public void encoding_iso_8859_3() throws Exception {
108         encoding("csisolatin3");
109         encoding("iso-8859-3");
110         encoding("iso-ir-109");
111         encoding("iso8859-3");
112         encoding("iso88593");
113         encoding("iso_8859-3");
114         encoding("iso_8859-3:1988");
115         encoding("l3");
116         encoding("latin3");
117     }
118 
119     /**
120      * @throws Exception on test failure
121      */
122     @Test
123     @Alerts("iso-8859-4")
124     public void encoding_iso_8859_4() throws Exception {
125         encoding("csisolatin4");
126         encoding("iso-8859-4");
127         encoding("iso-ir-110");
128         encoding("iso8859-4");
129         encoding("iso88594");
130         encoding("iso_8859-4");
131         encoding("iso_8859-4:1988");
132         encoding("l4");
133         encoding("latin4");
134     }
135 
136     /**
137      * @throws Exception on test failure
138      */
139     @Test
140     @Alerts("iso-8859-5")
141     public void encoding_iso_8859_5() throws Exception {
142         encoding("csisolatincyrillic");
143         encoding("cyrillic");
144         encoding("iso-8859-5");
145         encoding("iso-ir-144");
146         encoding("iso88595");
147         encoding("iso_8859-5");
148         encoding("iso_8859-5:1988");
149     }
150 
151     /**
152      * @throws Exception on test failure
153      */
154     @Test
155     @Alerts("iso-8859-6")
156     public void encoding_iso_8859_6() throws Exception {
157         encoding("arabic");
158         encoding("asmo-708");
159         encoding("csiso88596e");
160         encoding("csiso88596i");
161         encoding("csisolatinarabic");
162         encoding("ecma-114");
163         encoding("iso-8859-6");
164         encoding("iso-8859-6-e");
165         encoding("iso-8859-6-i");
166         encoding("iso-ir-127");
167         encoding("iso8859-6");
168         encoding("iso88596");
169         encoding("iso_8859-6");
170         encoding("iso_8859-6:1987");
171     }
172 
173     /**
174      * @throws Exception on test failure
175      */
176     @Test
177     @Alerts("iso-8859-7")
178     public void encoding_iso_8859_7() throws Exception {
179         encoding("csisolatingreek");
180         encoding("ecma-118");
181         encoding("elot_928");
182         encoding("greek");
183         encoding("greek8");
184         encoding("iso-8859-7");
185         encoding("iso-ir-126");
186         encoding("iso8859-7");
187         encoding("iso88597");
188         encoding("iso_8859-7");
189         encoding("iso_8859-7:1987");
190         encoding("sun_eu_greek");
191     }
192 
193     /**
194      * @throws Exception on test failure
195      */
196     @Test
197     @Alerts("iso-8859-8")
198     public void encoding_iso_8859_8() throws Exception {
199         encoding("csiso88598e");
200         encoding("csisolatinhebrew");
201         encoding("hebrew");
202         encoding("iso-8859-8");
203         encoding("iso-8859-8-e");
204         encoding("iso-ir-138");
205         encoding("iso8859-8");
206         encoding("iso88598");
207         encoding("iso_8859-8");
208         encoding("iso_8859-8:1988");
209         encoding("visual");
210     }
211 
212     /**
213      * @throws Exception on test failure
214      */
215     @Test
216     @Alerts("iso-8859-8-i")
217     public void encoding_iso_8859_8i() throws Exception {
218         encoding("csiso88598i");
219         encoding("iso-8859-8-i");
220         encoding("logical");
221     }
222 
223     /**
224      * @throws Exception on test failure
225      */
226     @Test
227     @Alerts("iso-8859-10")
228     public void encoding_iso_8859_10() throws Exception {
229         encoding("csisolatin6");
230         encoding("iso-8859-10");
231         encoding("iso-ir-157");
232         encoding("iso8859-10");
233         encoding("iso885910");
234         encoding("l6");
235         encoding("latin6");
236     }
237 
238     /**
239      * @throws Exception on test failure
240      */
241     @Test
242     @Alerts("iso-8859-13")
243     public void encoding_iso_8859_13() throws Exception {
244         encoding("iso-8859-13");
245         encoding("iso8859-13");
246         encoding("iso885913");
247     }
248 
249     /**
250      * @throws Exception on test failure
251      */
252     @Test
253     @Alerts("iso-8859-14")
254     public void encoding_iso_8859_14() throws Exception {
255         encoding("iso-8859-14");
256         encoding("iso8859-14");
257         encoding("iso885914");
258     }
259 
260     /**
261      * @throws Exception on test failure
262      */
263     @Test
264     @Alerts("iso-8859-15")
265     public void encoding_iso_8859_15() throws Exception {
266         encoding("csisolatin9");
267         encoding("iso-8859-15");
268         encoding("iso8859-15");
269         encoding("iso885915");
270         encoding("l9");
271         // encoding("latin9");
272     }
273 
274     /**
275      * @throws Exception on test failure
276      */
277     @Test
278     @Alerts("RangeError")
279     public void encoding_iso_8859_15_ex() throws Exception {
280         encoding("latin9");
281     }
282 
283     /**
284      * @throws Exception on test failure
285      */
286     @Test
287     @Alerts("iso-8859-16")
288     public void encoding_iso_8859_16() throws Exception {
289         encoding("iso-8859-16");
290     }
291 
292     /**
293      * @throws Exception on test failure
294      */
295     @Test
296     @Alerts("koi8-r")
297     public void encoding_koi8_r() throws Exception {
298         encoding("cskoi8r");
299         encoding("koi");
300         encoding("koi8");
301         encoding("koi8-r");
302         encoding("koi8_r");
303     }
304 
305     /**
306      * @throws Exception on test failure
307      */
308     @Test
309     @Alerts("koi8-u")
310     public void encoding_koi8_u() throws Exception {
311         encoding("koi8-u");
312     }
313 
314     /**
315      * @throws Exception on test failure
316      */
317     @Test
318     @Alerts("macintosh")
319     public void encoding_macintosh() throws Exception {
320         encoding("csmacintosh");
321         encoding("mac");
322         encoding("macintosh");
323         encoding("x-mac-roman");
324     }
325 
326     /**
327      * @throws Exception on test failure
328      */
329     @Test
330     @Alerts("windows-874")
331     public void encoding_windows_874() throws Exception {
332         encoding("dos-874");
333         encoding("iso-8859-11");
334         encoding("iso8859-11");
335         encoding("iso885911");
336         encoding("tis-620");
337         encoding("windows-874");
338     }
339 
340     /**
341      * @throws Exception on test failure
342      */
343     @Test
344     @Alerts("windows-1250")
345     public void encoding_windows_1250() throws Exception {
346         encoding("cp1250");
347         encoding("windows-1250");
348         encoding("x-cp1250");
349     }
350 
351     /**
352      * @throws Exception on test failure
353      */
354     @Test
355     @Alerts("windows-1251")
356     public void encoding_windows_1251() throws Exception {
357         encoding("cp1251");
358         encoding("windows-1251");
359         encoding("x-cp1251");
360     }
361 
362     /**
363      * @throws Exception on test failure
364      */
365     @Test
366     @Alerts("windows-1252")
367     public void encoding_windows_1252() throws Exception {
368         encoding("ansi_x3.4-1968");
369         encoding("ascii");
370         encoding("cp1252");
371         encoding("cp819");
372         encoding("csisolatin1");
373         encoding("ibm819");
374         encoding("iso-8859-1");
375         encoding("iso-ir-100");
376         encoding("iso8859-1");
377         encoding("iso88591");
378         encoding("iso_8859-1");
379         encoding("iso_8859-1:1987");
380         encoding("l1");
381         encoding("latin1");
382         encoding("us-ascii");
383         encoding("windows-1252");
384         encoding("x-cp1252");
385     }
386 
387     /**
388      * @throws Exception on test failure
389      */
390     @Test
391     @Alerts("windows-1253")
392     public void encoding_windows_1253() throws Exception {
393         encoding("cp1253");
394         encoding("windows-1253");
395         encoding("x-cp1253");
396     }
397 
398     /**
399      * @throws Exception on test failure
400      */
401     @Test
402     @Alerts("windows-1254")
403     public void encoding_windows_1254() throws Exception {
404         encoding("cp1254");
405         encoding("csisolatin5");
406         encoding("iso-8859-9");
407         encoding("iso-ir-148");
408         encoding("iso8859-9");
409         encoding("iso88599");
410         encoding("iso_8859-9");
411         encoding("iso_8859-9:1989");
412         encoding("l5");
413         encoding("latin5");
414         encoding("windows-1254");
415         encoding("x-cp1254");
416     }
417 
418     /**
419      * @throws Exception on test failure
420      */
421     @Test
422     @Alerts("windows-1255")
423     public void encoding_windows_1255() throws Exception {
424         encoding("cp1255");
425         encoding("windows-1255");
426         encoding("x-cp1255");
427     }
428 
429     /**
430      * @throws Exception on test failure
431      */
432     @Test
433     @Alerts("windows-1256")
434     public void encoding_windows_1256() throws Exception {
435         encoding("cp1256");
436         encoding("windows-1256");
437         encoding("x-cp1256");
438     }
439 
440     /**
441      * @throws Exception on test failure
442      */
443     @Test
444     @Alerts("windows-1257")
445     public void encoding_windows_1257() throws Exception {
446         encoding("cp1257");
447         encoding("windows-1257");
448         encoding("x-cp1257");
449     }
450 
451     /**
452      * @throws Exception on test failure
453      */
454     @Test
455     @Alerts("windows-1258")
456     public void encoding_windows_1258() throws Exception {
457         encoding("cp1258");
458         encoding("windows-1258");
459         encoding("x-cp1258");
460     }
461 
462     /**
463      * @throws Exception on test failure
464      */
465     @Test
466     @Alerts("x-mac-cyrillic")
467     public void encoding_x_mac_cyrillic() throws Exception {
468         encoding("x-mac-cyrillic");
469         encoding("x-mac-ukrainian");
470     }
471 
472     /**
473      * @throws Exception on test failure
474      */
475     @Test
476     @Alerts("gbk")
477     public void encoding_gbk() throws Exception {
478         encoding("chinese");
479         encoding("csgb2312");
480         encoding("csiso58gb231280");
481         encoding("gb2312");
482         encoding("gb_2312");
483         encoding("gb_2312-80");
484         encoding("gbk");
485         encoding("iso-ir-58");
486         encoding("x-gbk");
487     }
488 
489     /**
490      * @throws Exception on test failure
491      */
492     @Test
493     @Alerts("gb18030")
494     public void encoding_gb18030() throws Exception {
495         encoding("gb18030");
496     }
497 
498     /**
499      * @throws Exception on test failure
500      */
501     @Test
502     @Alerts("RangeError")
503     public void encoding_hz_gb_2312() throws Exception {
504         encoding("hz-gb-2312");
505     }
506 
507     /**
508      * @throws Exception on test failure
509      */
510     @Test
511     @Alerts("big5")
512     public void encoding_big5() throws Exception {
513         encoding("big5");
514         encoding("big5-hkscs");
515         encoding("cn-big5");
516         encoding("csbig5");
517         encoding("x-x-big5");
518     }
519 
520     /**
521      * @throws Exception on test failure
522      */
523     @Test
524     @Alerts("euc-jp")
525     public void encoding_euc_jp() throws Exception {
526         encoding("cseucpkdfmtjapanese");
527         encoding("euc-jp");
528         encoding("x-euc-jp");
529     }
530 
531     /**
532      * @throws Exception on test failure
533      */
534     @Test
535     @Alerts("iso-2022-jp")
536     public void encoding_iso_2022_jp() throws Exception {
537         encoding("csiso2022jp");
538         encoding("iso-2022-jp");
539     }
540 
541     /**
542      * @throws Exception on test failure
543      */
544     @Test
545     @Alerts("shift_jis")
546     public void encoding_shift_jis() throws Exception {
547         encoding("csshiftjis");
548         encoding("ms_kanji");
549         encoding("shift-jis");
550         encoding("shift_jis");
551         encoding("sjis");
552         encoding("windows-31j");
553         encoding("x-sjis");
554     }
555 
556     /**
557      * @throws Exception on test failure
558      */
559     @Test
560     @Alerts("euc-kr")
561     public void encoding_euc_kr() throws Exception {
562         encoding("cseuckr");
563         encoding("csksc56011987");
564         encoding("euc-kr");
565         encoding("iso-ir-149");
566         encoding("korean");
567         encoding("ks_c_5601-1987");
568         encoding("ks_c_5601-1989");
569         encoding("ksc5601");
570         encoding("ksc_5601");
571         encoding("windows-949");
572     }
573 
574     /**
575      * @throws Exception on test failure
576      */
577     @Test
578     @Alerts("RangeError")
579     public void encoding_iso_2022_kr() throws Exception {
580         encoding("csiso2022kr");
581         encoding("iso-2022-kr");
582     }
583 
584     /**
585      * @throws Exception on test failure
586      */
587     @Test
588     @Alerts("utf-16be")
589     public void encoding_utf_16be() throws Exception {
590         encoding("utf-16be");
591     }
592 
593     /**
594      * @throws Exception on test failure
595      */
596     @Test
597     @Alerts("utf-16le")
598     public void encoding_utf_16le() throws Exception {
599         encoding("utf-16");
600         encoding("utf-16le");
601     }
602 
603     /**
604      * @throws Exception on test failure
605      */
606     @Test
607     @Alerts("x-user-defined")
608     public void encoding_x_user_defined() throws Exception {
609         encoding("x-user-defined");
610     }
611 
612     /**
613      * @throws Exception on test failure
614      */
615     @Test
616     @Alerts("RangeError")
617     public void encoding_replacement() throws Exception {
618         encoding("iso-2022-cn");
619         encoding("iso-2022-cn-ext");
620     }
621 
622     private void encoding(final String encoding) throws Exception {
623         final String html = DOCTYPE_HTML
624             + "<html>\n"
625             + "<head>\n"
626             + "  <script>\n"
627             + LOG_TITLE_FUNCTION
628             + "    function doTest() {\n"
629             + "      try {\n"
630             + "        enc = new TextDecoder('" + encoding + "');\n"
631             + "        log(enc.encoding);\n"
632             + "      } catch(e) { logEx(e); }\n"
633             + "    }\n"
634             + "  </script>\n"
635             + "</head>\n"
636             + "<body onload='doTest()'>\n"
637             + "</body></html>";
638 
639         loadPageVerifyTitle2(html);
640     }
641 
642     /**
643      * @throws Exception on test failure
644      */
645     @Test
646     @Alerts({"0", "8", "72", "116"})
647     public void encode() throws Exception {
648         final String html = DOCTYPE_HTML
649             + "<html>\n"
650             + "<head>\n"
651             + "  <script>\n"
652             + LOG_TITLE_FUNCTION
653             + "    function doTest() {\n"
654             + "      var enc = new TextEncoder();\n"
655 
656             + "      var encoded = enc.encode('');\n"
657             + "      log(encoded.length);\n"
658 
659             + "      encoded = enc.encode('HtmlUnit');\n"
660             + "      log(encoded.length);\n"
661             + "      log(encoded[0]);\n"
662             + "      log(encoded[encoded.length - 1]);\n"
663             + "    }\n"
664             + "  </script>\n"
665             + "</head>\n"
666             + "<body onload='doTest()'>\n"
667             + "</body></html>";
668 
669         loadPageVerifyTitle2(html);
670     }
671 
672     /**
673      * @throws Exception on test failure
674      */
675     @Test
676     @Alerts({"HtmlUnit", "mlU"})
677     public void decode() throws Exception {
678         final String html = DOCTYPE_HTML
679             + "<html>\n"
680             + "<head>\n"
681             + "  <script>\n"
682             + LOG_TITLE_FUNCTION
683             + "    function doTest() {\n"
684             + "      var enc = new TextEncoder();\n"
685             + "      var encoded = enc.encode('HtmlUnit');\n"
686 
687             + "      var dec = new TextDecoder('utf-8');\n"
688             + "      var decoded = dec.decode(encoded);\n"
689             + "      log(decoded);\n"
690 
691             + "      var arrayBuffer = encoded.buffer;\n"
692             + "      var typedArray = new Uint8Array(arrayBuffer, 2, 3);\n"
693             + "      log(dec.decode(typedArray));\n"
694             + "    }\n"
695             + "  </script>\n"
696             + "</head>\n"
697             + "<body onload='doTest()'>\n"
698             + "</body></html>";
699 
700         loadPageVerifyTitle2(html);
701     }
702 
703     /**
704      * @throws Exception on test failure
705      */
706     @Test
707     @Alerts({"", "TypeError"})
708     public void decode2() throws Exception {
709         final String html = DOCTYPE_HTML
710             + "<html>\n"
711             + "<head>\n"
712             + "  <script>\n"
713             + LOG_TITLE_FUNCTION
714             + "    function doTest() {\n"
715             + "      var dec = new TextDecoder('utf-8');\n"
716             + "      try {\n"
717             + "        log(dec.decode(undefined));\n"
718             + "      } catch(e) { logEx(e); }\n"
719 
720             + "      try {\n"
721             + "        log(dec.decode(null));\n"
722             + "      } catch(e) { logEx(e); }\n"
723             + "    }\n"
724             + "  </script>\n"
725             + "</head>\n"
726             + "<body onload='doTest()'>\n"
727             + "</body></html>";
728 
729         loadPageVerifyTitle2(html);
730     }
731 
732     /**
733      * @throws Exception on test failure
734      */
735     @Test
736     @Alerts("RangeError")
737     public void decodeReplacement() throws Exception {
738         final String html = DOCTYPE_HTML
739             + "<html>\n"
740             + "<head>\n"
741             + "  <script>\n"
742             + LOG_TITLE_FUNCTION
743             + "    function doTest() {\n"
744             + "      try {\n"
745             + "        var dec = new TextDecoder('iso-2022-kr');\n"
746             + "      } catch(e) { logEx(e); }\n"
747             + "    }\n"
748             + "  </script>\n"
749             + "</head>\n"
750             + "<body onload='doTest()'>\n"
751             + "</body></html>";
752 
753         loadPageVerifyTitle2(html);
754     }
755 
756     /**
757      * @throws Exception on test failure
758      */
759     @Test
760     @Alerts({"", "ex-null", "TypeError", "\uf7cf!"})
761     public void decodeXuserDefined() throws Exception {
762         final String html = DOCTYPE_HTML
763             + "<html>\n"
764             + "<head>\n"
765             + "  <script>\n"
766             + LOG_TITLE_FUNCTION
767             + "    function doTest() {\n"
768             + "      var dec = new TextDecoder('x-user-defined');\n"
769 
770             + "      try {\n"
771             + "        log(dec.decode(undefined));\n"
772             + "      } catch(e) { log('ex-undefined'); logEx(e); }\n"
773 
774             + "      try {\n"
775             + "        log(dec.decode(null));\n"
776             + "      } catch(e) { log('ex-null'); logEx(e); }\n"
777 
778             + "      try {\n"
779             + "        var bytes = new Uint8Array([ 207, 33]);"
780             + "        log(dec.decode(bytes));\n"
781             + "      } catch(e) { logEx(e); }\n"
782             + "    }\n"
783             + "  </script>\n"
784             + "</head>\n"
785             + "<body onload='doTest()'>\n"
786             + "</body></html>";
787 
788         loadPageVerifyTitle2(html);
789     }
790 }