To: vim-dev@vim.org Subject: Patch 6.1.417 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 8bit ------------ Patch 6.1.417 Problem: Unprintable multi-byte characters are not handled correctly. Multi-byte characters above 0xffff are displayed as another character. Solution: Handle unprintable multi-byte characters. Display multi-byte characters above 0xffff with a marker. Recognize UTF-16 words and BOM words as unprintable. (Daniel Elstner) Files: src/charset.c, src/mbyte.c, src/screen.c *** ../vim61.416/src/charset.c Fri Mar 14 22:45:22 2003 --- src/charset.c Thu Mar 20 20:26:37 2003 *************** *** 331,352 **** char_u *res; char_u *p; #ifdef FEAT_MBYTE ! int l, len; #endif #ifdef FEAT_MBYTE if (has_mbyte) { ! /* Compute the length of the result, taking into account that ! * multi-byte characters are copied unchanged. */ len = 0; p = s; while (*p != NUL) { if ((l = (*mb_ptr2len_check)(p)) > 1) { ! len += l; p += l; } else { --- 331,360 ---- char_u *res; char_u *p; #ifdef FEAT_MBYTE ! int l, len, c; ! char_u hexbuf[11]; #endif #ifdef FEAT_MBYTE if (has_mbyte) { ! /* Compute the length of the result, taking account of unprintable ! * multi-byte characters. */ len = 0; p = s; while (*p != NUL) { if ((l = (*mb_ptr2len_check)(p)) > 1) { ! c = (*mb_ptr2char)(p); p += l; + if (vim_isprintc(c)) + len += l; + else + { + transchar_hex(hexbuf, c); + len += STRLEN(hexbuf); + } } else { *************** *** 371,377 **** #ifdef FEAT_MBYTE if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1) { ! STRNCAT(res, p, l); /* append printable multi-byte char */ p += l; } else --- 379,389 ---- #ifdef FEAT_MBYTE if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1) { ! c = (*mb_ptr2char)(p); ! if (vim_isprintc(c)) ! STRNCAT(res, p, l); /* append printable multi-byte char */ ! else ! 
transchar_hex(res + STRLEN(res), c); p += l; } else *** ../vim61.416/src/mbyte.c Sat Mar 15 17:55:18 2003 --- src/mbyte.c Wed Mar 26 19:53:37 2003 *************** *** 1491,1501 **** utf_printable(c) int c; { ! /* sorted list of non-overlapping intervals */ static struct interval nonprint[] = { {0x070f, 0x070f}, {0x180b, 0x180e}, {0x200b, 0x200f}, {0x202a, 0x202e}, ! {0x206a, 0x206f}, {0xfeff, 0xfeff}, {0xfff9, 0xfffb} }; return !intable(nonprint, sizeof(nonprint) / sizeof(struct interval), c); --- 1485,1497 ---- utf_printable(c) int c; { ! /* Sorted list of non-overlapping intervals. ! * 0xd800-0xdfff is reserved for UTF-16, actually illegal. */ static struct interval nonprint[] = { {0x070f, 0x070f}, {0x180b, 0x180e}, {0x200b, 0x200f}, {0x202a, 0x202e}, ! {0x206a, 0x206f}, {0xd800, 0xdfff}, {0xfeff, 0xfeff}, {0xfff9, 0xfffb}, ! {0xfffe, 0xffff} }; return !intable(nonprint, sizeof(nonprint) / sizeof(struct interval), c); *** ../vim61.416/src/screen.c Thu Mar 20 21:33:57 2003 --- src/screen.c Mon Mar 24 20:15:10 2003 *************** *** 2077,2083 **** --- 2077,2087 ---- { #ifdef FEAT_MBYTE if (has_mbyte && (len = (*mb_ptr2len_check)(p)) > 1) + { + if (!vim_isprintc((*mb_ptr2char)(p))) + break; p += len - 1; + } else #endif if (*p == TAB) *************** *** 2139,2145 **** ScreenLinesUC[idx] = 0; else { ! ScreenLinesUC[idx] = u8c; ScreenLinesC1[idx] = u8c_c1; ScreenLinesC2[idx] = u8c_c2; } --- 2143,2153 ---- ScreenLinesUC[idx] = 0; else { ! /* Non-BMP character: display as ? or fullwidth ?. */ ! if (u8c >= 0x10000) ! ScreenLinesUC[idx] = (cells == 2) ? 0xff1f : (int)'?'; ! else ! ScreenLinesUC[idx] = u8c; ScreenLinesC1[idx] = u8c_c1; ScreenLinesC2[idx] = u8c_c2; } *************** *** 3248,3267 **** } if ((mb_l == 1 && c >= 0x80) || (mb_l >= 1 && mb_c == 0) ! || (mb_l > 1 && !vim_isprintc(mb_c))) { /* * Illegal UTF-8 byte: display as <xx>. */ ! transchar_hex(extra, mb_c); #ifdef FEAT_RIGHTLEFT ! if (wp->w_p_rl) /* reverse */ ! rl_mirror(extra); #endif p_extra = extra; ! 
c = *p_extra++; ! mb_c = c; ! mb_utf8 = FALSE; n_extra = (int)STRLEN(p_extra); c_extra = NUL; if (area_attr == 0 && search_attr == 0) --- 3259,3289 ---- } if ((mb_l == 1 && c >= 0x80) || (mb_l >= 1 && mb_c == 0) ! || (mb_l > 1 && (!vim_isprintc(mb_c) ! || mb_c >= 0x10000))) { /* * Illegal UTF-8 byte: display as <xx>. + * Non-BMP character : display as ? or fullwidth ?. */ ! if (mb_c < 0x10000) ! { ! transchar_hex(extra, mb_c); #ifdef FEAT_RIGHTLEFT ! if (wp->w_p_rl) /* reverse */ ! rl_mirror(extra); #endif + } + else if (utf_char2cells(mb_c) != 2) + STRCPY(extra, "?"); + else + /* 0xff1f in UTF-8: full-width '?' */ + STRCPY(extra, "\357\274\237"); + p_extra = extra; ! c = *p_extra; ! mb_c = mb_ptr2char_adv(&p_extra); ! mb_utf8 = (c >= 0x80); n_extra = (int)STRLEN(p_extra); c_extra = NUL; if (area_attr == 0 && search_attr == 0) *************** *** 5241,5246 **** --- 5263,5275 ---- { u8c = utfc_ptr2char(ptr, &u8c_c1, &u8c_c2); mbyte_cells = utf_char2cells(u8c); + /* Non-BMP character: display as ? or fullwidth ?. */ + if (u8c >= 0x10000) + { + u8c = (mbyte_cells == 2) ? 0xff1f : (int)'?'; + if (attr == 0) + attr = hl_attr(HLF_8); + } } } #endif *** ../vim61.416/src/version.c Wed Mar 26 10:20:31 2003 --- src/version.c Wed Mar 26 21:29:36 2003 *************** *** 613,614 **** --- 613,616 ---- { /* Add new patch number below this line */ + /**/ + 417, /**/ -- hundred-and-one symptoms of being an internet addict: 227. You sleep next to your monitor. Or on top of it. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// Creator of Vim - Vi IMproved -- http://www.Vim.org \\\ \\\ Project leader for A-A-P -- http://www.A-A-P.org /// \\\ Help AIDS victims, buy at Amazon -- http://ICCF.nl/click1.html ///