[Midnightbsd-cvs] src: fs/msdosfs: Support UTF-8, including some Chinese characters.
laffer1 at midnightbsd.org
laffer1 at midnightbsd.org
Mon Mar 30 14:19:41 EDT 2009
Log Message:
-----------
Support UTF-8, including some Chinese characters.
Modified Files:
--------------
src/sys/fs/msdosfs:
msdosfs_conv.c (r1.4 -> r1.5)
msdosfs_vfsops.c (r1.6 -> r1.7)
-------------- next part --------------
Index: msdosfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_vfsops.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -L sys/fs/msdosfs/msdosfs_vfsops.c -L sys/fs/msdosfs/msdosfs_vfsops.c -u -r1.6 -r1.7
--- sys/fs/msdosfs/msdosfs_vfsops.c
+++ sys/fs/msdosfs/msdosfs_vfsops.c
@@ -132,10 +132,18 @@
error = vfs_getopt(mp->mnt_optnew,
"cs_dos", &dos, NULL);
if (!error) {
- msdosfs_iconv->open(win, local, &pmp->pm_u2w);
- msdosfs_iconv->open(local, win, &pmp->pm_w2u);
- msdosfs_iconv->open(dos, local, &pmp->pm_u2d);
- msdosfs_iconv->open(local, dos, &pmp->pm_d2u);
+ char *p = (char*)local;
+ if (p!=NULL && p[0]=='U'
+ && p[1]=='T' && p[2]=='F'
+ && p[3]=='-' && p[4]=='8' && p[5]=='\0'){
+ pmp->pm_w2u = NULL;
+ pmp->pm_u2w = NULL;
+ }else{
+ msdosfs_iconv->open(win, local, &pmp->pm_u2w);
+ msdosfs_iconv->open(local, win, &pmp->pm_w2u);
+ }
+ msdosfs_iconv->open(dos, local, &pmp->pm_u2d);
+ msdosfs_iconv->open(local, dos, &pmp->pm_d2u);
}
if (error != 0)
return (error);
Index: msdosfs_conv.c
===================================================================
RCS file: /home/cvs/src/sys/fs/msdosfs/msdosfs_conv.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -L sys/fs/msdosfs/msdosfs_conv.c -L sys/fs/msdosfs/msdosfs_conv.c -u -r1.4 -r1.5
--- sys/fs/msdosfs/msdosfs_conv.c
+++ sys/fs/msdosfs/msdosfs_conv.c
@@ -64,7 +64,7 @@
static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle);
static u_int16_t dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *);
static u_int16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *);
-static u_int16_t win2unixchr(u_int16_t, struct msdosfsmount *);
+static u_int32_t win2unixchr(u_int16_t, struct msdosfsmount *);
static u_int16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *);
/*
@@ -222,6 +222,109 @@
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
};
+static int iconv_u2w(const char **inbuf, size_t *inbytes, /* in/out: UTF-8 input cursor and count of input bytes left */
+ char **outbuf, size_t *outbytes) /* in/out: output cursor and count of output bytes left */
+{ /* Decodes 1- to 3-byte UTF-8 sequences into 2-byte units, high byte first; always returns 0. */
+ u_int8_t mark; /* lead byte of the sequence being decoded */
+ u_int16_t uc = 0; /* code point accumulated so far */
+ char * obuf = NULL;
+ const char *ibuf, *ibuf_end, *obuf_end;
+ if ((inbuf&&inbytes&&outbuf&&outbytes) /* any NULL argument, NULL buffer or zero count: do nothing */
+ && (*inbuf&&*inbytes&&*outbuf&&*outbytes)){
+ ibuf = *inbuf;
+ ibuf_end = *inbuf+*inbytes;
+ obuf = *outbuf;
+ obuf_end = *outbuf+*outbytes;
+ int follow = 0; /* number of continuation bytes still expected */
+ while(ibuf<ibuf_end && &obuf[1]<obuf_end){ /* needs one input byte and room for two output bytes */
+ mark = (u_int8_t)*ibuf++;
+ if (mark<0xF0 && mark>0xE0){ /* NOTE(review): strict '>' rejects 0xE0, a legal lead byte for U+0800..U+0FFF (RFC 3629) - confirm */
+ /* 1110XXXX: three-byte sequence, four payload bits in the lead byte */
+ uc = mark&0x0F;
+ follow = 2;
+ }else if (mark<0xE0 && mark>0xC0){ /* NOTE(review): accepts 0xC1, which can only begin an overlong encoding - confirm */
+ /* 110XXXXX: two-byte sequence, five payload bits in the lead byte */
+ uc = mark&0x1F;
+ follow = 1;
+ }else if (mark<0x80){
+ /* 0XXXXXXX: single-byte (ASCII) character */
+ uc = mark;
+ follow = 0;
+ }else{
+ /* Unsupported lead byte: 0x80-0xC0, exactly 0xE0, and 0xF0-0xFF all end up here. */
+ printf("convert fail 0xF0 0xE0\n");
+ break; /* leaves *inbuf and *outbuf at the last fully converted character */
+ }
+ if (&ibuf[follow] > ibuf_end){
+ /* Input ends in the middle of a sequence: stop without consuming it. */
+ break;
+ }
+ for (; follow>0; follow--){
+ /* Each continuation byte must be 10XXXXXX (0x80-0xBF). */
+ if ((*ibuf&0xC0) != 0x80){
+ *outbytes = obuf_end - *outbuf; /* counts are measured from the last committed cursors */
+ *inbytes = ibuf_end - *inbuf;
+ printf("convert fail SEQ\n");
+ return 0; /* same return value as success; only the leftover *inbytes shows the failure */
+ }
+ uc = (uc<<6)|(*ibuf++&0x3F); /* append the six payload bits */
+ }
+ *obuf++ = (uc>>8); /* high byte first */
+ *obuf++ = uc;
+ *outbuf = obuf; /* commit both cursors only after a whole character was converted */
+ *inbuf = ibuf;
+ }
+ *outbytes = obuf_end - *outbuf; /* report what remains after the last committed character */
+ *inbytes = ibuf_end - *inbuf;
+ }
+ return 0;
+}
+
+static int iconv_w2u(const char **inbuf, size_t *inbytes, /* in/out: input cursor over 2-byte units and count of input bytes left */
+ char **outbuf, size_t *outbytes) /* in/out: UTF-8 output cursor and count of output bytes left */
+{ /* Encodes 2-byte units (high byte first) as 1- to 3-byte UTF-8 sequences; always returns 0. */
+ u_int16_t uc = 0; /* the 16-bit value currently being encoded */
+ char *obuf = NULL;
+ const char *ibuf, *ibuf_end, *obuf_end;
+ if ((inbuf&&inbytes&&outbuf&&outbytes) /* any NULL argument, NULL buffer or zero count: do nothing */
+ && (*inbuf&&*inbytes&&*outbuf&&*outbytes)){
+ ibuf = *inbuf;
+ ibuf_end = *inbuf+*inbytes;
+ obuf = *outbuf;
+ obuf_end = *outbuf+*outbytes;
+ int follow = 0; /* number of continuation bytes to emit after the lead byte */
+ while(&ibuf[1]<ibuf_end && obuf<obuf_end){ /* needs two input bytes and room for at least the lead byte */
+ uc = (0xFF&*ibuf++); /* high byte */
+ uc = (0xFF&*ibuf++)|(uc<<8); /* low byte */
+ if (uc < 0x80){
+ *obuf++ = (uc); /* 0XXXXXXX */
+ follow = 0;
+ }else if (uc < 0x800){
+ *obuf++ = (uc>>6)|0xC0; /* 110XXXXX */
+ follow = 1;
+ }else { /* NOTE(review): values 0xD800-0xDFFF get no special handling and are encoded like any other - confirm */
+ /* uc is 16 bits wide, so three bytes are always enough here. */
+ *obuf++ = (uc>>12)|0xE0; /* 1110XXXX */
+ follow = 2;
+ }
+ if (&obuf[follow] > obuf_end){
+ /* No room for the continuation bytes: the lead byte was already stored, but the cursors are not committed. */
+ break;
+ }
+ for (follow--;follow>=0;follow--){ /* emit the continuation bytes, most significant group first */
+ int shift = follow*6;
+ u_int8_t ch = uc>>shift;
+ *obuf++ = (ch&0x3F)|0x80; /* 10XXXXXX */
+ }
+ *outbuf = obuf; /* commit both cursors only after a whole character was written */
+ *inbuf = ibuf;
+ }
+ *outbytes = obuf_end - *outbuf; /* report what remains after the last committed character */
+ *inbytes = ibuf_end - *inbuf;
+ }
+ return 0;
+}
+
/*
* DOS filenames are made of 2 parts, the name part and the extension part.
* The name part is 8 characters long and the extension part is 3
@@ -654,8 +757,8 @@
struct msdosfsmount *pmp;
{
u_int8_t *cp;
- u_int8_t *np, name[WIN_CHARS * 2 + 1];
- u_int16_t code;
+ u_int8_t *np, name[WIN_CHARS * 3 + 1];
+ u_int32_t code;
int i;
if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
@@ -688,6 +791,8 @@
return -1;
default:
code = win2unixchr(code, pmp);
+ if (code & 0xff0000)
+ *np++ = code >> 16;
if (code & 0xff00)
*np++ = code >> 8;
*np++ = code;
@@ -707,6 +812,8 @@
return -1;
default:
code = win2unixchr(code, pmp);
+ if (code & 0xff0000)
+ *np++ = code >> 16;
if (code & 0xff00)
*np++ = code >> 8;
*np++ = code;
@@ -726,6 +833,8 @@
return -1;
default:
code = win2unixchr(code, pmp);
+ if (code & 0xff0000)
+ *np++ = code >> 16;
if (code & 0xff00)
*np++ = code >> 8;
*np++ = code;
@@ -778,7 +887,10 @@
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
wlen = WIN_MAXLEN * 2;
wnp = wn;
- msdosfs_iconv->conv(pmp->pm_u2w, (const char **)&un, &unlen, &wnp, &wlen);
+ if (pmp->pm_u2w != NULL)
+ msdosfs_iconv->conv(pmp->pm_u2w, (const char **)&un, &unlen, &wnp, &wlen);
+ else
+ iconv_u2w((const char**)&un, &unlen, &wnp, &wlen);
if (unlen > 0)
return 0;
return howmany(WIN_MAXLEN - wlen/2, WIN_CHARS);
@@ -816,7 +928,10 @@
if (flag & MSDOSFSMNT_KICONV && msdosfs_iconv) {
outp = outstr;
outlen *= weight;
- msdosfs_iconv->conv(handle, instr, &inlen, &outp, &outlen);
+ if (handle != NULL)
+ msdosfs_iconv->conv(handle, instr, &inlen, &outp, &outlen);
+ else
+ iconv_u2w(instr, &inlen, &outp, &outlen);
return (inlen);
}
@@ -888,8 +1003,11 @@
ucslen = 2;
len = *ilen;
up = unicode;
- msdosfs_iconv->convchr(pmp->pm_u2w, (const char **)instr,
- ilen, &up, &ucslen);
+ if (pmp->pm_u2w != NULL)
+ msdosfs_iconv->convchr(pmp->pm_u2w, (const char **)instr,
+ ilen, &up, &ucslen);
+ else
+ iconv_u2w((const char**)instr, ilen, &up, &ucslen);
unixlen = len - *ilen;
/*
@@ -950,10 +1068,10 @@
/*
* Convert Windows char to Local char
*/
-static u_int16_t
+static u_int32_t
win2unixchr(u_int16_t wc, struct msdosfsmount *pmp)
{
- u_char *inp, *outp, inbuf[3], outbuf[3];
+ u_char *inp, *outp, inbuf[3], outbuf[4];
size_t ilen, olen, len;
if (wc == 0)
@@ -965,10 +1083,14 @@
inbuf[2] = '\0';
ilen = olen = len = 2;
+ len = olen = 4;
inp = inbuf;
outp = outbuf;
- msdosfs_iconv->convchr(pmp->pm_w2u, (const char **)&inp, &ilen,
- (char **)&outp, &olen);
+ if (pmp->pm_w2u != NULL)
+ msdosfs_iconv->convchr(pmp->pm_w2u, (const char **)&inp, &ilen,
+ (char **)&outp, &olen);
+ else
+ iconv_w2u((const char**)&inp, &ilen, (char**)&outp, &olen);
len -= olen;
/*
@@ -979,10 +1101,10 @@
return (wc);
}
- wc = 0;
+ u_int32_t wc32 = 0;
while(len--)
- wc |= (*(outp - len - 1) & 0xff) << (len << 3);
- return (wc);
+ wc32 |= (*(outp - len - 1) & 0xff) << (len << 3);
+ return (wc32);
}
if (wc & 0xff00)
@@ -1007,7 +1129,9 @@
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
outp = outbuf;
olen = 2;
- if (lower & (LCASE_BASE | LCASE_EXT))
+ if (pmp->pm_u2w == NULL)
+ iconv_u2w((const char**)instr, ilen, (char **)&outp, &olen);
+ else if (lower & (LCASE_BASE | LCASE_EXT))
msdosfs_iconv->convchr_case(pmp->pm_u2w, (const char **)instr,
ilen, (char **)&outp, &olen,
KICONV_FROM_LOWER);
@@ -1021,7 +1145,7 @@
if (olen == 2)
return (0);
- wc = (outbuf[0]<<8) | outbuf[1];
+ wc = (outbuf[0]<<8)|outbuf[1];
return (wc);
}
More information about the Midnightbsd-cvs
mailing list