[Midnightbsd-cvs] src: fs/ntfs: support Chinese characters, etc.

laffer1 at midnightbsd.org laffer1 at midnightbsd.org
Mon Mar 30 14:20:44 EDT 2009


Log Message:
-----------
Support Chinese (and other non-ASCII) characters in NTFS file names by adding built-in UTF-8 <-> UCS-2 conversion.

Modified Files:
--------------
    src/sys/fs/ntfs:
        ntfs_subr.c (r1.3 -> r1.4)
        ntfs_vfsops.c (r1.3 -> r1.4)
        ntfs_vnops.c (r1.4 -> r1.5)

-------------- next part --------------
Index: ntfs_vfsops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_vfsops.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/fs/ntfs/ntfs_vfsops.c -L sys/fs/ntfs/ntfs_vfsops.c -u -r1.3 -r1.4
--- sys/fs/ntfs/ntfs_vfsops.c
+++ sys/fs/ntfs/ntfs_vfsops.c
@@ -29,7 +29,6 @@
  * $FreeBSD: src/sys/fs/ntfs/ntfs_vfsops.c,v 1.88 2007/09/21 23:50:15 rodrigc Exp $
  */
 
-
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
Index: ntfs_subr.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_subr.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -L sys/fs/ntfs/ntfs_subr.c -L sys/fs/ntfs/ntfs_subr.c -u -r1.3 -r1.4
--- sys/fs/ntfs/ntfs_subr.c
+++ sys/fs/ntfs/ntfs_subr.c
@@ -61,7 +61,6 @@
 static int ntfs_findvattr(struct ntfsmount *, struct ntnode *, struct ntvattr **, struct ntvattr **, u_int32_t, const char *, size_t, cn_t);
 static int ntfs_uastricmp(struct ntfsmount *, const wchar *, size_t, const char *, size_t);
 static int ntfs_uastrcmp(struct ntfsmount *, const wchar *, size_t, const char *, size_t);
-
 /* table for mapping Unicode chars into uppercase; it's filled upon first
  * ntfs mount, freed upon last ntfs umount */
 static wchar *ntfs_toupper_tab;
@@ -76,6 +75,149 @@
   (aalp->al_type == type) && (aalp->al_namelen == namelen) &&		\
   !NTFS_UASTRCMP(aalp->al_name,aalp->al_namelen,name,namelen) )
 
+/*
+ * Convert UTF-8 to little-endian UCS-2 (two output bytes per character).
+ * iconv(3)-style arguments: the buffer pointers advance past every fully
+ * converted character and the byte counts are set to what remains.  Stops at
+ * an invalid lead or continuation byte, at truncated input, or when fewer
+ * than two output bytes are left.  Always returns 0; NULL or empty arguments
+ * make the call a no-op.
+ */
+static int ntfs_iconv_u2w(const char **inbuf, size_t *inbytes,
+        char **outbuf, size_t *outbytes)
+{
+    u_int8_t mark;
+    u_int16_t uc = 0;
+    char *obuf = NULL;
+    const char *ibuf, *ibuf_end, *obuf_end;
+    int follow = 0;
+    if (!(inbuf && inbytes && outbuf && outbytes)
+            || !(*inbuf && *inbytes && *outbuf && *outbytes))
+        return 0;
+    ibuf = *inbuf;
+    ibuf_end = *inbuf + *inbytes;
+    obuf = *outbuf;
+    obuf_end = *outbuf + *outbytes;
+    while (ibuf < ibuf_end && &obuf[1] < obuf_end) {
+        mark = (u_int8_t)*ibuf++;
+        if (mark < 0x80) {                          /* 0XXXXXXX */
+            uc = mark;
+            follow = 0;
+        } else if (mark >= 0xC2 && mark < 0xE0) {   /* 110XXXXX; C0/C1 are overlong */
+            uc = mark & 0x1F;
+            follow = 1;
+        } else if (mark >= 0xE0 && mark < 0xF0) {   /* 1110XXXX; 0xE0 is a valid lead */
+            uc = mark & 0x0F;
+            follow = 2;
+        } else { /* stray 10XXXXXX, overlong C0/C1, or a 4-byte lead UCS-2 cannot hold */
+            printf("convert fail 0xF0 0xE0\n");
+            break;
+        }
+        if (&ibuf[follow] > ibuf_end)
+            break;                                  /* sequence cut off by end of input */
+        for (; follow > 0; follow--) {
+            if (((u_int8_t)*ibuf & 0xC0) != 0x80) { /* continuation must be 10XXXXXX */
+                printf("convert fail SEQ\n");
+                goto done;
+            }
+            uc = (uc << 6) | ((u_int8_t)*ibuf++ & 0x3F);
+        }
+        *obuf++ = (uc);
+        *obuf++ = (uc >> 8);
+        *outbuf = obuf;                             /* commit whole characters only */
+        *inbuf = ibuf;
+    }
+done:
+    *outbytes = obuf_end - *outbuf;
+    *inbytes = ibuf_end - *inbuf;
+    return 0;
+}
+
+/*
+ * Convert little-endian UCS-2 to UTF-8; the inverse of ntfs_iconv_u2w().
+ * iconv(3)-style arguments: the buffer pointers advance past every fully
+ * converted character and the byte counts are set to what remains.  Stops
+ * when fewer than two input bytes remain or the next character does not fit
+ * in the output.  Surrogate halves are encoded one by one as 3-byte
+ * sequences.  Always returns 0; NULL or empty arguments make it a no-op.
+ */
+static int ntfs_iconv_w2u(const char **inbuf, size_t *inbytes,
+        char **outbuf, size_t *outbytes)
+{
+    static const u_int8_t lead[] = { 0x00, 0xC0, 0xE0 }; /* marker bits by trailing-byte count */
+    u_int16_t uc = 0;
+    char *obuf = NULL;
+    const char *ibuf, *ibuf_end, *obuf_end;
+    int follow = 0;
+
+    if (!(inbuf && inbytes && outbuf && outbytes)
+            || !(*inbuf && *inbytes && *outbuf && *outbytes))
+        return 0;
+    ibuf = *inbuf;
+    ibuf_end = *inbuf + *inbytes;
+    obuf = *outbuf;
+    obuf_end = *outbuf + *outbytes;
+    while (&ibuf[1] < ibuf_end && obuf < obuf_end) {
+        /* go through u_int8_t: a negative char must not be shifted or sign-extended */
+        uc = (u_int8_t)ibuf[0] | ((u_int16_t)(u_int8_t)ibuf[1] << 8);
+        ibuf += 2;
+        follow = (uc < 0x80) ? 0 : (uc < 0x800) ? 1 : 2;
+        /* the whole sequence must fit, otherwise nothing at all is written */
+        if (&obuf[follow] >= obuf_end)
+            break;
+        *obuf++ = (uc >> (follow * 6)) | lead[follow];
+        /* trailing 10XXXXXX bytes, most significant six bits first */
+        for (follow--; follow >= 0; follow--)
+            *obuf++ = ((uc >> (follow * 6)) & 0x3F) | 0x80;
+        *outbuf = obuf;
+        *inbuf = ibuf;
+    }
+    *outbytes = obuf_end - *outbuf;
+    *inbytes = ibuf_end - *inbuf;
+    return 0;
+}
+
+
+/* Local charset -> 16-bit units.  A NULL handle selects the built-in UTF-8
+ * decoder; otherwise one convchr call is made and the two bytes of every
+ * 16-bit unit it wrote are swapped in place.  Returns convchr's result or 0. */
+static int ntfs_iconv_l2u(void *handle, const char **inbuf, size_t *inbytes,
+        char **outbuf, size_t *outbytes)
+{
+    char *cur, *stop, tmp;
+    if (handle == NULL)
+        return ntfs_iconv_u2w(inbuf, inbytes, outbuf, outbytes);
+    if (!outbuf || !outbytes || !*outbytes || !*outbuf)
+        return 0;
+    cur = *outbuf;      /* start of whatever convchr is about to produce */
+    int rc = ntfs_iconv->convchr(handle, inbuf, inbytes, outbuf, outbytes);
+    for (stop = *outbuf; cur + 1 < stop; cur += 2) {
+        tmp = cur[0]; cur[0] = cur[1]; cur[1] = tmp;
+    }
+    return rc;
+}
+
+/* 16-bit units -> local charset.  A NULL handle selects the built-in UTF-8
+ * encoder; otherwise the two bytes of each unit are swapped into a scratch
+ * buffer and handed to convchr until it stops consuming.  *inbuf itself is
+ * not advanced on the convchr path.  Always returns 0. */
+int ntfs_iconv_u2l(void *handle, const char **inbuf, size_t *inbytes,
+        char **outbuf, size_t *outbytes)
+{
+    char text[3] = { 0 };   /* zeroed so no indeterminate byte reaches convchr */
+    const char *ptext = NULL, *ibuf, *ibuf_end; /* const char *, as convchr gets in l2u */
+    if (handle == NULL)
+        return ntfs_iconv_w2u(inbuf, inbytes, outbuf, outbytes);
+    if (!inbuf || !inbytes || !*inbuf || !*inbytes)
+        return 0;
+    ibuf_end = *inbuf + *inbytes;
+    for (ibuf = *inbuf; &ibuf[1] < ibuf_end && ptext != text; ibuf += 2) {
+        text[0] = ibuf[1]; text[1] = ibuf[0];
+        ptext = text;       /* still == text afterwards: convchr consumed nothing, stop */
+        ntfs_iconv->convchr(handle, &ptext, inbytes, outbuf, outbytes);
+    }
+    return 0;
+}
+
 /*
  * 
  */
@@ -675,8 +817,11 @@
 	int res;
 	wchar wc;
 
-	if (ntmp->ntm_ic_l2u) {
+#define NTFS_ICONV_UTF8_ENABLE (1==1)
+
+	if (ntmp->ntm_ic_l2u || NTFS_ICONV_UTF8_ENABLE) {
 		for (i = 0, j = 0; i < ustrlen && j < astrlen; i++, j++) {
+#if 0
 			if (j < astrlen -1) {
 				wc = (wchar)astr[j]<<8 | (astr[j+1]&0xFF);
 				len = 2;
@@ -684,22 +829,52 @@
 				wc = (wchar)astr[j]<<8 & 0xFF00;
 				len = 1;
 			}
+#endif
+            wchar wcode = '?';
+            char *optr = (char*)&wcode;
+            size_t olen = 2;
+            const char *iptr = &astr[j];
+            size_t ilen = astrlen-j;
+            len = ilen;
+            ntfs_iconv_l2u(ntmp->ntm_ic_l2u, &iptr, &ilen, &optr, &olen);
+            len -= ilen;
 			res = ((int) NTFS_TOUPPER(ustr[i])) -
-				((int)NTFS_TOUPPER(NTFS_82U(wc, &len)));
+				((int)NTFS_TOUPPER(wcode));
 			j += len - 1;
 			mbstrlen -= len - 1;
 
-			if (res)
+			if (res){
 				return res;
+            }
 		}
 	} else {
 		/*
 		 * We use NTFS_82U(NTFS_U28(c)) to get rid of unicode
 		 * symbols not covered by translation table
 		 */
+        printf("bad code!\n");
 		for (i = 0; i < ustrlen && i < astrlen; i++) {
-			res = ((int) NTFS_TOUPPER(NTFS_82U(NTFS_U28(ustr[i]), &len))) -
-				((int)NTFS_TOUPPER(NTFS_82U((wchar)astr[i], &len)));
+#if 1
+            wchar wcode='?', wcode2='?';
+            char obuf[4];
+            char *optr = obuf;
+            size_t olen = 4;
+            const char *iptr = (const char*)&ustr[i];
+            size_t ilen = 2;
+            ntfs_iconv_u2l(ntmp->ntm_ic_u2l, &iptr, &ilen, &optr, &olen);
+            iptr = obuf;
+            ilen = 4-olen;
+            optr = (char*)&wcode;
+            olen = 2;
+            ntfs_iconv_l2u(ntmp->ntm_ic_l2u, &iptr, &ilen, &optr, &olen);
+            iptr = &astr[i];
+            ilen = astrlen-i;
+            optr = (char*)&wcode2;
+            olen = 2;
+            ntfs_iconv_l2u(ntmp->ntm_ic_l2u, &iptr, &ilen, &optr, &olen);
+#endif
+			res = ((int) NTFS_TOUPPER(wcode)) -
+				((int)NTFS_TOUPPER(wcode2));
 			if (res)
 				return res;
 		}
@@ -718,23 +893,34 @@
 	const char *astr;
 	size_t astrlen;
 {
-	char u, l;
 	size_t i, j, mbstrlen = astrlen;
 	int res;
 	wchar wc;
 
 	for (i = 0, j = 0; (i < ustrlen) && (j < astrlen); i++, j++) {
 		res = 0;
-		wc = NTFS_U28(ustr[i]);
-		u = (char)(wc>>8);
-		l = (char)wc;
-		if (u != '\0' && j < astrlen -1) {
-			res = (int) (u - astr[j++]);
-			mbstrlen--;
-		}
-		res = (res<<8) + (int) (l - astr[j]);
-		if (res)
-			return res;
+        char obuf[3];
+        char *optr = obuf;
+        size_t olen = 3;
+        const char *iptr = &ustr[i];
+        size_t ilen = 2;
+        ntfs_iconv_u2l(ntmp->ntm_ic_u2l, &iptr, &ilen, &optr, &olen);
+        if (olen == 3){
+            obuf[0] = '?';
+            olen--;
+        }
+        int t = 0;
+        j--;
+        mbstrlen++;
+        while (olen < 3){
+            res = (int)(obuf[t] - astr[++j]);
+            if (res){
+                return res;
+            }
+            mbstrlen--;
+            olen++;
+            t++;
+        }
 	}
 	return (ustrlen - mbstrlen);
 }
@@ -2047,7 +2233,13 @@
 	int i, j, h, l;
 
 	if (ntfs_iconv && cs_local) {
-		ntfs_iconv->open(cs_local, cs_ntfs, &ntmp->ntm_ic_u2l);
+        if (cs_local[0]=='U' && cs_local[1]=='T'
+                && cs_local[2]=='F' && cs_local[3]=='-'
+                && cs_local[4]=='8' && cs_local[5]=='\0'){
+            ntmp->ntm_ic_u2l = NULL;
+        }else{
+            ntfs_iconv->open(cs_local, cs_ntfs, &ntmp->ntm_ic_u2l);
+        }
 		return (0);
 	}
 
@@ -2105,7 +2297,13 @@
 	int i;
 
 	if (ntfs_iconv && cs_local) {
-		ntfs_iconv->open(cs_ntfs, cs_local, &ntmp->ntm_ic_l2u);
+        if (cs_local[0]=='U' && cs_local[1]=='T'
+                && cs_local[2]=='F' && cs_local[3]=='-'
+                && cs_local[4]=='8' && cs_local[5]=='\0'){
+            ntmp->ntm_ic_l2u = NULL;
+        }else{
+            ntfs_iconv->open(cs_ntfs, cs_local, &ntmp->ntm_ic_l2u);
+        }
 		return (0);
 	}
 
@@ -2140,6 +2338,7 @@
  * and substitutes a '_' for it if the result would be '\0';
  * something better has to be definitely though out
  */
+#if 0
 wchar
 ntfs_u28(
 	struct ntfsmount *ntmp, 
@@ -2204,4 +2403,4 @@
 
 	return ('?');
 }
-
+#endif
Index: ntfs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/fs/ntfs/ntfs_vnops.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -L sys/fs/ntfs/ntfs_vnops.c -L sys/fs/ntfs/ntfs_vnops.c -u -r1.4 -r1.5
--- sys/fs/ntfs/ntfs_vnops.c
+++ sys/fs/ntfs/ntfs_vnops.c
@@ -83,6 +83,8 @@
 static vop_pathconf_t	ntfs_pathconf;
 static vop_vptofh_t	ntfs_vptofh;
 
+int ntfs_iconv_u2l(void *, const char **, size_t *, char **, size_t *);
+
 int	ntfs_prtactive = 1;	/* 1 => print out reclaim of active vnodes */
 
 /*
@@ -554,12 +556,20 @@
 			if(!ntfs_isnamepermitted(ntmp,iep))
 				continue;
 
+#if 0
 			for(i=0, j=0; i<iep->ie_fnamelen; i++, j++) {
 				c = NTFS_U28(iep->ie_fname[i]);
 				if (c&0xFF00)
 					cde.d_name[j++] = (char)(c>>8);
 				cde.d_name[j] = (char)c&0xFF;
 			}
+#endif
+            const char *ibuf = (const char *)iep->ie_fname;
+            size_t ilen = iep->ie_fnamelen*2;
+            char *obuf = cde.d_name;
+            size_t olen = j = sizeof(cde.d_name)-1;
+            ntfs_iconv_u2l(ntmp->ntm_ic_u2l, &ibuf, &ilen, &obuf, &olen);
+            j -= olen;
 			cde.d_name[j] = '\0';
 			dprintf(("ntfs_readdir: elem: %d, fname:[%s] type: %d, flag: %d, ",
 				num, cde.d_name, iep->ie_fnametype,


More information about the Midnightbsd-cvs mailing list