1 /* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
2 /*
3  * Copyright (c) 1993, 1994, 1995, 1996, 1997
4  *        The Regents of the University of California.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *        This product includes software developed by the Computer Systems
17  *        Engineering Group at Lawrence Berkeley Laboratory.
18  * 4. Neither the name of the University nor of the Laboratory may be used
19  *    to endorse or promote products derived from this software without
20  *    specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #ifdef _WIN32
36 #include <stdio.h>
37 #include <errno.h>
38 
39 #include <pcap/pcap.h>        /* Needed for PCAP_ERRBUF_SIZE */
40 
41 #include "charconv.h"
42 
43 wchar_t *
cp_to_utf_16le(UINT codepage,const char * cp_string,DWORD flags)44 cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
45 {
46           int utf16le_len;
47           wchar_t *utf16le_string;
48 
49           /*
50            * Map from the specified code page to UTF-16LE.
51            * First, find out how big a buffer we'll need.
52            */
53           utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
54               NULL, 0);
55           if (utf16le_len == 0) {
56                     /*
57                      * Error.  Fail with EINVAL.
58                      */
59                     errno = EINVAL;
60                     return (NULL);
61           }
62 
63           /*
64            * Now attempt to allocate a buffer for that.
65            */
66           utf16le_string = malloc(utf16le_len * sizeof (wchar_t));
67           if (utf16le_string == NULL) {
68                     /*
69                      * Not enough memory; assume errno has been
70                      * set, and fail.
71                      */
72                     return (NULL);
73           }
74 
75           /*
76            * Now convert.
77            */
78           utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
79               utf16le_string, utf16le_len);
80           if (utf16le_len == 0) {
81                     /*
82                      * Error.  Fail with EINVAL.
83                      * XXX - should this ever happen, given that
84                      * we already ran the string through
85                      * MultiByteToWideChar() to find out how big
86                      * a buffer we needed?
87                      */
88                     free(utf16le_string);
89                     errno = EINVAL;
90                     return (NULL);
91           }
92           return (utf16le_string);
93 }
94 
95 char *
utf_16le_to_cp(UINT codepage,const wchar_t * utf16le_string)96 utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
97 {
98           int cp_len;
99           char *cp_string;
100 
101           /*
102            * Map from UTF-16LE to the specified code page.
103            * First, find out how big a buffer we'll need.
104            * We convert composite characters to precomposed characters,
105            * as that's what Windows expects.
106            */
107           cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
108               utf16le_string, -1, NULL, 0, NULL, NULL);
109           if (cp_len == 0) {
110                     /*
111                      * Error.  Fail with EINVAL.
112                      */
113                     errno = EINVAL;
114                     return (NULL);
115           }
116 
117           /*
118            * Now attempt to allocate a buffer for that.
119            */
120           cp_string = malloc(cp_len * sizeof (char));
121           if (cp_string == NULL) {
122                     /*
123                      * Not enough memory; assume errno has been
124                      * set, and fail.
125                      */
126                     return (NULL);
127           }
128 
129           /*
130            * Now convert.
131            */
132           cp_len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK,
133               utf16le_string, -1, cp_string, cp_len, NULL, NULL);
134           if (cp_len == 0) {
135                     /*
136                      * Error.  Fail with EINVAL.
137                      * XXX - should this ever happen, given that
138                      * we already ran the string through
139                      * WideCharToMultiByte() to find out how big
140                      * a buffer we needed?
141                      */
142                     free(cp_string);
143                     errno = EINVAL;
144                     return (NULL);
145           }
146           return (cp_string);
147 }
148 
149 /*
150  * Convert an error message string from UTF-8 to the local code page, as
151  * best we can.
152  *
153  * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long; we truncate
154  * if it doesn't fit.
155  */
156 void
utf_8_to_acp_truncated(char * errbuf)157 utf_8_to_acp_truncated(char *errbuf)
158 {
159           wchar_t *utf_16_errbuf;
160           int retval;
161           DWORD err;
162 
163           /*
164            * Do this by converting to UTF-16LE and then to the local
165            * code page.  That means we get to use Microsoft's
166            * conversion routines, rather than having to understand
167            * all the code pages ourselves, *and* that this routine
168            * can convert in place.
169            */
170 
171           /*
172            * Map from UTF-8 to UTF-16LE.
173            * First, find out how big a buffer we'll need.
174            * Convert any invalid characters to REPLACEMENT CHARACTER.
175            */
176           utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
177           if (utf_16_errbuf == NULL) {
178                     /*
179                      * Error.  Give up.
180                      */
181                     snprintf(errbuf, PCAP_ERRBUF_SIZE,
182                         "Can't convert error string to the local code page");
183                     return;
184           }
185 
186           /*
187            * Now, convert that to the local code page.
188            * Use the current thread's code page.  For unconvertible
189            * characters, let it pick the "best fit" character.
190            *
191            * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
192            * does if the buffer isn't big enough, but we don't want to have
193            * to handle all local code pages ourselves; doing so requires
194            * knowledge of all those code pages, including knowledge of how
195            * characters are formed in those code pages so that we can avoid
196            * cutting a multi-byte character into pieces.
197            *
198            * Converting to an un-truncated string using Windows APIs, and
199            * then copying to the buffer, still requires knowledge of how
200            * characters are formed in the target code page.
201            */
202           retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
203               errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);
204           if (retval == 0) {
205                     err = GetLastError();
206                     free(utf_16_errbuf);
207                     if (err == ERROR_INSUFFICIENT_BUFFER)
208                               snprintf(errbuf, PCAP_ERRBUF_SIZE,
209                                   "The error string, in the local code page, didn't fit in the buffer");
210                     else
211                               snprintf(errbuf, PCAP_ERRBUF_SIZE,
212                                   "Can't convert error string to the local code page");
213                     return;
214           }
215           free(utf_16_errbuf);
216 }
217 #endif
218