1 /* General utility routines for GDB/Python.
2 
3    Copyright (C) 2008-2024 Free Software Foundation, Inc.
4 
5    This file is part of GDB.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3 of the License, or
10    (at your option) any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
19 
20 #include "top.h"
21 #include "charset.h"
22 #include "value.h"
23 #include "python-internal.h"
24 
25 /* Converts a Python 8-bit string to a unicode string object.  Assumes the
26    8-bit string is in the host charset.  If an error occurs during conversion,
27    returns NULL with a python exception set.
28 
29    As an added bonus, the functions accepts a unicode string and returns it
30    right away, so callers don't need to check which kind of string they've
31    got.  In Python 3, all strings are Unicode so this case is always the
32    one that applies.
33 
34    If the given object is not one of the mentioned string types, NULL is
35    returned, with the TypeError python exception set.  */
36 gdbpy_ref<>
python_string_to_unicode(PyObject * obj)37 python_string_to_unicode (PyObject *obj)
38 {
39   PyObject *unicode_str;
40 
41   /* If obj is already a unicode string, just return it.
42      I wish life was always that simple...  */
43   if (PyUnicode_Check (obj))
44     {
45       unicode_str = obj;
46       Py_INCREF (obj);
47     }
48   else
49     {
50       PyErr_SetString (PyExc_TypeError,
51                            _("Expected a string object."));
52       unicode_str = NULL;
53     }
54 
55   return gdbpy_ref<> (unicode_str);
56 }
57 
58 /* Returns a newly allocated string with the contents of the given unicode
59    string object converted to CHARSET.  If an error occurs during the
60    conversion, NULL will be returned and a python exception will be
61    set.  */
62 static gdb::unique_xmalloc_ptr<char>
unicode_to_encoded_string(PyObject * unicode_str,const char * charset)63 unicode_to_encoded_string (PyObject *unicode_str, const char *charset)
64 {
65   /* Translate string to named charset.  */
66   gdbpy_ref<> string (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
67   if (string == NULL)
68     return NULL;
69 
70   return gdb::unique_xmalloc_ptr<char>
71     (xstrdup (PyBytes_AsString (string.get ())));
72 }
73 
74 /* Returns a PyObject with the contents of the given unicode string
75    object converted to a named charset.  If an error occurs during
76    the conversion, NULL will be returned and a python exception will
77    be set.  */
78 static gdbpy_ref<>
unicode_to_encoded_python_string(PyObject * unicode_str,const char * charset)79 unicode_to_encoded_python_string (PyObject *unicode_str, const char *charset)
80 {
81   /* Translate string to named charset.  */
82   return gdbpy_ref<> (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
83 }
84 
85 /* Returns a newly allocated string with the contents of the given
86    unicode string object converted to the target's charset.  If an
87    error occurs during the conversion, NULL will be returned and a
88    python exception will be set.  */
89 gdb::unique_xmalloc_ptr<char>
unicode_to_target_string(PyObject * unicode_str)90 unicode_to_target_string (PyObject *unicode_str)
91 {
92   return (unicode_to_encoded_string
93             (unicode_str,
94              target_charset (gdbpy_enter::get_gdbarch ())));
95 }
96 
97 /* Returns a PyObject with the contents of the given unicode string
98    object converted to the target's charset.  If an error occurs
99    during the conversion, NULL will be returned and a python exception
100    will be set.  */
101 static gdbpy_ref<>
unicode_to_target_python_string(PyObject * unicode_str)102 unicode_to_target_python_string (PyObject *unicode_str)
103 {
104   return (unicode_to_encoded_python_string
105             (unicode_str,
106              target_charset (gdbpy_enter::get_gdbarch ())));
107 }
108 
109 /* Converts a python string (8-bit or unicode) to a target string in
110    the target's charset.  Returns NULL on error, with a python
111    exception set.  */
112 gdb::unique_xmalloc_ptr<char>
python_string_to_target_string(PyObject * obj)113 python_string_to_target_string (PyObject *obj)
114 {
115   gdbpy_ref<> str = python_string_to_unicode (obj);
116   if (str == NULL)
117     return NULL;
118 
119   return unicode_to_target_string (str.get ());
120 }
121 
122 /* Converts a python string (8-bit or unicode) to a target string in the
123    target's charset.  Returns NULL on error, with a python exception
124    set.
125 
126    In Python 3, the returned object is a "bytes" object (not a string).  */
127 gdbpy_ref<>
python_string_to_target_python_string(PyObject * obj)128 python_string_to_target_python_string (PyObject *obj)
129 {
130   gdbpy_ref<> str = python_string_to_unicode (obj);
131   if (str == NULL)
132     return str;
133 
134   return unicode_to_target_python_string (str.get ());
135 }
136 
137 /* Converts a python string (8-bit or unicode) to a target string in
138    the host's charset.  Returns NULL on error, with a python exception
139    set.  */
140 gdb::unique_xmalloc_ptr<char>
python_string_to_host_string(PyObject * obj)141 python_string_to_host_string (PyObject *obj)
142 {
143   gdbpy_ref<> str = python_string_to_unicode (obj);
144   if (str == NULL)
145     return NULL;
146 
147   return unicode_to_encoded_string (str.get (), host_charset ());
148 }
149 
150 /* Convert a host string to a python string.  */
151 
152 gdbpy_ref<>
host_string_to_python_string(const char * str)153 host_string_to_python_string (const char *str)
154 {
155   return gdbpy_ref<> (PyUnicode_Decode (str, strlen (str), host_charset (),
156                                                   NULL));
157 }
158 
159 /* Return true if OBJ is a Python string or unicode object, false
160    otherwise.  */
161 
162 int
gdbpy_is_string(PyObject * obj)163 gdbpy_is_string (PyObject *obj)
164 {
165   return PyUnicode_Check (obj);
166 }
167 
168 /* Return the string representation of OBJ, i.e., str (obj).
169    If the result is NULL a python error occurred, the caller must clear it.  */
170 
171 gdb::unique_xmalloc_ptr<char>
gdbpy_obj_to_string(PyObject * obj)172 gdbpy_obj_to_string (PyObject *obj)
173 {
174   gdbpy_ref<> str_obj (PyObject_Str (obj));
175 
176   if (str_obj != NULL)
177     return python_string_to_host_string (str_obj.get ());
178 
179   return NULL;
180 }
181 
182 /* See python-internal.h.  */
183 
184 gdb::unique_xmalloc_ptr<char>
to_string()185 gdbpy_err_fetch::to_string () const
186 {
187   /* There are a few cases to consider.
188      For example:
189      value is a string when PyErr_SetString is used.
190      value is not a string when raise "foo" is used, instead it is None
191      and type is "foo".
192      So the algorithm we use is to print `str (value)' if it's not
193      None, otherwise we print `str (type)'.
194      Using str (aka PyObject_Str) will fetch the error message from
195      gdb.GdbError ("message").  */
196 
197   gdbpy_ref<> value = this->value ();
198   if (value.get () != nullptr && value.get () != Py_None)
199     return gdbpy_obj_to_string (value.get ());
200   else
201     return gdbpy_obj_to_string (this->type ().get ());
202 }
203 
204 /* See python-internal.h.  */
205 
206 gdb::unique_xmalloc_ptr<char>
type_to_string()207 gdbpy_err_fetch::type_to_string () const
208 {
209   return gdbpy_obj_to_string (this->type ().get ());
210 }
211 
212 /* Convert a GDB exception to the appropriate Python exception.
213 
214    This sets the Python error indicator.  */
215 
216 void
gdbpy_convert_exception(const struct gdb_exception & exception)217 gdbpy_convert_exception (const struct gdb_exception &exception)
218 {
219   PyObject *exc_class;
220 
221   if (exception.reason == RETURN_QUIT)
222     exc_class = PyExc_KeyboardInterrupt;
223   else if (exception.reason == RETURN_FORCED_QUIT)
224     quit_force (NULL, 0);
225   else if (exception.error == MEMORY_ERROR)
226     exc_class = gdbpy_gdb_memory_error;
227   else
228     exc_class = gdbpy_gdb_error;
229 
230   PyErr_Format (exc_class, "%s", exception.what ());
231 }
232 
233 /* Converts OBJ to a CORE_ADDR value.
234 
235    Returns 0 on success or -1 on failure, with a Python exception set.
236 */
237 
238 int
get_addr_from_python(PyObject * obj,CORE_ADDR * addr)239 get_addr_from_python (PyObject *obj, CORE_ADDR *addr)
240 {
241   if (gdbpy_is_value_object (obj))
242     {
243 
244       try
245           {
246             *addr = value_as_address (value_object_to_value (obj));
247           }
248       catch (const gdb_exception &except)
249           {
250             GDB_PY_SET_HANDLE_EXCEPTION (except);
251           }
252     }
253   else
254     {
255       gdbpy_ref<> num (PyNumber_Long (obj));
256       gdb_py_ulongest val;
257 
258       if (num == NULL)
259           return -1;
260 
261       val = gdb_py_long_as_ulongest (num.get ());
262       if (PyErr_Occurred ())
263           return -1;
264 
265       if (sizeof (val) > sizeof (CORE_ADDR) && ((CORE_ADDR) val) != val)
266           {
267             PyErr_SetString (PyExc_ValueError,
268                                  _("Overflow converting to address."));
269             return -1;
270           }
271 
272       *addr = val;
273     }
274 
275   return 0;
276 }
277 
278 /* Convert a LONGEST to the appropriate Python object -- either an
279    integer object or a long object, depending on its value.  */
280 
281 gdbpy_ref<>
gdb_py_object_from_longest(LONGEST l)282 gdb_py_object_from_longest (LONGEST l)
283 {
284   if (sizeof (l) > sizeof (long))
285     return gdbpy_ref<> (PyLong_FromLongLong (l));
286   return gdbpy_ref<> (PyLong_FromLong (l));
287 }
288 
289 /* Convert a ULONGEST to the appropriate Python object -- either an
290    integer object or a long object, depending on its value.  */
291 
292 gdbpy_ref<>
gdb_py_object_from_ulongest(ULONGEST l)293 gdb_py_object_from_ulongest (ULONGEST l)
294 {
295   if (sizeof (l) > sizeof (unsigned long))
296     return gdbpy_ref<> (PyLong_FromUnsignedLongLong (l));
297   return gdbpy_ref<> (PyLong_FromUnsignedLong (l));
298 }
299 
300 /* Like PyLong_AsLong, but returns 0 on failure, 1 on success, and puts
301    the value into an out parameter.  */
302 
303 int
gdb_py_int_as_long(PyObject * obj,long * result)304 gdb_py_int_as_long (PyObject *obj, long *result)
305 {
306   *result = PyLong_AsLong (obj);
307   return ! (*result == -1 && PyErr_Occurred ());
308 }
309 
310 
311 
312 /* Generic implementation of the __dict__ attribute for objects that
313    have a dictionary.  The CLOSURE argument should be the type object.
314    This only handles positive values for tp_dictoffset.  */
315 
316 PyObject *
gdb_py_generic_dict(PyObject * self,void * closure)317 gdb_py_generic_dict (PyObject *self, void *closure)
318 {
319   PyObject *result;
320   PyTypeObject *type_obj = (PyTypeObject *) closure;
321   char *raw_ptr;
322 
323   raw_ptr = (char *) self + type_obj->tp_dictoffset;
324   result = * (PyObject **) raw_ptr;
325 
326   Py_INCREF (result);
327   return result;
328 }
329 
330 /* Like PyModule_AddObject, but does not steal a reference to
331    OBJECT.  */
332 
333 int
gdb_pymodule_addobject(PyObject * module,const char * name,PyObject * object)334 gdb_pymodule_addobject (PyObject *module, const char *name, PyObject *object)
335 {
336   int result;
337 
338   Py_INCREF (object);
339   result = PyModule_AddObject (module, name, object);
340   if (result < 0)
341     Py_DECREF (object);
342   return result;
343 }
344 
345 /* See python-internal.h.  */
346 
347 void
gdbpy_error(const char * fmt,...)348 gdbpy_error (const char *fmt, ...)
349 {
350   va_list ap;
351   va_start (ap, fmt);
352   std::string str = string_vprintf (fmt, ap);
353   va_end (ap);
354 
355   const char *msg = str.c_str ();
356   if (msg != nullptr && *msg != '\0')
357     error (_("Error occurred in Python: %s"), msg);
358   else
359     error (_("Error occurred in Python."));
360 }
361 
362 /* Handle a Python exception when the special gdb.GdbError treatment
363    is desired.  This should only be called when an exception is set.
364    If the exception is a gdb.GdbError, throw a gdb exception with the
365    exception text.  For other exceptions, print the Python stack and
366    then throw a gdb exception.  */
367 
368 void
gdbpy_handle_exception()369 gdbpy_handle_exception ()
370 {
371   gdbpy_err_fetch fetched_error;
372   gdb::unique_xmalloc_ptr<char> msg = fetched_error.to_string ();
373 
374   if (msg == NULL)
375     {
376       /* An error occurred computing the string representation of the
377            error message.  This is rare, but we should inform the user.  */
378       gdb_printf (_("An error occurred in Python "
379                         "and then another occurred computing the "
380                         "error message.\n"));
381       gdbpy_print_stack ();
382     }
383 
384   /* Don't print the stack for gdb.GdbError exceptions.
385      It is generally used to flag user errors.
386 
387      We also don't want to print "Error occurred in Python command"
388      for user errors.  However, a missing message for gdb.GdbError
389      exceptions is arguably a bug, so we flag it as such.  */
390 
391   if (fetched_error.type_matches (PyExc_KeyboardInterrupt))
392     throw_quit ("Quit");
393   else if (! fetched_error.type_matches (gdbpy_gdberror_exc)
394              || msg == NULL || *msg == '\0')
395     {
396       fetched_error.restore ();
397       gdbpy_print_stack ();
398       if (msg != NULL && *msg != '\0')
399           error (_("Error occurred in Python: %s"), msg.get ());
400       else
401           error (_("Error occurred in Python."));
402     }
403   else
404     error ("%s", msg.get ());
405 }
406 
407 /* See python-internal.h.  */
408 
409 gdb::unique_xmalloc_ptr<char>
gdbpy_fix_doc_string_indentation(gdb::unique_xmalloc_ptr<char> doc)410 gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr<char> doc)
411 {
412   /* A structure used to track the white-space information on each line of
413      DOC.  */
414   struct line_whitespace
415   {
416     /* Constructor.  OFFSET is the offset from the start of DOC, WS_COUNT
417        is the number of whitespace characters starting at OFFSET.  */
418     line_whitespace (size_t offset, int ws_count)
419       : m_offset (offset),
420           m_ws_count (ws_count)
421     { /* Nothing.  */ }
422 
423     /* The offset from the start of DOC.  */
424     size_t offset () const
425     { return m_offset; }
426 
427     /* The number of white-space characters at the start of this line.  */
428     int ws () const
429     { return m_ws_count; }
430 
431   private:
432     /* The offset from the start of DOC to the first character of this
433        line.  */
434     size_t m_offset;
435 
436     /* White space count on this line, the first character of this
437        whitespace is at OFFSET.  */
438     int m_ws_count;
439   };
440 
441   /* Count the number of white-space character starting at TXT.  We
442      currently only count true single space characters, things like tabs,
443      newlines, etc are not counted.  */
444   auto count_whitespace = [] (const char *txt) -> int
445   {
446     int count = 0;
447 
448     while (*txt == ' ')
449       {
450           ++txt;
451           ++count;
452       }
453 
454     return count;
455   };
456 
457   /* In MIN_WHITESPACE we track the smallest number of whitespace
458      characters seen at the start of a line (that has actual content), this
459      is the number of characters that we can delete off all lines without
460      altering the relative indentation of all lines in DOC.
461 
462      The first line often has no indentation, but instead starts immediates
463      after the 3-quotes marker within the Python doc string, so, if the
464      first line has zero white-space then we just ignore it, and don't set
465      MIN_WHITESPACE to zero.
466 
467      Lines without any content should (ideally) have no white-space at
468      all, but if they do then they might have an artificially low number
469      (user left a single stray space at the start of an otherwise blank
470      line), we don't consider lines without content when updating the
471      MIN_WHITESPACE value.  */
472   std::optional<int> min_whitespace;
473 
474   /* The index into WS_INFO at which the processing of DOC can be
475      considered "all done", that is, after this point there are no further
476      lines with useful content and we should just stop.  */
477   std::optional<size_t> all_done_idx;
478 
479   /* White-space information for each line in DOC.  */
480   std::vector<line_whitespace> ws_info;
481 
482   /* Now look through DOC and collect the required information.  */
483   const char *tmp = doc.get ();
484   while (*tmp != '\0')
485     {
486       /* Add an entry for the offset to the start of this line, and how
487            much white-space there is at the start of this line.  */
488       size_t offset = tmp - doc.get ();
489       int ws_count = count_whitespace (tmp);
490       ws_info.emplace_back (offset, ws_count);
491 
492       /* Skip over the white-space.  */
493       tmp += ws_count;
494 
495       /* Remember where the content of this line starts, and skip forward
496            to either the end of this line (newline) or the end of the DOC
497            string (null character), whichever comes first.  */
498       const char *content_start = tmp;
499       while (*tmp != '\0' && *tmp != '\n')
500           ++tmp;
501 
502       /* If this is not the first line, and if this line has some content,
503            then update MIN_WHITESPACE, this reflects the smallest number of
504            whitespace characters we can delete from all lines without
505            impacting the relative indentation of all the lines of DOC.  */
506       if (offset > 0 && tmp > content_start)
507           {
508             if (!min_whitespace.has_value ())
509               min_whitespace = ws_count;
510             else
511               min_whitespace = std::min (*min_whitespace, ws_count);
512           }
513 
514       /* Each time we encounter a line that has some content we update
515            ALL_DONE_IDX to be the index of the next line.  If the last lines
516            of DOC don't contain any content then ALL_DONE_IDX will be left
517            pointing at an earlier line.  When we rewrite DOC, when we reach
518            ALL_DONE_IDX then we can stop, the allows us to trim any blank
519            lines from the end of DOC.  */
520       if (tmp > content_start)
521           all_done_idx = ws_info.size ();
522 
523       /* If we reached a newline then skip forward to the start of the next
524            line.  The other possibility at this point is that we're at the
525            very end of the DOC string (null terminator).  */
526       if (*tmp == '\n')
527           ++tmp;
528     }
529 
530   /* We found no lines with content, fail safe by just returning the
531      original documentation string.  */
532   if (!all_done_idx.has_value () || !min_whitespace.has_value ())
533     return doc;
534 
535   /* Setup DST and SRC, both pointing into the DOC string.  We're going to
536      rewrite DOC in-place, as we only ever make DOC shorter (by removing
537      white-space), thus we know this will not overflow.  */
538   char *dst = doc.get ();
539   char *src = doc.get ();
540 
541   /* Array indices used with DST, SRC, and WS_INFO respectively.  */
542   size_t dst_offset = 0;
543   size_t src_offset = 0;
544   size_t ws_info_offset = 0;
545 
546   /* Now, walk over the source string, this is the original DOC.  */
547   while (src[src_offset] != '\0')
548     {
549       /* If we are at the start of the next line (in WS_INFO), then we may
550            need to skip some white-space characters.  */
551       if (src_offset == ws_info[ws_info_offset].offset ())
552           {
553             /* If a line has leading white-space then we need to skip over
554                some number of characters now.  */
555             if (ws_info[ws_info_offset].ws () > 0)
556               {
557                 /* If the line is entirely white-space then we skip all of
558                      the white-space, the next character to copy will be the
559                      newline or null character.  Otherwise, we skip the just
560                      some portion of the leading white-space.  */
561                 if (src[src_offset + ws_info[ws_info_offset].ws ()] == '\n'
562                       || src[src_offset + ws_info[ws_info_offset].ws ()] == '\0')
563                     src_offset += ws_info[ws_info_offset].ws ();
564                 else
565                     src_offset += std::min (*min_whitespace,
566                                                   ws_info[ws_info_offset].ws ());
567 
568                 /* If we skipped white-space, and are now at the end of the
569                      input, then we're done.  */
570                 if (src[src_offset] == '\0')
571                     break;
572               }
573             if (ws_info_offset < (ws_info.size () - 1))
574               ++ws_info_offset;
575             if (ws_info_offset > *all_done_idx)
576               break;
577           }
578 
579       /* Don't copy a newline to the start of the DST string, this would
580            result in a leading blank line.  But in all other cases, copy the
581            next character into the destination string.  */
582       if ((dst_offset > 0 || src[src_offset] != '\n'))
583           {
584             dst[dst_offset] = src[src_offset];
585             ++dst_offset;
586           }
587 
588       /* Move to the next source character.  */
589       ++src_offset;
590     }
591 
592   /* Remove the trailing newline character(s), and ensure we have a null
593      terminator in place.  */
594   while (dst_offset > 1 && dst[dst_offset - 1] == '\n')
595     --dst_offset;
596   dst[dst_offset] = '\0';
597 
598   return doc;
599 }
600 
601 /* See python-internal.h.  */
602 
603 PyObject *
gdb_py_invalid_object_repr(PyObject * self)604 gdb_py_invalid_object_repr (PyObject *self)
605 {
606   return PyUnicode_FromFormat ("<%s (invalid)>", Py_TYPE (self)->tp_name);
607 }
608