Skip to content

Commit

Permalink
pch: Fix streaming of strings with embedded null bytes
Browse files Browse the repository at this point in the history
When a GTY'ed struct is streamed to PCH, any plain char* pointers it contains
(whether they live in GC-controlled memory or not) will be marked for PCH
output by the routine gt_pch_note_object in ggc-common.cc. This routine
special-cases plain char* strings, and in particular it uses strlen() to get
their length. Thus it does not handle strings with embedded null bytes, but it
is possible for something PCH cares about (such as a string literal token in a
macro definition) to contain such embedded nulls. To fix that up, add a new
GTY option "string_length" so that gt_pch_note_object can be informed the
actual length it ought to use, and use it in the relevant libcpp structs
(cpp_string and ht_identifier) accordingly.

gcc/ChangeLog:

	* gengtype.cc (output_escaped_param): Add missing const.
	(get_string_option): Add missing check for option type.
	(walk_type): Support new "string_length" GTY option.
	(write_types_process_field): Likewise.
	* ggc-common.cc (gt_pch_note_object): Add optional length argument.
	* ggc.h (gt_pch_note_object): Adjust prototype for new argument.
	(gt_pch_n_S2): Declare...
	* stringpool.cc (gt_pch_n_S2): ...new function.
	* doc/gty.texi: Document new GTY((string_length)) option.

libcpp/ChangeLog:

	* include/cpplib.h (struct cpp_string): Use new "string_length" GTY.
	* include/symtab.h (struct ht_identifier): Likewise.

gcc/testsuite/ChangeLog:

	* g++.dg/pch/pch-string-nulls.C: New test.
	* g++.dg/pch/pch-string-nulls.Hs: New test.
  • Loading branch information
Lewis Hyatt committed Oct 19, 2022
1 parent 09fed44 commit f3b957e
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 10 deletions.
21 changes: 20 additions & 1 deletion gcc/doc/gty.texi
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,26 @@ static GTY((length("reg_known_value_size"))) rtx *reg_known_value;
Note that the @code{length} option is only meant for use with arrays of
non-atomic objects, that is, objects that contain pointers pointing to
other GTY-managed objects. For other GC-allocated arrays and strings
you should use @code{atomic}.
you should use @code{atomic} or @code{string_length}.

@findex string_length
@item string_length ("@var{expression}")

In order to simplify production of PCH, a structure member that is a plain
array of bytes (an optionally @code{const} and/or @code{unsigned} @code{char
*}) is treated specially by the infrastructure. Even if such an array has not
been allocated in GC-controlled memory, it will still be written properly into
a PCH. The machinery responsible for this needs to know the length of the
data; by default, the length is determined by calling @code{strlen} on the
pointer. The @code{string_length} option specifies an alternate way to
determine the length, such as by inspecting another struct member:

@smallexample
struct GTY(()) non_terminated_string @{
size_t sz;
const char * GTY((string_length ("%h.sz"))) data;
@};
@end smallexample

@findex skip
@item skip
Expand Down
25 changes: 21 additions & 4 deletions gcc/gengtype.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2403,7 +2403,7 @@ struct write_types_data
enum write_types_kinds kind;
};

static void output_escaped_param (struct walk_type_data *d,
static void output_escaped_param (const struct walk_type_data *d,
const char *, const char *);
static void output_mangled_typename (outf_p, const_type_p);
static void walk_type (type_p t, struct walk_type_data *d);
Expand Down Expand Up @@ -2537,7 +2537,7 @@ output_mangled_typename (outf_p of, const_type_p t)
print error messages. */

static void
output_escaped_param (struct walk_type_data *d, const char *param,
output_escaped_param (const struct walk_type_data *d, const char *param,
const char *oname)
{
const char *p;
Expand Down Expand Up @@ -2576,7 +2576,7 @@ const char *
get_string_option (options_p opt, const char *key)
{
for (; opt; opt = opt->next)
if (strcmp (opt->name, key) == 0)
if (opt->kind == OPTION_STRING && strcmp (opt->name, key) == 0)
return opt->info.string;
return NULL;
}
Expand Down Expand Up @@ -2700,6 +2700,8 @@ walk_type (type_p t, struct walk_type_data *d)
;
else if (strcmp (oo->name, "callback") == 0)
;
else if (strcmp (oo->name, "string_length") == 0)
;
else
error_at_line (d->line, "unknown option `%s'\n", oo->name);

Expand Down Expand Up @@ -3251,7 +3253,22 @@ write_types_process_field (type_p f, const struct walk_type_data *d)
{
oprintf (d->of, "%*sgt_%s_", d->indent, "", wtd->prefix);
output_mangled_typename (d->of, f);
oprintf (d->of, " (%s%s);\n", cast, d->val);

/* Check if we need to call the special pch note version
for strings that takes an explicit length. */
const auto length_override
= (f->kind == TYPE_STRING && !strcmp (wtd->prefix, "pch_n")
? get_string_option (d->opt, "string_length")
: nullptr);
if (length_override)
{
oprintf (d->of, "2 (%s%s, ", cast, d->val);
output_escaped_param (d, length_override, "string_length");
}
else
oprintf (d->of, " (%s%s", cast, d->val);

oprintf (d->of, ");\n");
if (d->reorder_fn && wtd->reorder_note_routine)
oprintf (d->of, "%*s%s (%s%s, %s%s, %s);\n", d->indent, "",
wtd->reorder_note_routine, cast, d->val, cast, d->val,
Expand Down
7 changes: 5 additions & 2 deletions gcc/ggc-common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,8 @@ static vec<void *> reloc_addrs_vec;

int
gt_pch_note_object (void *obj, void *note_ptr_cookie,
gt_note_pointers note_ptr_fn)
gt_note_pointers note_ptr_fn,
size_t length_override)
{
struct ptr_data **slot;

Expand All @@ -273,7 +274,9 @@ gt_pch_note_object (void *obj, void *note_ptr_cookie,
(*slot)->obj = obj;
(*slot)->note_ptr_fn = note_ptr_fn;
(*slot)->note_ptr_cookie = note_ptr_cookie;
if (note_ptr_fn == gt_pch_p_S)
if (length_override != (size_t)-1)
(*slot)->size = length_override;
else if (note_ptr_fn == gt_pch_p_S)
(*slot)->size = strlen ((const char *)obj) + 1;
else
(*slot)->size = ggc_get_size (obj);
Expand Down
4 changes: 3 additions & 1 deletion gcc/ggc.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ typedef void (*gt_handle_reorder) (void *, void *, gt_pointer_operator,
void *);

/* Used by the gt_pch_n_* routines. Register an object in the hash table. */
extern int gt_pch_note_object (void *, void *, gt_note_pointers);
extern int gt_pch_note_object (void *, void *, gt_note_pointers,
size_t length_override = (size_t)-1);

/* Used by the gt_pch_p_* routines. Register address of a callback
pointer. */
Expand Down Expand Up @@ -101,6 +102,7 @@ extern int ggc_marked_p (const void *);

/* PCH and GGC handling for strings, mostly trivial. */
extern void gt_pch_n_S (const void *);
extern void gt_pch_n_S2 (const void *, size_t);
extern void gt_ggc_m_S (const void *);

/* End of GTY machinery API. */
Expand Down
7 changes: 7 additions & 0 deletions gcc/stringpool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,13 @@ gt_pch_n_S (const void *x)
&gt_pch_p_S);
}

void
gt_pch_n_S2 (const void *x, size_t string_len)
{
gt_pch_note_object (CONST_CAST (void *, x), CONST_CAST (void *, x),
&gt_pch_p_S, string_len);
}


/* User-callable entry point for marking string X. */

Expand Down
3 changes: 3 additions & 0 deletions gcc/testsuite/g++.dg/pch/pch-string-nulls.C
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// { dg-do compile { target c++11 } }
#include "pch-string-nulls.H"
static_assert (X[4] == '[' && X[5] == '!' && X[6] == ']', "error");
Binary file added gcc/testsuite/g++.dg/pch/pch-string-nulls.Hs
Binary file not shown.
6 changes: 5 additions & 1 deletion libcpp/include/cpplib.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,11 @@ enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11, CLK_GNUC17, CLK_GNUC2X,
/* Payload of a NUMBER, STRING, CHAR or COMMENT token. */
struct GTY(()) cpp_string {
unsigned int len;
const unsigned char *text;

/* TEXT is always null terminated (terminator not included in len); but this
GTY markup arranges that PCH streaming works properly even if there is a
null byte in the middle of the string. */
const unsigned char * GTY((string_length ("1 + %h.len"))) text;
};

/* Flags for the cpp_token structure. */
Expand Down
5 changes: 4 additions & 1 deletion libcpp/include/symtab.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ along with this program; see the file COPYING3. If not see
typedef struct ht_identifier ht_identifier;
typedef struct ht_identifier *ht_identifier_ptr;
struct GTY(()) ht_identifier {
const unsigned char *str;
/* This GTY markup arranges that the null-terminated identifier would still
stream to PCH correctly, if a null byte were to make its way into an
identifier somehow. */
const unsigned char * GTY((string_length ("1 + %h.len"))) str;
unsigned int len;
unsigned int hash_value;
};
Expand Down

0 comments on commit f3b957e

Please sign in to comment.