sarif-replay: decode event IDs [PR123056]

Attempt to round-trip event IDs in execution paths
through SARIF.

gcc/ChangeLog:
	PR sarif-replay/123056
	* libsarifreplay.cc: Include "json-pointer-parsing.h".
	(sarif_replayer::sarif_replayer): Initialize m_root_val.
	(sarif_replayer::m_root_val): New field.
	(sarif_replayer::replay_file): Store m_root_val.
	(sarif_replayer::append_embeddded_link): Add message_obj param.
	Attempt to decode intra-sarif links, turning them into event IDs.
	(sarif_replayer::decode_link_within_sarif): New.
	(sarif_replayer::make_plain_text_within_result_message): Pass
	message_obj to append_embeddded_link.

gcc/testsuite/ChangeLog:
	PR sarif-replay/123056
	* sarif-replay.dg/2.1.0-invalid/3.10.3-bad-json-pointer.sarif: New
	test.
	* sarif-replay.dg/2.1.0-valid/embedded-links-pr123056-check-sarif-roundtrip.py
	(test_roundtrip_of_url_in_generated_sarif): Update expected
	result, to expect the URL for the event.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
This commit is contained in:
David Malcolm
2026-02-24 17:54:39 -05:00
parent 45024ece3f
commit d03d0681fe
3 changed files with 197 additions and 5 deletions

View File

@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "libgdiagnostics++.h"
#include "libgdiagnostics-private.h"
#include "json-parsing.h"
#include "json-pointer-parsing.h"
#include "intl.h"
#include "sarif-spec-urls.def"
#include "libsarifreplay.h"
@@ -307,6 +308,7 @@ public:
libgdiagnostics::manager &&control_manager)
: m_output_mgr (std::move (output_manager)),
m_control_mgr (std::move (control_manager)),
m_root_val (nullptr),
m_driver_obj (nullptr),
m_artifacts_arr (nullptr)
{
@@ -711,7 +713,12 @@ private:
void
append_embeddded_link (libgdiagnostics::message_buffer &result,
const embedded_link &link);
const embedded_link &link,
const json::object &message_obj);
const json::value *
decode_link_within_sarif (const char *dst,
const json::object &message_obj);
/* The manager to replay the SARIF files to. */
libgdiagnostics::manager m_output_mgr;
@@ -724,6 +731,7 @@ private:
json::simple_location_map m_json_location_map;
const json::value *m_root_val;
const json::object *m_driver_obj;
const json::array *m_artifacts_arr;
};
@@ -857,6 +865,7 @@ sarif_replayer::replay_file (const char *filename,
}
gcc_assert (result.m_val.get ());
m_root_val = result.m_val.get ();
return emit_sarif_as_diagnostics (*result.m_val.get ());
}
@@ -1563,11 +1572,36 @@ maybe_consume_embedded_link (const char *&iter_src)
void
sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
const embedded_link &link)
const embedded_link &link,
const json::object &message_obj)
{
/* We can't yet decode intra-sarif links, so simply use their text. */
/* Try to convert intra-sarif links into event ids. */
if (!strncmp (link.destination.c_str (), "sarif:/", strlen ("sarif:/")))
{
if (auto linked_val = decode_link_within_sarif (link.destination.c_str (),
message_obj))
{
/* Assume we have a threadFlowLocation object, and that it's
for the correct code flow. */
if (const json::object *linked_obj
= dyn_cast <const json::object *> (linked_val))
{
const property_spec_ref location_prop
("threadFlowLocation", "executionOrder", "3.38.11");
if (auto execution_order
= get_optional_property<json::integer_number> (*linked_obj,
location_prop))
if (execution_order->get () > 0)
{
diagnostic_event_id event_id = execution_order->get () - 1;
diagnostic_message_buffer_append_event_id (result.m_inner,
event_id);
return;
}
}
}
/* If we can't use the sarif link, simply use the text. */
result += link.text.c_str ();
return;
}
@@ -1576,6 +1610,29 @@ sarif_replayer::append_embeddded_link (libgdiagnostics::message_buffer &result,
result.end_url ();
}
/* Resolve DST, a URI that begins with "sarif:/", to the JSON value
   it points at, by parsing the remainder of DST as a JSON pointer
   relative to m_root_val (which replay_file sets before replaying).

   On success, return the value found by the JSON pointer parser.
   If parsing fails, report the problem as invalid SARIF against
   MESSAGE_OBJ, citing §3.10.3, and return nullptr.  */

const json::value *
sarif_replayer::decode_link_within_sarif (const char *dst,
                                          const json::object &message_obj)
{
  const size_t prefix_len = strlen ("sarif:/");
  gcc_assert (strncmp (dst, "sarif:/", prefix_len) == 0);
  gcc_assert (m_root_val);

  /* Step over "sarif:" but keep the '/' that follows it, so that the
     string handed to the parser starts with a '/'.  */
  const char *json_pointer_str = dst + prefix_len - 1;
  auto parsed
    = json::pointer::parse_utf8_string (json_pointer_str, m_root_val);
  if (!parsed.m_err)
    return parsed.m_val;

  /* The pointer could not be parsed/resolved: complain, using the
     tokens of the parser's error as the detail of the message.  */
  const spec_ref uris_with_sarif_scheme ("3.10.3");
  pp_token_buffer_element e (parsed.m_err->m_tokens);
  report_invalid_sarif
    (message_obj, uris_with_sarif_scheme,
     "error parsing JSON pointer in SARIF link %qs: %e",
     dst, &e);
  return nullptr;
}
/* Lookup the plain text string within a result.message (§3.27.11),
and substitute for any placeholders (§3.11.5) and handle any
embedded links (§3.11.6).
@@ -1660,7 +1717,7 @@ make_plain_text_within_result_message (const json::object *tool_component_obj,
}
}
else if (auto link = maybe_consume_embedded_link (iter_src))
append_embeddded_link (result, *link);
append_embeddded_link (result, *link, message_obj);
else
{
result += ch;

View File

@@ -0,0 +1,135 @@
{"$schema": "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json",
"version": "2.1.0",
"runs": [{"tool": {"driver": {"name": "GNU C23",
"fullName": "GNU C23 (GCC) version 16.0.1 20260114 (experimental) (x86_64-pc-linux-gnu)",
"version": "16.0.1 20260114 (experimental)",
"informationUri": "https://gcc.gnu.org/gcc-16/",
"rules": [{"id": "-Wanalyzer-malloc-leak",
"helpUri": "https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-malloc-leak"}]}},
"taxonomies": [{"name": "CWE",
"version": "4.7",
"organization": "MITRE",
"shortDescription": {"text": "The MITRE Common Weakness Enumeration"},
"taxa": [{"id": "401",
"helpUri": "https://cwe.mitre.org/data/definitions/401.html"}]}],
"invocations": [{"arguments": ["./cc1",
"-quiet",
"-iprefix",
"/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/../lib/gcc/x86_64-pc-linux-gnu/16.0.1/",
"-isystem",
"./include",
"-isystem",
"./include-fixed",
"pr123056.c",
"-quiet",
"-dumpbase",
"pr123056.c",
"-dumpbase-ext",
".c",
"-mtune=generic",
"-march=x86-64",
"-fanalyzer",
"-fdiagnostics-add-output=sarif",
"-fdiagnostics-add-output=experimental-html",
"-o",
"pr123056.s"],
"workingDirectory": {"uri": "/home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc"},
"startTimeUtc": "2026-01-16T17:43:19Z",
"executionSuccessful": true,
"toolExecutionNotifications": [],
"endTimeUtc": "2026-01-16T17:43:19Z"}],
"originalUriBaseIds": {"PWD": {"uri": "file:///home/david/coding-3/gcc-newgit-queued-for-next-stage-1/build/gcc/"}},
"artifacts": [{"location": {"uri": "pr123056.c",
"uriBaseId": "PWD"},
"sourceLanguage": "c",
"contents": {"text": "void test (void)\n{\n void *p = __builtin_malloc (1024);\n}\n"},
"roles": ["analysisTarget",
"tracedFile"]}],
"results": [{"ruleId": "-Wanalyzer-malloc-leak",
"taxa": [{"id": "401",
"toolComponent": {"name": "cwe"}}],
"properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc",
"gcc/analyzer/saved_diagnostic/ploc": {"enode": 5},
"gcc/analyzer/saved_diagnostic/var": "p_3",
"gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)",
"gcc/analyzer/saved_diagnostic/state": "unchecked ({free})",
"gcc/analyzer/saved_diagnostic/idx": 0,
"gcc/analyzer/saved_diagnostic/duplicates": [{"properties": {"gcc/analyzer/saved_diagnostic/sm": "malloc",
"gcc/analyzer/saved_diagnostic/ploc": {"enode": 5},
"gcc/analyzer/saved_diagnostic/var": "p_3",
"gcc/analyzer/saved_diagnostic/sval": "&HEAP_ALLOCATED_REGION(14)",
"gcc/analyzer/saved_diagnostic/state": "unchecked ({free})",
"gcc/analyzer/saved_diagnostic/idx": 1,
"gcc/analyzer/pending_diagnostic/kind": "malloc_leak"}}],
"gcc/analyzer/pending_diagnostic/kind": "malloc_leak"},
"level": "warning",
"message": {"text": "leak of p"},
"locations": [{"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
"uriBaseId": "PWD"},
"region": {"startLine": 4,
"startColumn": 1,
"endColumn": 2},
"contextRegion": {"startLine": 4,
"snippet": {"text": "}\n"}}},
"logicalLocations": [{"index": 0,
"fullyQualifiedName": "test"}]}],
"codeFlows": [{"threadFlows": [{"id": "main",
"locations": [{"properties": {"gcc/analyzer/checker_event/emission_id": "(1)",
"gcc/analyzer/checker_event/kind": "state_change"},
"location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
"uriBaseId": "PWD"},
"region": {"startLine": 3,
"startColumn": 13,
"endColumn": 36},
"contextRegion": {"startLine": 3,
"snippet": {"text": " void *p = __builtin_malloc (1024);\n"}}},
"logicalLocations": [{"index": 0,
"fullyQualifiedName": "test"}],
"message": {"text": "allocated here"}},
"kinds": ["acquire",
"memory"],
"nestingLevel": 1,
"executionOrder": 1},
{"properties": {"gcc/analyzer/checker_event/emission_id": "(2)",
"gcc/analyzer/checker_event/kind": "warning"},
"location": {"physicalLocation": {"artifactLocation": {"uri": "pr123056.c",
"uriBaseId": "PWD"},
"region": {"startLine": 4,
"startColumn": 1,
"endColumn": 2},
"contextRegion": {"startLine": 4,
"snippet": {"text": "}\n"}}},
"logicalLocations": [{"index": 0,
"fullyQualifiedName": "test"}],
"message": {"text": "p leaks here; was allocated at [(1)](sarif:/runs/1066/results/0/codeFlows/0/threadFlows/0/locations/0)"}}, // { dg-error "array index 1066 out of range for array '/runs'" }
"kinds": ["danger"],
"nestingLevel": 1,
"executionOrder": 2}]}]}]}],
"logicalLocations": [{"name": "test",
"fullyQualifiedName": "test",
"decoratedName": "test",
"kind": "function",
"index": 0}]}]}
/* { dg-begin-multiline-output "" }
In JSON object '/runs/0/results/0/codeFlows/0/threadFlows/0/locations/1/location/message':
{ dg-end-multiline-output "" } */
/* { dg-begin-multiline-output "" }
104 | "message": {"text": "p leaks here; was allocated at [(1)](sarif:/runs/1066/results/0/codeFlows/0/threadFlows/0/locations/0)"}},
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{ dg-end-multiline-output "" } */
/* { dg-begin-multiline-output "" }
In function 'test':
pr123056.c:4:1: warning: leak of p [-Wanalyzer-malloc-leak]
4 | }
| ^
'test': events 1-2
3 | void *p = __builtin_malloc (1024);
| ^~~~~~~~~~~~~~~~~~~~~~~
| |
| (1) allocated here
4 | }
| ~
| |
| (2) p leaks here; was allocated at (1)
{ dg-end-multiline-output "" } */

View File

@@ -11,4 +11,4 @@ def test_roundtrip_of_url_in_generated_sarif(sarif):
assert result['level'] == 'warning'
assert result['message']['text'] == "leak of p"
assert (result['codeFlows'][0]['threadFlows'][0]['locations'][1]['location']['message']['text']
== "p leaks here; was allocated at (1)")
== "p leaks here; was allocated at [(1)](sarif:/runs/0/results/0/codeFlows/0/threadFlows/0/locations/0)")