Perf is adding Rust support

Datetime:2016-08-23 00:02:20          Topic: Rust           Share

@@ -0,0 +1,269 @@

+#include <string.h>

+#include "util.h"

+#include "debug.h"

+

+#include "demangle-rust.h"

+

+/*

+ * Mangled Rust symbols look like this:

+ *

+ * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a

+ *

+ * The original symbol is:

+ *

+ * <std::sys::fd::FileDesc as core::ops::Drop>::drop

+ *

+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed

+ * with "h". Rust does not have a global namespace between crates, an illusion

+ * which Rust maintains by using the hash to distinguish things that would

+ * otherwise have the same symbol.

+ *

+ * Any path component not starting with a XID_Start character is prefixed with

+ * "_".

+ *

+ * The following escape sequences are used:

+ *

+ * "," => $C$

+ * "@" => $SP$

+ * "*" => $BP$

+ * "&" => $RF$

+ * "<" => $LT$

+ * ">" => $GT$

+ * "(" => $LP$

+ * ")" => $RP$

+ * " " => $u20$

+ * "'" => $u27$

+ * "[" => $u5b$

+ * "]" => $u5d$

+ * "~" => $u7e$

+ *

+ * A double ".." means "::" and a single "." means "-".

+ *

+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$

+ */

+

/*
 * Suffix carried by every mangled Rust symbol: the literal "::h" followed by
 * hash_len lowercase hex digits.
 *
 * hash_prefix is an array (not a pointer) so that its length can be derived
 * from the literal itself and the two can never drift apart.
 */
static const char hash_prefix[] = "::h";
static const size_t hash_prefix_len = sizeof(hash_prefix) - 1;
static const size_t hash_len = 16;

/* Helpers defined further down in this file. */
static bool is_prefixed_hash(const char *str);
static bool looks_like_rust(const char *sym, size_t len);
static bool unescape(const char **in, char **out, const char *seq, char value);

+

+/*

+ * INPUT:

+ * sym: symbol that has been through BFD-demangling

+ *

+ * This function looks for the following indicators:

+ *

+ * 1. The hash must consist of "h" followed by 16 lowercase hex digits.

+ *

+ * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible

+ * hex digits. This is true of 99.9998% of hashes so once in your life you

+ * may see a false negative. The point is to notice path components that

+ * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In

+ * this case a false positive (non-Rust symbol has an important path

+ * component removed because it looks like a Rust hash) is worse than a

+ * false negative (the rare Rust symbol is not demangled) so this sets the

+ * balance in favor of false negatives.

+ *

+ * 3. There must be no characters other than a-zA-Z0-9 and _.:$

+ *

+ * 4. There must be no unrecognized $-sign sequences.

+ *

+ * 5. There must be no sequence of three or more dots in a row ("...").

+ */

+bool

+rust_is_mangled(const char *sym)

+{

+ size_t len, len_without_hash;

+

+ if (!sym)

+ return false;

+

+ len = strlen(sym);

+ if (len <= hash_prefix_len + hash_len)

+ /* Not long enough to contain "::h" + hash + something else */

+ return false;

+

+ len_without_hash = len - (hash_prefix_len + hash_len);

+ if (!is_prefixed_hash(sym + len_without_hash))

+ return false;

+

+ return looks_like_rust(sym, len_without_hash);

+}

+

+/*

+ * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex

+ * digits must comprise between 5 and 15 (inclusive) distinct digits.

+ */

+static bool is_prefixed_hash(const char *str)

+{

+ const char *end;

+ bool seen[16];

+ size_t i;

+ int count;

+

+ if (strncmp(str, hash_prefix, hash_prefix_len))

+ return false;

+ str += hash_prefix_len;

+

+ memset(seen, false, sizeof(seen));

+ for (end = str + hash_len; str < end; str++)

+ if (*str >= '0' && *str <= '9')

+ seen[*str - '0'] = true;

+ else if (*str >= 'a' && *str <= 'f')

+ seen[*str - 'a' + 10] = true;

+ else

+ return false;

+

+ /* Count how many distinct digits seen */

+ count = 0;

+ for (i = 0; i < 16; i++)

+ if (seen[i])

+ count++;

+

+ return count >= 5 && count <= 15;

+}

+

/*
 * Return true if the first bytes of str equal seq, examining at most avail
 * bytes of str. A sequence longer than avail never matches.
 */
static bool bounded_starts_with(const char *str, size_t avail, const char *seq)
{
	size_t seq_len = strlen(seq);

	return seq_len <= avail && !strncmp(str, seq, seq_len);
}

/*
 * Return the length of the recognized $-escape sequence at the start of str
 * (looking at no more than avail bytes), or 0 if there is none.
 */
static size_t escape_seq_len(const char *str, size_t avail)
{
	static const char * const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	size_t i;

	for (i = 0; i < sizeof(escapes) / sizeof(escapes[0]); i++)
		if (bounded_starts_with(str, avail, escapes[i]))
			return strlen(escapes[i]);

	return 0;
}

/*
 * Check that the first len bytes of str could be the path part of a mangled
 * Rust symbol:
 *
 *  - only the characters a-zA-Z0-9 and _.:$ appear,
 *  - every '$' starts one of the known escape sequences,
 *  - there is no run of three or more dots.
 *
 * All matching is confined to the first len bytes; an escape sequence or a
 * "..." run that would extend beyond that boundary is not considered part of
 * the input.
 *
 * Returns true if the range passes every check (an empty range passes).
 */
static bool looks_like_rust(const char *str, size_t len)
{
	const char *end = str + len;

	while (str < end) {
		size_t avail = (size_t)(end - str);
		size_t skip;

		switch (*str) {
		case '$':
			skip = escape_seq_len(str, avail);
			if (!skip)
				return false;
			str += skip;
			break;
		case '.':
			/* Do not allow three or more consecutive dots */
			if (bounded_starts_with(str, avail, "..."))
				return false;
			/* Fall through */
		case 'a' ... 'z':
		case 'A' ... 'Z':
		case '0' ... '9':
		case '_':
		case ':':
			str++;
			break;
		default:
			return false;
		}
	}

	return true;
}

+

+/*

+ * INPUT:

+ * sym: symbol for which rust_is_mangled(sym) returns true

+ *

+ * The input is demangled in-place because the mangled name is always longer

+ * than the demangled one.

+ */

+void

+rust_demangle_sym(char *sym)

+{

+ const char *in;

+ char *out;

+ const char *end;

+

+ if (!sym)

+ return;

+

+ in = sym;

+ out = sym;

+ end = sym + strlen(sym) - (hash_prefix_len + hash_len);

+

+ while (in < end)

+ switch (*in) {

+ case '$':

+ if (!(unescape(&in, &out, "$C$", ',')

+ || unescape(&in, &out, "$SP$", '@')

+ || unescape(&in, &out, "$BP$", '*')

+ || unescape(&in, &out, "$RF$", '&')

+ || unescape(&in, &out, "$LT$", '<')

+ || unescape(&in, &out, "$GT$", '>')

+ || unescape(&in, &out, "$LP$", '(')

+ || unescape(&in, &out, "$RP$", ')')

+ || unescape(&in, &out, "$u20$", ' ')

+ || unescape(&in, &out, "$u27$", '\'')

+ || unescape(&in, &out, "$u5b$", '[')

+ || unescape(&in, &out, "$u5d$", ']')

+ || unescape(&in, &out, "$u7e$", '~'))) {

+ pr_err("demangle-rust: unexpected escape sequence");

+ goto done;

+ }

+ break;

+ case '_':

+ /*

+ * If this is the start of a path component and the next

+ * character is an escape sequence, ignore the

+ * underscore. The mangler inserts an underscore to make

+ * sure the path component begins with a XID_Start

+ * character.

+ */

+ if ((in == sym || in[-1] == ':') && in[1] == '$')

+ in++;

+ else

+ *out++ = *in++;

+ break;

+ case '.':

+ if (in[1] == '.') {

+ /* ".." becomes "::" */

+ *out++ = ':';

+ *out++ = ':';

+ in += 2;

+ } else {

+ /* "." becomes "-" */

+ *out++ = '-';

+ in++;

+ }

+ break;

+ case 'a' ... 'z':

+ case 'A' ... 'Z':

+ case '0' ... '9':

+ case ':':

+ *out++ = *in++;

+ break;

+ default:

+ pr_err("demangle-rust: unexpected character '%c' in symbol\n",

+ *in);

+ goto done;

+ }

+

+done:

+ *out = '\0';

+}

+

/*
 * Try to consume the escape sequence seq at the current input position.
 *
 * If the text at *in starts with seq, the single character value is stored
 * at *out, *in is moved past the sequence, *out is moved forward by one and
 * true is returned. Otherwise nothing is modified and false is returned.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const char *src = *in;
	char *dst = *out;
	size_t seq_len = strlen(seq);

	if (strncmp(src, seq, seq_len) != 0)
		return false;

	*dst++ = value;

	*in = src + seq_len;
	*out = dst;

	return true;
}





About List