diff --git a/Makefile.in b/Makefile.in index 0c47bf87d..7637800c0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2691,11 +2691,13 @@ src_tools_linux_dump_syms_dump_syms_SOURCES = \ src/tools/linux/dump_syms/dump_syms.cc src_tools_linux_dump_syms_dump_syms_CXXFLAGS = \ - $(RUSTC_DEMANGLE_CFLAGS) + $(RUSTC_DEMANGLE_CFLAGS) \ + $(ZSTD_CFLAGS) src_tools_linux_dump_syms_dump_syms_LDADD = \ $(RUSTC_DEMANGLE_LIBS) \ - -lz -lzstd + $(ZSTD_CFLAGS) \ + -lz src_tools_linux_md2core_minidump_2_core_SOURCES = \ src/common/linux/memory_mapped_file.cc \ @@ -2822,13 +2824,15 @@ src_common_dumper_unittest_SOURCES = \ src_common_dumper_unittest_CPPFLAGS = \ $(AM_CPPFLAGS) $(TEST_CFLAGS) \ $(RUSTC_DEMANGLE_CFLAGS) \ - $(PTHREAD_CFLAGS) + $(PTHREAD_CFLAGS) \ + $(ZSTD_CFLAGS) src_common_dumper_unittest_LDADD = \ $(TEST_LIBS) \ $(RUSTC_DEMANGLE_LIBS) \ $(PTHREAD_CFLAGS) $(PTHREAD_LIBS) \ - -lz -lzstd + $(ZSTD_LIBS) \ + -lz src_common_mac_macho_reader_unittest_SOURCES = \ src/common/dwarf_cfi_to_module.cc \ diff --git a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc index 2adc39e12..fb5b7e762 100644 --- a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +++ b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc @@ -59,9 +59,24 @@ #include "client/linux/minidump_writer/directory_reader.h" #include "client/linux/minidump_writer/line_reader.h" +#include "common/linux/eintr_wrapper.h" #include "common/linux/linux_libc_support.h" #include "third_party/lss/linux_syscall_support.h" +#if defined(__arm__) +/* + * https://elixir.bootlin.com/linux/v6.8-rc2/source/arch/arm/include/asm/user.h#L81 + * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 + * are ignored by the ptrace system call and the signal handler. + */ +typedef struct { + unsigned long long fpregs[32]; + unsigned long fpscr; +// Kernel just appends fpscr to the copy of fpregs, so we need to force +// compiler to build the same layout. +} __attribute__((packed, aligned(4))) user_vfp_t; +#endif // defined(__arm__) + // Suspends a thread by attaching to it. static bool SuspendThread(pid_t pid) { // This may fail if the thread has just died or debugged. @@ -70,11 +85,29 @@ static bool SuspendThread(pid_t pid) { errno != 0) { return false; } - while (sys_waitpid(pid, NULL, __WALL) < 0) { - if (errno != EINTR) { + while (true) { + int status; + int r = HANDLE_EINTR(sys_waitpid(pid, &status, __WALL)); + if (r < 0) { sys_ptrace(PTRACE_DETACH, pid, NULL, NULL); return false; } + + if (!WIFSTOPPED(status)) + return false; + + // Any signal will stop the thread, make sure it is SIGSTOP. Otherwise, this + // signal will be delivered after PTRACE_DETACH, and the thread will enter + // the "T (stopped)" state. + if (WSTOPSIG(status) == SIGSTOP) + break; + + // Signals other than SIGSTOP that are received need to be reinjected, + // or they will otherwise get lost. + r = sys_ptrace(PTRACE_CONT, pid, NULL, + reinterpret_cast(WSTOPSIG(status))); + if (r < 0) + return false; } #if defined(__i386) || defined(__x86_64) // On x86, the stack pointer is NULL or -1, when executing trusted code in @@ -152,6 +185,24 @@ bool LinuxPtraceDumper::CopyFromProcess(void* dest, pid_t child, return true; } +// This read VFP registers via either PTRACE_GETREGSET or PTRACE_GETREGS +#if defined(__arm__) +static bool ReadVFPRegistersArm32(pid_t tid, struct iovec* io) { +#ifdef PTRACE_GETREGSET + if (sys_ptrace(PTRACE_GETREGSET, tid, reinterpret_cast(NT_ARM_VFP), + io) == 0 && io->iov_len == sizeof(user_vfp_t)) { + return true; + } +#endif // PTRACE_GETREGSET +#ifdef PTRACE_GETVFPREGS + if (sys_ptrace(PTRACE_GETVFPREGS, tid, nullptr, io->iov_base) == 0) { + return true; + } +#endif // PTRACE_GETVFPREGS + return false; +} +#endif // defined(__arm__) + bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) { #ifdef PTRACE_GETREGSET @@ -163,7 +214,24 @@ bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) info->GetFloatingPointRegisters(&io.iov_base, &io.iov_len); if (sys_ptrace(PTRACE_GETREGSET, tid, (void*)NT_FPREGSET, (void*)&io) == -1) { - return false; + // We are going to check if we can read VFP registers on ARM32. + // Currently breakpad does not support VFP registers to be a part of minidump, + // so this is only to confirm that we can actually read FP registers. + // That is needed to prevent a false-positive minidumps failures with ARM32 + // binaries running on top of ARM64 Linux kernels. +#if defined(__arm__) + switch (errno) { + case EIO: + case EINVAL: + user_vfp_t vfp; + struct iovec io; + io.iov_base = &vfp; + io.iov_len = sizeof(vfp); + return ReadVFPRegistersArm32(tid, &io); + default: + return false; + } +#endif // defined(__arm__) } return true; #else @@ -194,7 +262,24 @@ bool LinuxPtraceDumper::ReadRegisters(ThreadInfo* info, pid_t tid) { void* fp_addr; info->GetFloatingPointRegisters(&fp_addr, NULL); if (sys_ptrace(PTRACE_GETFPREGS, tid, NULL, fp_addr) == -1) { - return false; + // We are going to check if we can read VFP registers on ARM32. + // Currently breakpad does not support VFP registers to be a part of minidump, + // so this is only to confirm that we can actually read FP registers. + // That is needed to prevent a false-positive minidumps failures with ARM32 + // binaries running on top of ARM64 Linux kernels. +#if defined(__arm__) + switch (errno) { + case EIO: + case EINVAL: + user_vfp_t vfp; + struct iovec io; + io.iov_base = &vfp; + io.iov_len = sizeof(vfp); + return ReadVFPRegistersArm32(tid, &io); + default: + return false; + } +#endif // defined(__arm__) } #endif // !(defined(__ANDROID__) && defined(__ARM_EABI__)) #endif // !defined(__SOFTFP__) diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc index 8a8bf6f86..3cae1a8e8 100644 --- a/src/common/dwarf/dwarf2reader.cc +++ b/src/common/dwarf/dwarf2reader.cc @@ -285,8 +285,8 @@ const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start, case DW_FORM_sec_offset: return start + reader_->OffsetSize(); } - fprintf(stderr,"Unhandled form type"); - return NULL; + fprintf(stderr,"Unhandled form type 0x%x\n", form); + return nullptr; } // Read the abbreviation offset from a compilation unit header. @@ -369,7 +369,7 @@ void CompilationUnit::ReadHeader() { break; case DW_UT_type: case DW_UT_split_type: - is_type_unit_ = true; + is_type_unit_ = true; headerptr += ReadTypeSignature(headerptr); headerptr += ReadTypeOffset(headerptr); break; @@ -463,7 +463,11 @@ uint64_t CompilationUnit::Start() { } // Now that we have our abbreviations, start processing DIE's. - ProcessDIEs(); + if (!ProcessDIEs()) { + // If ProcessDIEs fails return 0, ourlength must be non-zero + // as it is equal to header_.length + (12 or 4) + return 0; + } // If this is a skeleton compilation unit generated with split DWARF, // and the client needs the full debug info, we need to find the full @@ -512,7 +516,7 @@ const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( &len)); start += len; return ProcessOffsetBaseAttribute(dieoffset, start, attr, form, - implicit_const); + implicit_const); case DW_FORM_flag_present: return start; @@ -568,10 +572,10 @@ const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( // offset size. assert(header_.version >= 2); if (header_.version == 2) { - reader_->ReadAddress(start); + reader_->ReadAddress(start); return start + reader_->AddressSize(); } else if (header_.version >= 3) { - reader_->ReadOffset(start); + reader_->ReadOffset(start); return start + reader_->OffsetSize(); } break; @@ -647,8 +651,8 @@ const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( reader_->ReadUnsignedLEB128(start, &len); return start + len; } - fprintf(stderr, "Unhandled form type\n"); - return NULL; + fprintf(stderr,"Unhandled form type 0x%x\n", form); + return nullptr; } // If one really wanted, you could merge SkipAttribute and @@ -896,11 +900,11 @@ const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset, uint64_t dieoffset_copy = dieoffset; const uint8_t* start_copy = start; for (AttributeList::const_iterator i = abbrev.attributes.begin(); - i != abbrev.attributes.end(); - i++) { + i != abbrev.attributes.end(); + i++) { start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy, - i->attr_, i->form_, - i->value_); + i->attr_, i->form_, + i->value_); } } @@ -922,7 +926,7 @@ const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset, return start; } -void CompilationUnit::ProcessDIEs() { +bool CompilationUnit::ProcessDIEs() { const uint8_t* dieptr = after_header_; size_t len; @@ -953,13 +957,21 @@ void CompilationUnit::ProcessDIEs() { if (abbrev_num == 0) { if (die_stack.size() == 0) // If it is padding, then we are done with the compilation unit's DIEs. - return; + return true; const uint64_t offset = die_stack.top(); die_stack.pop(); handler_->EndDIE(offset); continue; } + // Abbrev > abbrev_.size() indicates a corruption in the dwarf file. + if (abbrev_num > abbrevs_->size()) { + fprintf(stderr, "An invalid abbrev was referenced %" PRIu64 " / %zu. " + "Stopped procesing following DIEs in this CU.", abbrev_num, + abbrevs_->size()); + return false; + } + const Abbrev& abbrev = abbrevs_->at(static_cast(abbrev_num)); const enum DwarfTag tag = abbrev.tag; if (!handler_->StartDIE(absolute_offset, tag)) { @@ -989,6 +1001,7 @@ void CompilationUnit::ProcessDIEs() { handler_->EndDIE(absolute_offset); } } + return true; } // Check for a valid ELF file and return the Address size. diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h index b6bd2f31a..bd2e4d2e9 100644 --- a/src/common/dwarf/dwarf2reader.h +++ b/src/common/dwarf/dwarf2reader.h @@ -570,10 +570,10 @@ class CompilationUnit { // Special version of ProcessAttribute, for finding str_offsets_base and // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, - const uint8_t* start, - enum DwarfAttribute attr, - enum DwarfForm form, - uint64_t implicit_const); + const uint8_t* start, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t implicit_const); // Called when we have an attribute with unsigned data to give to // our handler. The attribute is for the DIE at OFFSET from the @@ -664,7 +664,7 @@ class CompilationUnit { } // Processes all DIEs for this compilation unit - void ProcessDIEs(); + bool ProcessDIEs(); // Skips the die with attributes specified in ABBREV starting at // START, and return the new place to position the stream to. @@ -909,11 +909,11 @@ class DwpReader { // // For example, here is a complete (uncompressed) table describing the // function above: -// +// // insn cfa r0 r1 ... ra // ======================================= // func+0: sp cfa[0] -// func+1: sp+16 cfa[0] +// func+1: sp+16 cfa[0] // func+2: sp+16 cfa[-4] cfa[0] // func+11: sp+20 cfa[-4] cfa[0] // func+21: sp+20 cfa[0] @@ -947,7 +947,7 @@ class DwpReader { // save them, caller-saves registers are probably dead in the caller // anyway, so compilers usually don't generate CFA for caller-saves // registers.) -// +// // - Exactly where the CFA points is a matter of convention that // depends on the architecture and ABI in use. In the example, the // CFA is the value the stack pointer had upon entry to the @@ -968,7 +968,7 @@ class DwpReader { // reduces the size of the data by mentioning only the addresses and // columns at which changes take place. So for the above, DWARF CFI // data would only actually mention the following: -// +// // insn cfa r0 r1 ... ra // ======================================= // func+0: sp cfa[0] @@ -976,7 +976,7 @@ class DwpReader { // func+2: cfa[-4] // func+11: sp+20 // func+21: r0 -// func+22: sp +// func+22: sp // // In fact, this is the way the parser reports CFI to the consumer: as // a series of statements of the form, "At address X, column Y changed @@ -1094,7 +1094,7 @@ class CallFrameInfo { // handling are described here, rather poorly: // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html - // + // // The mechanics of C++ exception handling, personality routines, // and language-specific data areas are described here, rather nicely: // http://www.codesourcery.com/public/cxx-abi/abi-eh.html @@ -1127,7 +1127,7 @@ class CallFrameInfo { // The start of this entry in the buffer. const uint8_t* start; - + // Which kind of entry this is. // // We want to be able to use this for error reporting even while we're @@ -1161,13 +1161,13 @@ class CallFrameInfo { struct CIE: public Entry { uint8_t version; // CFI data version number string augmentation; // vendor format extension markers - uint64_t code_alignment_factor; // scale for code address adjustments + uint64_t code_alignment_factor; // scale for code address adjustments int data_alignment_factor; // scale for stack pointer adjustments unsigned return_address_register; // which register holds the return addr // True if this CIE includes Linux C++ ABI 'z' augmentation data. bool has_z_augmentation; - + // Parsed 'z' augmentation data. These are meaningful only if // has_z_augmentation is true. bool has_z_lsda; // The 'z' augmentation included 'L'. @@ -1221,7 +1221,7 @@ class CallFrameInfo { class ValExpressionRule; class RuleMap; class State; - + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the // data to parse. On success, populate ENTRY as appropriate, and return @@ -1308,7 +1308,7 @@ class CallFrameInfo::Handler { // Immediately after a call to Entry, the handler should assume that // the rule for each callee-saves register is "unchanged" --- that // is, that the register still has the value it had in the caller. - // + // // If a *Rule function returns true, we continue processing this entry's // instructions. If a *Rule function returns false, we stop evaluating // instructions, and skip to the next entry. Either way, we call End @@ -1497,13 +1497,13 @@ class CallFrameInfo::Reporter { // The instruction at INSN_OFFSET in the entry at OFFSET, of kind // KIND, establishes a rule that cites the CFA, but we have not // established a CFA rule yet. - virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, + virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, uint64_t insn_offset); // The instruction at INSN_OFFSET in the entry at OFFSET, of kind // KIND, is a DW_CFA_restore_state instruction, but the stack of // saved states is empty. - virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, + virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, uint64_t insn_offset); // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry @@ -1511,7 +1511,7 @@ class CallFrameInfo::Reporter { // rule, whereas the current state does have a CFA rule. This is // bogus input, which the CallFrameInfo::Handler interface doesn't // (and shouldn't) have any way to report. - virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, + virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, uint64_t insn_offset); protected: diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index 94a0d428e..613f97854 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -562,6 +562,29 @@ static bool IsEmptyRange(const vector& ranges) { return size == 0; } +// A handler for DW_TAG_lexical_block DIEs. +class DwarfCUToModule::LexicalBlockHandler : public GenericDIEHandler { + public: + LexicalBlockHandler(CUContext* cu_context, + uint64_t offset, + int inline_nest_level, + vector>& inlines) + : GenericDIEHandler(cu_context, nullptr, offset), + inline_nest_level_(inline_nest_level), + inlines_(inlines) {} + + DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); + bool EndAttributes() { return true; } + void Finish(); + + private: + int inline_nest_level_; + // A vector of inlines in the same nest level. It's owned by its parent + // function/inline. At Finish(), add this inline into the vector. + vector>& inlines_; + // A vector of child inlines. + vector> child_inlines_; +}; // A handler for DW_TAG_inlined_subroutine DIEs. class DwarfCUToModule::InlineHandler : public GenericDIEHandler { @@ -645,6 +668,9 @@ DIEHandler* DwarfCUToModule::InlineHandler::FindChildHandler( case DW_TAG_inlined_subroutine: return new InlineHandler(cu_context_, nullptr, offset, inline_nest_level_ + 1, child_inlines_); + case DW_TAG_lexical_block: + return new LexicalBlockHandler(cu_context_, offset, + inline_nest_level_ + 1, child_inlines_); default: return NULL; } @@ -715,6 +741,29 @@ void DwarfCUToModule::InlineHandler::Finish() { inlines_.push_back(std::move(in)); } +DIEHandler* DwarfCUToModule::LexicalBlockHandler::FindChildHandler( + uint64_t offset, + enum DwarfTag tag) { + switch (tag) { + case DW_TAG_inlined_subroutine: + return new InlineHandler(cu_context_, nullptr, offset, inline_nest_level_, + child_inlines_); + case DW_TAG_lexical_block: + return new LexicalBlockHandler(cu_context_, offset, inline_nest_level_, + child_inlines_); + default: + return nullptr; + } +} + +void DwarfCUToModule::LexicalBlockHandler::Finish() { + // Insert child inlines inside the lexical block into the inline vector from + // parent as if the block does not exit. + inlines_.insert(inlines_.end(), + std::make_move_iterator(child_inlines_.begin()), + std::make_move_iterator(child_inlines_.end())); +} + // A handler for DIEs that contain functions and contribute a // component to their names: namespaces, classes, etc. class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { @@ -831,6 +880,9 @@ DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler( case DW_TAG_union_type: return new NamedScopeHandler(cu_context_, &child_context_, offset, handle_inline_); + case DW_TAG_lexical_block: + if (handle_inline_) + return new LexicalBlockHandler(cu_context_, offset, 0, child_inlines_); default: return NULL; } diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h index c7ada306c..82c25091d 100644 --- a/src/common/dwarf_cu_to_module.h +++ b/src/common/dwarf_cu_to_module.h @@ -254,61 +254,61 @@ class DwarfCUToModule: public RootDIEHandler { void UncoveredHeading(); }; - class NullWarningReporter: public WarningReporter { + class NullWarningReporter : public WarningReporter { public: - NullWarningReporter(const string &filename, uint64_t cu_offset): - WarningReporter(filename, cu_offset) { } + NullWarningReporter(const string& filename, uint64_t cu_offset) + : WarningReporter(filename, cu_offset) {} // Set the name of the compilation unit we're processing to NAME. - void SetCUName(const string &name) { } + void SetCUName(const string& name) {} // Accessor and setter for uncovered_warnings_enabled_. // UncoveredFunction and UncoveredLine only report a problem if that is // true. By default, these warnings are disabled, because those // conditions occur occasionally in healthy code. - void set_uncovered_warnings_enabled(bool value) { } + void set_uncovered_warnings_enabled(bool value) {} // A DW_AT_specification in the DIE at OFFSET refers to a DIE we // haven't processed yet, or that wasn't marked as a declaration, // at TARGET. - void UnknownSpecification(uint64_t offset, uint64_t target) { } + void UnknownSpecification(uint64_t offset, uint64_t target) {} // A DW_AT_abstract_origin in the DIE at OFFSET refers to a DIE we // haven't processed yet, or that wasn't marked as inline, at TARGET. - void UnknownAbstractOrigin(uint64_t offset, uint64_t target) { } + void UnknownAbstractOrigin(uint64_t offset, uint64_t target) {} // We were unable to find the DWARF section named SECTION_NAME. - void MissingSection(const string §ion_name) { } + void MissingSection(const string& section_name) {} // The CU's DW_AT_stmt_list offset OFFSET is bogus. - void BadLineInfoOffset(uint64_t offset) { } + void BadLineInfoOffset(uint64_t offset) {} // FUNCTION includes code covered by no line number data. - void UncoveredFunction(const Module::Function &function) { } + void UncoveredFunction(const Module::Function& function) {} // Line number NUMBER in LINE_FILE, of length LENGTH, includes code // covered by no function. - void UncoveredLine(const Module::Line &line) { } + void UncoveredLine(const Module::Line& line) {} // The DW_TAG_subprogram DIE at OFFSET has no name specified directly // in the DIE, nor via a DW_AT_specification or DW_AT_abstract_origin // link. - void UnnamedFunction(uint64_t offset) { } + void UnnamedFunction(uint64_t offset) {} // __cxa_demangle() failed to demangle INPUT. - void DemangleError(const string &input) { } + void DemangleError(const string& input) {} // The DW_FORM_ref_addr at OFFSET to TARGET was not handled because // FilePrivate did not retain the inter-CU specification data. - void UnhandledInterCUReference(uint64_t offset, uint64_t target) { } + void UnhandledInterCUReference(uint64_t offset, uint64_t target) {} // The DW_AT_ranges at offset is malformed (truncated or outside of the // .debug_ranges section's bound). - void MalformedRangeList(uint64_t offset) { } + void MalformedRangeList(uint64_t offset) {} // A DW_AT_ranges attribute was encountered but the no .debug_ranges // section was found. - void MissingRanges() { } + void MissingRanges() {} }; // Create a DWARF debugging info handler for a compilation unit @@ -357,6 +357,7 @@ class DwarfCUToModule: public RootDIEHandler { struct Specification; class GenericDIEHandler; class FuncHandler; + class LexicalBlockHandler; class InlineHandler; class NamedScopeHandler; diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 04038224c..77e894e4f 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -315,31 +315,17 @@ uint32_t GetCompressionHeader( std::pair UncompressZlibSectionContents( const uint8_t* compressed_buffer, uint64_t compressed_size, uint64_t uncompressed_size) { - z_stream stream; - memset(&stream, 0, sizeof stream); - - stream.avail_in = compressed_size; - stream.avail_out = uncompressed_size; - stream.next_in = const_cast(compressed_buffer); - google_breakpad::scoped_array uncompressed_buffer( new uint8_t[uncompressed_size]); - int status = inflateInit(&stream); - while (stream.avail_in != 0 && status == Z_OK) { - stream.next_out = - uncompressed_buffer.get() + uncompressed_size - stream.avail_out; + uLongf size = static_cast(uncompressed_size); - if ((status = inflate(&stream, Z_FINISH)) != Z_STREAM_END) { - break; - } - - status = inflateReset(&stream); - } + int status = uncompress( + uncompressed_buffer.get(), &size, compressed_buffer, compressed_size); - return inflateEnd(&stream) != Z_OK || status != Z_OK || stream.avail_out != 0 - ? std::make_pair(nullptr, 0) - : std::make_pair(uncompressed_buffer.release(), uncompressed_size); + return status != Z_OK + ? std::make_pair(nullptr, 0) + : std::make_pair(uncompressed_buffer.release(), uncompressed_size); } #ifdef HAVE_LIBZSTD @@ -512,7 +498,11 @@ bool LoadDwarf(const string& dwarf_filename, &byte_reader, &die_dispatcher); // Process the entire compilation unit; get the offset of the next. - offset += reader.Start(); + uint64_t result = reader.Start(); + if (result == 0) { + return false; + } + offset += result; // Start to process split dwarf file. if (reader.ShouldProcessSplitDwarf()) { StartProcessSplitDwarf(&reader, module, endianness, handle_inter_cu_refs, @@ -892,6 +882,7 @@ bool LoadSymbols(const string& obj_file, const char* names_end = names + section_names->sh_size; bool found_debug_info_section = false; bool found_usable_info = false; + bool usable_info_parsed = false; if ((options.symbol_data & SYMBOLS_AND_FILES) || (options.symbol_data & INLINES)) { @@ -907,8 +898,10 @@ bool LoadSymbols(const string& obj_file, found_debug_info_section = true; found_usable_info = true; info->LoadedSection(".stab"); - if (!LoadStabs(elf_header, stab_section, stabstr_section, - big_endian, module)) { + bool result = LoadStabs(elf_header, stab_section, stabstr_section, + big_endian, module); + usable_info_parsed = usable_info_parsed || result; + if (!result) { fprintf(stderr, "%s: \".stab\" section found, but failed to load" " STABS debugging information\n", obj_file.c_str()); } @@ -995,9 +988,11 @@ bool LoadSymbols(const string& obj_file, found_debug_info_section = true; found_usable_info = true; info->LoadedSection(".debug_info"); - if (!LoadDwarf(obj_file, elf_header, big_endian, + bool result = LoadDwarf(obj_file, elf_header, big_endian, options.handle_inter_cu_refs, - options.symbol_data & INLINES, module)) { + options.symbol_data & INLINES, module); + usable_info_parsed = usable_info_parsed || result; + if (!result){ fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " "DWARF debugging information\n", obj_file.c_str()); } @@ -1102,7 +1097,7 @@ bool LoadSymbols(const string& obj_file, return false; } - return true; + return usable_info_parsed; } // Return the breakpad symbol file identifier for the architecture of @@ -1123,6 +1118,7 @@ const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { case EM_SPARCV9: return "sparcv9"; case EM_X86_64: return "x86_64"; case EM_RISCV: return "riscv"; + case EM_NDS32: return "nds32"; default: return NULL; } } @@ -1162,6 +1158,7 @@ template bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header, const string& obj_filename, const string& obj_os, + const string& module_id, scoped_ptr& module, bool enable_multiple_field) { PageAllocator allocator; @@ -1185,10 +1182,14 @@ bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header, ? name_buf : google_breakpad::BaseName(obj_filename); - // Add an extra "0" at the end. PDB files on Windows have an 'age' - // number appended to the end of the file identifier; this isn't - // really used or necessary on other platforms, but be consistent. - string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0"; + // Use the provided module_id + string id = module_id.empty() + // Add an extra "0" at the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but be consistent. + ? FileID::ConvertIdentifierToUUIDString(identifier) + "0" + : module_id; + // This is just the raw Build ID in hex. string code_id = FileID::ConvertIdentifierToString(identifier); @@ -1202,6 +1203,7 @@ template bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, const string& obj_filename, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** out_module) { @@ -1210,8 +1212,8 @@ bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, *out_module = NULL; scoped_ptr module; - if (!InitModuleForElfClass(elf_header, obj_filename, obj_os, module, - options.enable_multiple_field)) { + if (!InitModuleForElfClass(elf_header, obj_filename, obj_os, module_id, + module, options.enable_multiple_field)) { return false; } @@ -1258,6 +1260,7 @@ namespace google_breakpad { bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** module) { @@ -1270,12 +1273,12 @@ bool ReadSymbolDataInternal(const uint8_t* obj_file, if (elfclass == ELFCLASS32) { return ReadSymbolDataElfClass( reinterpret_cast(obj_file), obj_filename, obj_os, - debug_dirs, options, module); + module_id, debug_dirs, options, module); } if (elfclass == ELFCLASS64) { return ReadSymbolDataElfClass( reinterpret_cast(obj_file), obj_filename, obj_os, - debug_dirs, options, module); + module_id, debug_dirs, options, module); } return false; @@ -1284,15 +1287,16 @@ bool ReadSymbolDataInternal(const uint8_t* obj_file, bool WriteSymbolFile(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, std::ostream& sym_stream) { Module* module; - if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options, + if (!ReadSymbolData(load_path, obj_file, obj_os, module_id, debug_dirs, options, &module)) return false; - bool result = module->Write(sym_stream, options.symbol_data); + bool result = module->Write(sym_stream, options.symbol_data, options.preserve_load_address); delete module; return result; } @@ -1303,6 +1307,7 @@ bool WriteSymbolFile(const string& load_path, bool WriteSymbolFileHeader(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, std::ostream& sym_stream) { MmapWrapper map_wrapper; void* elf_header = NULL; @@ -1321,14 +1326,14 @@ bool WriteSymbolFileHeader(const string& load_path, if (elfclass == ELFCLASS32) { if (!InitModuleForElfClass( reinterpret_cast(elf_header), obj_file, obj_os, - module, /*enable_multiple_field=*/false)) { + module_id, module, /*enable_multiple_field=*/false)) { fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str()); return false; } } else if (elfclass == ELFCLASS64) { if (!InitModuleForElfClass( reinterpret_cast(elf_header), obj_file, obj_os, - module, /*enable_multiple_field=*/false)) { + module_id, module, /*enable_multiple_field=*/false)) { fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str()); return false; } @@ -1343,6 +1348,7 @@ bool WriteSymbolFileHeader(const string& load_path, bool ReadSymbolData(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** module) { @@ -1352,7 +1358,7 @@ bool ReadSymbolData(const string& load_path, return false; return ReadSymbolDataInternal(reinterpret_cast(elf_header), - obj_file, obj_os, debug_dirs, options, module); + obj_file, obj_os, module_id, debug_dirs, options, module); } } // namespace google_breakpad diff --git a/src/common/linux/dump_symbols.h b/src/common/linux/dump_symbols.h index f1802ecc3..25ede3e0f 100644 --- a/src/common/linux/dump_symbols.h +++ b/src/common/linux/dump_symbols.h @@ -48,14 +48,17 @@ class Module; struct DumpOptions { DumpOptions(SymbolData symbol_data, bool handle_inter_cu_refs, - bool enable_multiple_field) + bool enable_multiple_field, + bool preserve_load_address) : symbol_data(symbol_data), handle_inter_cu_refs(handle_inter_cu_refs), - enable_multiple_field(enable_multiple_field) {} + enable_multiple_field(enable_multiple_field), + preserve_load_address(preserve_load_address) {} SymbolData symbol_data; bool handle_inter_cu_refs; bool enable_multiple_field; + bool preserve_load_address; }; // Find all the debugging information in OBJ_FILE, an ELF executable @@ -67,6 +70,7 @@ struct DumpOptions { bool WriteSymbolFile(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, std::ostream& sym_stream); @@ -78,6 +82,7 @@ bool WriteSymbolFile(const string& load_path, bool WriteSymbolFileHeader(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, std::ostream& sym_stream); // As above, but simply return the debugging information in MODULE @@ -86,6 +91,7 @@ bool WriteSymbolFileHeader(const string& load_path, bool ReadSymbolData(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** module); diff --git a/src/common/linux/dump_symbols_unittest.cc b/src/common/linux/dump_symbols_unittest.cc index 55dcdeed8..f20ac0dfc 100644 --- a/src/common/linux/dump_symbols_unittest.cc +++ b/src/common/linux/dump_symbols_unittest.cc @@ -55,6 +55,7 @@ namespace google_breakpad { bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, const string& obj_os, + const string& module_id, const std::vector& debug_dir, const DumpOptions& options, Module** module); @@ -95,10 +96,11 @@ TYPED_TEST(DumpSymbols, Invalid) { Elf32_Ehdr header; memset(&header, 0, sizeof(header)); Module* module; - DumpOptions options(ALL_SYMBOL_DATA, true, false); + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); EXPECT_FALSE(ReadSymbolDataInternal(reinterpret_cast(&header), "foo", "Linux", + "", vector(), options, &module)); @@ -132,10 +134,11 @@ TYPED_TEST(DumpSymbols, SimplePublic) { this->GetElfContents(elf); Module* module; - DumpOptions options(ALL_SYMBOL_DATA, true, false); + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, "foo", "Linux", + "", vector(), options, &module)); @@ -151,6 +154,54 @@ TYPED_TEST(DumpSymbols, SimplePublic) { delete module; } +TYPED_TEST(DumpSymbols, ModuleIdOverride) { + ELF elf(TypeParam::kMachine, TypeParam::kClass, kLittleEndian); + // Zero out text section for simplicity. + Section text(kLittleEndian); + text.Append(4096, 0); + elf.AddSection(".text", text, SHT_PROGBITS); + + // Add a public symbol. + StringTable table(kLittleEndian); + SymbolTable syms(kLittleEndian, TypeParam::kAddrSize, table); + syms.AddSymbol("superfunc", + (typename TypeParam::Addr)0x1000, + (typename TypeParam::Addr)0x10, + // ELF32_ST_INFO works for 32-or 64-bit. + ELF32_ST_INFO(STB_GLOBAL, STT_FUNC), + SHN_UNDEF + 1); + int index = elf.AddSection(".dynstr", table, SHT_STRTAB); + elf.AddSection(".dynsym", syms, + SHT_DYNSYM, // type + SHF_ALLOC, // flags + 0, // addr + index, // link + sizeof(typename TypeParam::Sym)); // entsize + + elf.Finish(); + this->GetElfContents(elf); + + Module* module; + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); + EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, + "foo", + "Linux", + "some_module_id", + vector(), + options, + &module)); + + stringstream s; + module->Write(s, ALL_SYMBOL_DATA); + const string expected = + string("MODULE Linux ") + TypeParam::kMachineName + + " some_module_id foo\n" + "INFO CODE_ID 00000000000000000000000000000000\n" + "PUBLIC 1000 0 superfunc\n"; + EXPECT_EQ(expected, s.str()); + delete module; +} + TYPED_TEST(DumpSymbols, SimpleBuildID) { ELF elf(TypeParam::kMachine, TypeParam::kClass, kLittleEndian); // Zero out text section for simplicity. @@ -189,10 +240,11 @@ TYPED_TEST(DumpSymbols, SimpleBuildID) { this->GetElfContents(elf); Module* module; - DumpOptions options(ALL_SYMBOL_DATA, true, false); + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, "foo", "Linux", + "", vector(), options, &module)); diff --git a/src/common/linux/file_id.cc b/src/common/linux/file_id.cc index d8fcbd8d6..296531608 100644 --- a/src/common/linux/file_id.cc +++ b/src/common/linux/file_id.cc @@ -99,6 +99,13 @@ static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length, // and copy it into |identifier|. static bool FindElfBuildIDNote(const void* elf_mapped_base, wasteful_vector& identifier) { + void* note_section; + size_t note_size; + if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size)) { + return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); + } + PageAllocator allocator; // lld normally creates 2 PT_NOTEs, gold normally creates 1. auto_wasteful_vector segs(&allocator); @@ -110,13 +117,6 @@ static bool FindElfBuildIDNote(const void* elf_mapped_base, } } - void* note_section; - size_t note_size; - if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, - (const void**)¬e_section, ¬e_size)) { - return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); - } - return false; } diff --git a/src/common/linux/file_id_unittest.cc b/src/common/linux/file_id_unittest.cc index 0ef453532..7be239f60 100644 --- a/src/common/linux/file_id_unittest.cc +++ b/src/common/linux/file_id_unittest.cc @@ -326,6 +326,45 @@ TYPED_TEST(FileIDTest, BuildIDMultiplePH) { EXPECT_EQ(expected_identifier_string, identifier_string); } +TYPED_TEST(FileIDTest, BuildIDMultiplePHPreferGNU) { + const uint8_t kExpectedIdentifierBytes[] = + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13}; + const string expected_identifier_string = + this->get_file_id(kExpectedIdentifierBytes); + + ELF elf(EM_386, TypeParam::kClass, kLittleEndian); + Section text(kLittleEndian); + text.Append(4096, 0); + elf.AddSection(".text", text, SHT_PROGBITS); + Notes notes1(kLittleEndian); + notes1.AddNote(0, "Linux", + reinterpret_cast("\0x42\0x02\0\0"), 4); + Notes notes2(kLittleEndian); + notes2.AddNote(NT_GNU_BUILD_ID, "GNU", + reinterpret_cast("\0x42\0x02\0\0"), 4); + Notes notes3(kLittleEndian); + notes3.AddNote(NT_GNU_BUILD_ID, "GNU", kExpectedIdentifierBytes, + sizeof(kExpectedIdentifierBytes)); + int note1_idx = elf.AddSection(".note1", notes1, SHT_NOTE); + int note2_idx = elf.AddSection(".note2", notes2, SHT_NOTE); + int note3_idx = elf.AddSection(".note.gnu.build-id", notes3, SHT_NOTE); + elf.AddSegment(note1_idx, note1_idx, PT_NOTE); + elf.AddSegment(note2_idx, note2_idx, PT_NOTE); + elf.AddSegment(note3_idx, note3_idx, PT_NOTE); + elf.Finish(); + this->GetElfContents(elf); + + id_vector identifier(this->make_vector()); + EXPECT_TRUE(FileID::ElfFileIdentifierFromMappedFile(this->elfdata, + identifier)); + EXPECT_EQ(sizeof(kExpectedIdentifierBytes), identifier.size()); + + string identifier_string = FileID::ConvertIdentifierToUUIDString(identifier); + EXPECT_EQ(expected_identifier_string, identifier_string); +} + // Test to make sure two files with different text sections produce // different hashes when not using a build id. TYPED_TEST(FileIDTest, UniqueHashes) { diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index bcb62413f..3d3413a2b 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -394,7 +394,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr& module) { // In certain cases, it is possible that architecture info can't be reliably // determined, e.g. new architectures that breakpad is unware of. In that // case, avoid crashing and return false instead. - if (selected_arch_name == kUnknownArchName) { + if (strcmp(selected_arch_name, kUnknownArchName) == 0) { return false; } @@ -406,7 +406,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr& module) { selected_object_name_ = object_filename_; if (object_files_.size() > 1) { selected_object_name_ += ", architecture "; - selected_object_name_ + selected_arch_name; + selected_object_name_ += selected_arch_name; } // Compute a module name, to appear in the MODULE record. @@ -528,16 +528,16 @@ void DumpSymbols::ReadDwarf(google_breakpad::Module* module, for (uint64_t offset = 0; offset < debug_info_length;) { // Make a handler for the root DIE that populates MODULE with the // debug info. - DwarfCUToModule::WarningReporter *reporter = nullptr; + std::unique_ptr reporter; if (report_warnings_) { - reporter = new DwarfCUToModule::WarningReporter( - selected_object_name_, offset); + reporter = std::make_unique( + selected_object_name_, offset); } else { - reporter = new DwarfCUToModule::NullWarningReporter( - selected_object_name_, offset); + reporter = std::make_unique( + selected_object_name_, offset); } DwarfCUToModule root_handler(&file_context, &line_to_module, - &ranges_handler, reporter, + &ranges_handler, reporter.get(), handle_inline); // Make a Dwarf2Handler that drives our DIEHandler. DIEDispatcher die_dispatcher(&root_handler); @@ -554,7 +554,6 @@ void DumpSymbols::ReadDwarf(google_breakpad::Module* module, StartProcessSplitDwarf(&dwarf_reader, module, endianness, handle_inter_cu_refs, handle_inline); } - delete reporter; } } diff --git a/src/common/module.cc b/src/common/module.cc index b6f5da7e6..6f81b5119 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -380,7 +380,7 @@ bool Module::AddressIsInModule(Address address) const { return false; } -bool Module::Write(std::ostream& stream, SymbolData symbol_data) { +bool Module::Write(std::ostream& stream, SymbolData symbol_data, bool preserve_load_address) { stream << "MODULE " << os_ << " " << architecture_ << " " << id_ << " " << name_ << "\n"; if (!stream.good()) @@ -390,6 +390,13 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { stream << "INFO CODE_ID " << code_id_ << "\n"; } + // load_address is subtracted from each line. If we use zero instead, we + // preserve the original addresses present in the ELF binary. + Address load_offset = load_address_; + if (preserve_load_address) { + load_offset = 0; + } + if (symbol_data & SYMBOLS_AND_FILES) { // Get all referenced inline origins. set inline_origins; @@ -406,13 +413,13 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { return ReportError(); } } + // Write out inline origins. for (InlineOrigin* origin : inline_origins) { stream << "INLINE_ORIGIN " << origin->id << " " << origin->name << "\n"; if (!stream.good()) return ReportError(); } - // Write out functions and their inlines and lines. for (FunctionSet::const_iterator func_it = functions_.begin(); func_it != functions_.end(); ++func_it) { @@ -421,7 +428,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { for (auto range_it = func->ranges.cbegin(); range_it != func->ranges.cend(); ++range_it) { stream << "FUNC " << (func->is_multiple ? "m " : "") << hex - << (range_it->address - load_address_) << " " << range_it->size + << (range_it->address - load_offset) << " " << range_it->size << " " << func->parameter_size << " " << func->name << dec << "\n"; @@ -434,7 +441,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { stream << in->inline_nest_level << " " << in->call_site_line << " " << in->getCallSiteFileID() << " " << in->origin->id << hex; for (const Range& r : in->ranges) - stream << " " << (r.address - load_address_) << " " << r.size; + stream << " " << (r.address - load_offset) << " " << r.size; stream << dec << "\n"; }; Module::Inline::InlineDFS(func->inlines, write_inline); @@ -445,7 +452,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { (line_it->address >= range_it->address) && (line_it->address < (range_it->address + range_it->size))) { stream << hex - << (line_it->address - load_address_) << " " + << (line_it->address - load_offset) << " " << line_it->size << " " << dec << line_it->number << " " @@ -464,7 +471,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { extern_it != externs_.end(); ++extern_it) { Extern* ext = extern_it->get(); stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex - << (ext->address - load_address_) << " 0 " << ext->name << dec + << (ext->address - load_offset) << " 0 " << ext->name << dec << "\n"; } } @@ -475,7 +482,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { frame_it != stack_frame_entries_.end(); ++frame_it) { StackFrameEntry* entry = frame_it->get(); stream << "STACK CFI INIT " << hex - << (entry->address - load_address_) << " " + << (entry->address - load_offset) << " " << entry->size << " " << dec; if (!stream.good() || !WriteRuleMap(entry->initial_rules, stream)) @@ -487,7 +494,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin(); delta_it != entry->rule_changes.end(); ++delta_it) { stream << "STACK CFI " << hex - << (delta_it->first - load_address_) << " " << dec; + << (delta_it->first - load_offset) << " " << dec; if (!stream.good() || !WriteRuleMap(delta_it->second, stream)) return ReportError(); diff --git a/src/common/module.h b/src/common/module.h index 28e8e9c50..69eec36de 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -421,8 +421,9 @@ class Module { // If symbol_data is CFI then: // - all CFI records. // Addresses in the output are all relative to the load address - // established by SetLoadAddress. - bool Write(std::ostream& stream, SymbolData symbol_data); + // established by SetLoadAddress, unless preserve_load_address + // is equal to true, in which case each address will remain unchanged. + bool Write(std::ostream& stream, SymbolData symbol_data, bool preserve_load_address = false); // Place the name in the global set of strings. Return a StringView points to // a string inside the pool. diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc index c51162e52..6b2c49a41 100644 --- a/src/common/module_unittest.cc +++ b/src/common/module_unittest.cc @@ -175,6 +175,49 @@ TEST(Module, WriteRelativeLoadAddress) { contents.c_str()); } +TEST(Module, WritePreserveLoadAddress) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID); + // Set the load address to something. Doesn't matter what. + // The goal of this test is to demonstrate that the load + // address does not impact any of the generated addresses + // when the preserve_load_address option is equal to true. + m.SetLoadAddress(0x1337ULL); + + Module::File* file = m.FindFile("filename-a.cc"); + Module::Function* function = new Module::Function( + "do_stuff", 0x110ULL); + Module::Range range(0x110ULL, 0x210ULL); + function->ranges.push_back(range); + function->parameter_size = 0x50ULL; + Module::Line line1 = { 0x110ULL, 0x1ULL, + file, 20ULL }; + function->lines.push_back(line1); + m.AddFunction(function); + + // Some stack information. + auto entry = std::make_unique(); + entry->address = 0x200ULL; + entry->size = 0x55ULL; + entry->initial_rules[".cfa"] = "some call frame info"; + entry->rule_changes[0x201ULL][".s0"] = + "some rules change call frame info"; + m.AddStackFrameEntry(std::move(entry)); + + bool preserve_load_address = true; + m.Write(s, ALL_SYMBOL_DATA, preserve_load_address); + string contents = s.str(); + EXPECT_STREQ("MODULE os-name architecture id-string name with spaces\n" + "FILE 0 filename-a.cc\n" + "FUNC 110 210 50 do_stuff\n" + "110 1 20 0\n" + "STACK CFI INIT 200 55" + " .cfa: some call frame info\n" + "STACK CFI 201" + " .s0: some rules change call frame info\n", + contents.c_str()); +} + TEST(Module, WriteOmitUnusedFiles) { Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID); diff --git a/src/common/windows/http_upload.cc b/src/common/windows/http_upload.cc index bd48a2339..22250f502 100644 --- a/src/common/windows/http_upload.cc +++ b/src/common/windows/http_upload.cc @@ -30,14 +30,24 @@ #include // Must come first #endif +#include +#include +#include +#include + #include +#include +#include +#include +#include + +#if defined(HAVE_ZLIB) +#include +#endif // Disable exception handler warnings. #pragma warning(disable:4530) -#include -#include - #include "common/windows/string_utils-inl.h" #include "common/windows/http_upload.h" @@ -46,9 +56,57 @@ namespace { using std::string; using std::wstring; using std::map; - using std::vector; - using std::ifstream; - using std::ios; + using std::unique_ptr; + +// Compresses the contents of `data` into `deflated` using the deflate +// algorithm, if supported. Returns true on success, or false if not supported +// or in case of any error. The contents of `deflated` are undefined in the +// latter case. +bool Deflate(const string& data, string& deflated) { +#if defined(HAVE_ZLIB) + z_stream stream{}; + + // Start with an output buffer sufficient for 75% compression to avoid + // reallocations. + deflated.resize(data.size() / 4); + stream.next_in = reinterpret_cast(const_cast(data.data())); + stream.avail_in = data.size(); + stream.next_out = reinterpret_cast(&deflated[0]); + stream.avail_out = deflated.size(); + stream.data_type = Z_TEXT; + + // Z_BEST_SPEED is chosen because, in practice, it offers excellent speed with + // comparable compression for the symbol data typically being uploaded. + // Z_BEST_COMPRESSION: 2151202094 bytes compressed 84.27% in 74.440s. + // Z_DEFAULT_COMPRESSION: 2151202094 bytes compressed 84.08% in 36.016s. + // Z_BEST_SPEED: 2151202094 bytes compressed 80.39% in 13.73s. + int result = deflateInit(&stream, Z_BEST_SPEED); + if (result != Z_OK) { + return false; + } + + while (true) { + result = deflate(&stream, /*flush=*/Z_FINISH); + if (result == Z_STREAM_END) { // All data processed. + deflated.resize(stream.total_out); + break; + } + if (result != Z_OK && result != Z_BUF_ERROR) { + fwprintf(stderr, L"Compression failed with zlib error %d\n", result); + break; // Error condition. + } + // Grow `deflated` by at least 1k to accept the rest of the data. + deflated.resize(deflated.size() + std::max(stream.avail_in, 1024U)); + stream.next_out = reinterpret_cast(&deflated[stream.total_out]); + stream.avail_out = deflated.size() - stream.total_out; + } + deflateEnd(&stream); + + return result == Z_STREAM_END; +#else + return false; +#endif // defined(HAVE_ZLIB) +} const wchar_t kUserAgent[] = L"Breakpad/1.0 (Windows)"; @@ -110,38 +168,75 @@ namespace { return result; } - bool GetFileContents(const wstring& filename, vector* contents) { - bool rv = false; - // The "open" method on pre-MSVC8 ifstream implementations doesn't accept a - // wchar_t* filename, so use _wfopen directly in that case. For VC8 and - // later, _wfopen has been deprecated in favor of _wfopen_s, which does - // not exist in earlier versions, so let the ifstream open the file itself. - // GCC doesn't support wide file name and opening on FILE* requires ugly - // hacks, so fallback to multi byte file. -#ifdef _MSC_VER - ifstream file; - file.open(filename.c_str(), ios::binary); -#else // GCC - ifstream file(WideToMBCP(filename, CP_ACP).c_str(), ios::binary); -#endif // _MSC_VER >= 1400 - if (file.is_open()) { - file.seekg(0, ios::end); - std::streamoff length = file.tellg(); - // Check for loss of data when converting lenght from std::streamoff into - // std::vector::size_type - std::vector::size_type vector_size = - static_cast::size_type>(length); - if (static_cast(vector_size) == length) { - contents->resize(vector_size); - if (length != 0) { - file.seekg(0, ios::beg); - file.read(&((*contents)[0]), length); + // Returns a string representation of a given Windows error code, or null + // on failure. + using ScopedLocalString = unique_ptr; + ScopedLocalString FormatError(DWORD error) { + wchar_t* message_buffer = nullptr; + DWORD message_length = + ::FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_FROM_HMODULE | FORMAT_MESSAGE_IGNORE_INSERTS, + /*lpSource=*/::GetModuleHandle(L"wininet.dll"), error, + /*dwLanguageId=*/0, reinterpret_cast(&message_buffer), + /*nSize=*/0, /*Arguments=*/nullptr); + return ScopedLocalString(message_length ? message_buffer : nullptr, + &LocalFree); + } + + // Emits a log message to stderr for the named operation and Windows error + // code. + void LogError(const char* operation, DWORD error) { + ScopedLocalString message = FormatError(error); + fwprintf(stderr, L"%S failed with error %u: %s\n", operation, error, + message ? message.get() : L""); + } + + // Invokes the Win32 CloseHandle function on `handle` if it is valid. + // Intended for use as a deleter for a std::unique_ptr. + void CloseWindowsHandle(void* handle) { + if (handle != INVALID_HANDLE_VALUE && handle != nullptr) { + ::CloseHandle(handle); + } + } + + // Appends the contents of the file at `filename` to `contents`. + bool AppendFileContents(const wstring& filename, string* contents) { + // Use Win32 APIs rather than the STL so that files larger than 2^31-1 bytes + // can be read. This also allows for use of a hint to the cache manager that + // the file will be read sequentially, which can improve performance. + using ScopedWindowsHandle = + unique_ptr; + ScopedWindowsHandle file( + ::CreateFileW(filename.c_str(), GENERIC_READ, + FILE_SHARE_DELETE | FILE_SHARE_READ, + /*lpSecurityAttributes=*/nullptr, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, + /*hTemplateFile=*/nullptr), &CloseWindowsHandle); + BY_HANDLE_FILE_INFORMATION info = {}; + if (file.get() != nullptr && file.get() != INVALID_HANDLE_VALUE && + ::GetFileInformationByHandle(file.get(), &info)) { + uint64_t file_size = info.nFileSizeHigh; + file_size <<= 32; + file_size |= info.nFileSizeLow; + string::size_type position = contents->size(); + contents->resize(position + file_size); + constexpr DWORD kChunkSize = 1024*1024; + while (file_size) { + const DWORD bytes_to_read = + (file_size >= kChunkSize + ? kChunkSize + : static_cast(file_size)); + DWORD bytes_read = 0; + if (!::ReadFile(file.get(), &((*contents)[position]), bytes_to_read, + &bytes_read, /*lpOverlapped=*/nullptr)) { + return false; } - rv = true; + position += bytes_read; + file_size -= bytes_read; } - file.close(); } - return rv; + return true; } bool CheckParameters(const map& parameters) { @@ -179,37 +274,46 @@ namespace { has_content_length_header = true; claimed_size = wcstol(content_length, NULL, 10); response_body.reserve(claimed_size); + } else { + DWORD error = ::GetLastError(); + if (error != ERROR_HTTP_HEADER_NOT_FOUND) { + LogError("HttpQueryInfo", error); + } } - DWORD bytes_available; DWORD total_read = 0; - BOOL return_code; - - while (((return_code = InternetQueryDataAvailable(request, &bytes_available, - 0, 0)) != 0) && bytes_available > 0) { - vector response_buffer(bytes_available); + while (true) { + DWORD bytes_available; + if (!InternetQueryDataAvailable(request, &bytes_available, 0, 0)) { + LogError("InternetQueryDataAvailable", ::GetLastError()); + return false; + } + if (bytes_available == 0) { + break; + } + // Grow the output to hold the available bytes. + response_body.resize(total_read + bytes_available); DWORD size_read; - - return_code = InternetReadFile(request, - &response_buffer[0], - bytes_available, &size_read); - - if (return_code && size_read > 0) { - total_read += size_read; - response_body.append(&response_buffer[0], size_read); + if (!InternetReadFile(request, &response_body[total_read], + bytes_available, &size_read)) { + LogError("InternetReadFile", ::GetLastError()); + return false; } - else { + if (size_read == 0) { break; } + total_read += size_read; } + // The body may have been over-sized above; shrink to the actual bytes read. + response_body.resize(total_read); - bool succeeded = return_code && (!has_content_length_header || - (total_read == claimed_size)); - if (succeeded && response) { + if (has_content_length_header && (total_read != claimed_size)) { + return false; // The response doesn't match the Content-Length header. + } + if (response) { *response = UTF8ToWide(response_body); } - - return succeeded; + return true; } bool SendRequestInner( @@ -237,8 +341,7 @@ namespace { components.dwUrlPathLength = sizeof(path) / sizeof(path[0]); if (!InternetCrackUrl(url.c_str(), static_cast(url.size()), 0, &components)) { - DWORD err = GetLastError(); - wprintf(L"%d\n", err); + LogError("InternetCrackUrl", ::GetLastError()); return false; } bool secure = false; @@ -255,6 +358,7 @@ namespace { NULL, // proxy bypass 0)); // flags if (!internet.get()) { + LogError("InternetOpen", ::GetLastError()); return false; } @@ -267,6 +371,7 @@ namespace { 0, // flags 0)); // context if (!connection.get()) { + LogError("InternetConnect", ::GetLastError()); return false; } @@ -281,14 +386,17 @@ namespace { http_open_flags, 0)); // context if (!request.get()) { + LogError("HttpOpenRequest", ::GetLastError()); return false; } if (!content_type_header.empty()) { - HttpAddRequestHeaders(request.get(), - content_type_header.c_str(), - static_cast(-1), - HTTP_ADDREQ_FLAG_ADD); + if (!HttpAddRequestHeaders(request.get(), + content_type_header.c_str(), + static_cast(-1), + HTTP_ADDREQ_FLAG_ADD)) { + LogError("HttpAddRequestHeaders", ::GetLastError()); + } } if (timeout_ms) { @@ -296,20 +404,21 @@ namespace { INTERNET_OPTION_SEND_TIMEOUT, timeout_ms, sizeof(*timeout_ms))) { - fwprintf(stderr, L"Could not unset send timeout, continuing...\n"); + LogError("InternetSetOption-send timeout", ::GetLastError()); } if (!InternetSetOption(request.get(), INTERNET_OPTION_RECEIVE_TIMEOUT, timeout_ms, sizeof(*timeout_ms))) { - fwprintf(stderr, L"Could not unset receive timeout, continuing...\n"); + LogError("InternetSetOption-receive timeout", ::GetLastError()); } } if (!HttpSendRequest(request.get(), NULL, 0, const_cast(request_body.data()), static_cast(request_body.size()))) { + LogError("HttpSendRequest", ::GetLastError()); return false; } @@ -319,6 +428,7 @@ namespace { if (!HttpQueryInfo(request.get(), HTTP_QUERY_STATUS_CODE, static_cast(&http_status), &http_status_size, 0)) { + LogError("HttpQueryInfo", ::GetLastError()); return false; } @@ -386,16 +496,7 @@ namespace { request_body->append("\r\n"); } - vector contents; - if (!GetFileContents(filename, &contents)) { - return false; - } - - if (!contents.empty()) { - request_body->append(&(contents[0]), contents.size()); - } - - return true; + return AppendFileContents(filename, request_body); } bool GenerateRequestBody(const map& parameters, @@ -448,10 +549,20 @@ namespace google_breakpad { return false; } + static const wchar_t kNoEncoding[] = L""; + static const wchar_t kDeflateEncoding[] = L"Content-Encoding: deflate\r\n"; + const wchar_t* encoding_header = &kNoEncoding[0]; + string compressed_body; + if (Deflate(request_body, compressed_body)) { + request_body.swap(compressed_body); + encoding_header = &kDeflateEncoding[0]; + } // else deflate unsupported or failed; send the raw data. + string().swap(compressed_body); // Free memory. + return SendRequestInner( url, L"PUT", - L"", + encoding_header, request_body, timeout_ms, response_body, diff --git a/src/common/windows/http_upload.h b/src/common/windows/http_upload.h index e117840e9..4a2e9bf2f 100644 --- a/src/common/windows/http_upload.h +++ b/src/common/windows/http_upload.h @@ -41,6 +41,7 @@ #include #include +#include namespace google_breakpad { @@ -51,7 +52,8 @@ using std::map; class HTTPUpload { public: // Sends a PUT request containing the data in |path| to the given - // URL. + // URL. The data is encoded via the deflate algorithm, if support for such + // is available at build-time. // Only HTTP(S) URLs are currently supported. Returns true on success. // If the request is successful and response_body is non-NULL, // the response body will be returned in response_body. diff --git a/src/common/windows/pdb_source_line_writer.cc b/src/common/windows/pdb_source_line_writer.cc index dd80a6d25..86f01a798 100644 --- a/src/common/windows/pdb_source_line_writer.cc +++ b/src/common/windows/pdb_source_line_writer.cc @@ -105,7 +105,7 @@ void MaybeRecordSymbol(DWORD rva, // Take the 'least' symbol by lexicographical order of the decorated name. We // use the decorated rather than undecorated name because computing the latter // is expensive. - BSTR current_name, new_name; + CComBSTR current_name, new_name; loc->second.symbol->get_name(¤t_name); symbol->get_name(&new_name); if (wcscmp(new_name, current_name) < 0) { @@ -479,6 +479,28 @@ bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) { return true; } +int PDBSourceLineWriter::GetCallsiteInlineOriginId( + CComPtr& callsite) { + wstring name; + { + CComBSTR name_bstr; + callsite->get_name(&name_bstr); + if (name.assign(name_bstr, name_bstr.Length()).empty()) { + name = L""; + } + } + + const int next_id = inline_origins_.size(); + auto iter_inserted = inline_origins_.emplace(std::move(name), next_id); + if (!iter_inserted.second) { + // `name` is already present. Return its previously-assigned unique id. + return iter_inserted.first->second; + } + // `name` was just inserted. Assign it the next id and return this value. + iter_inserted.first->second = next_id; + return next_id; +} + bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const { if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) { fprintf(stderr, "failed to get line rva\n"); @@ -783,17 +805,16 @@ bool PDBSourceLineWriter::PrintFunctions() { } void PDBSourceLineWriter::PrintInlineOrigins() const { - struct OriginCompare { - bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const { - return lhs.id < rhs.id; - } - }; - set origins; - // Sort by origin id. - for (auto const& origin : inline_origins_) - origins.insert(origin.second); - for (auto o : origins) { - fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str()); + // Inline origins' unique identifiers are assigned sequentially starting from + // zero. Make a reverse-mapping from ids to names, then print the names in + // order of id. + vector names_by_id(inline_origins_.size()); + for (const auto& origin : inline_origins_) { + names_by_id[origin.second] = &origin.first; + } + int id = 0; + for (const wstring* name : names_by_id) { + fprintf(output_, "INLINE_ORIGIN %d %ls\n", id++, name->c_str()); } } @@ -838,19 +859,7 @@ bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block, } dia_line.Release(); } - BSTR name; - callsite->get_name(&name); - if (SysStringLen(name) == 0) { - name = SysAllocString(L""); - } - auto iter = inline_origins_.find(name); - if (iter == inline_origins_.end()) { - InlineOrigin origin; - origin.id = inline_origins_.size(); - origin.name = name; - inline_origins_[name] = origin; - } - new_inline->SetOriginId(inline_origins_[name].id); + new_inline->SetOriginId(GetCallsiteInlineOriginId(callsite)); new_inline->SetCallSiteLine(call_site_line); new_inline->SetCallSiteFileId(file_id); // Go to next level. diff --git a/src/common/windows/pdb_source_line_writer.h b/src/common/windows/pdb_source_line_writer.h index 8c74e2ca3..c4d7c0613 100644 --- a/src/common/windows/pdb_source_line_writer.h +++ b/src/common/windows/pdb_source_line_writer.h @@ -103,15 +103,6 @@ class PDBSourceLineWriter { bool UsesGUID(bool *uses_guid); private: - // InlineOrigin represents INLINE_ORIGIN record in a symbol file. It's an - // inlined function. - struct InlineOrigin { - // The unique id for an InlineOrigin. - int id; - // The name of the inlined function. - wstring name; - }; - // Line represents LINE record in a symbol file. It represents a source code // line. struct Line { @@ -196,6 +187,10 @@ class PDBSourceLineWriter { map line_map_; }; + // Returns the unique id for the inline origin with the same name as the given + // callsite, creating a new id if needed. + int GetCallsiteInlineOriginId(CComPtr& callsite); + // Construct Line from IDiaLineNumber. The output Line is stored at line. // Return true on success. bool GetLine(IDiaLineNumber* dia_line, Line* line) const; @@ -337,8 +332,8 @@ class PDBSourceLineWriter { // This maps unique filenames to file IDs. unordered_map unique_files_; - // The INLINE_ORIGINS records. The key is the function name. - std::map inline_origins_; + // The INLINE_ORIGINS records; inline origin name -> unique id. + std::map inline_origins_; // This is used for calculating post-transform symbol addresses and lengths. ImageMap image_map_; diff --git a/src/common/windows/symbol_collector_client.cc b/src/common/windows/symbol_collector_client.cc index d91b702b0..9695100a0 100644 --- a/src/common/windows/symbol_collector_client.cc +++ b/src/common/windows/symbol_collector_client.cc @@ -106,7 +106,7 @@ namespace google_breakpad { if (!HTTPUpload::SendSimplePostRequest( url, body, - L"application/json", + L"Content-Type: application/json", timeout_ms, &response, &response_code)) { @@ -176,4 +176,4 @@ namespace google_breakpad { SymbolStatus::Missing; } -} // namespace google_breakpad \ No newline at end of file +} // namespace google_breakpad diff --git a/src/google_breakpad/processor/minidump_processor.h b/src/google_breakpad/processor/minidump_processor.h index 8475407e7..2a5b07499 100644 --- a/src/google_breakpad/processor/minidump_processor.h +++ b/src/google_breakpad/processor/minidump_processor.h @@ -138,6 +138,11 @@ class MinidumpProcessor { enable_objdump_for_exploitability_ = enabled; } + // Sets the maximum number of threads to process. + void set_max_thread_count(int max_thread_count) { + max_thread_count_ = max_thread_count; + } + private: StackFrameSymbolizer* frame_symbolizer_; // Indicate whether resolver_helper_ is owned by this instance. @@ -157,6 +162,10 @@ class MinidumpProcessor { // purposes of disassembly. This results in significantly more overhead than // the enable_objdump_ flag. bool enable_objdump_for_exploitability_; + + // The maximum number of threads to process. This can be exceeded if the + // requesting thread comes after the limit. Setting this to -1 means no limit. + int max_thread_count_; }; } // namespace google_breakpad diff --git a/src/google_breakpad/processor/process_state.h b/src/google_breakpad/processor/process_state.h index 3fe6a5c27..cc0e83a5d 100644 --- a/src/google_breakpad/processor/process_state.h +++ b/src/google_breakpad/processor/process_state.h @@ -105,6 +105,7 @@ class ProcessState { uint64_t crash_address() const { return crash_address_; } string assertion() const { return assertion_; } int requesting_thread() const { return requesting_thread_; } + int original_thread_count() const { return original_thread_count_; } const ExceptionRecord* exception_record() const { return &exception_record_; } const vector* threads() const { return &threads_; } const vector* thread_memory_regions() const { @@ -168,6 +169,11 @@ class ProcessState { // indicating that the dump thread is not available. int requesting_thread_; + // Original thread count. The Processor has limit on how many threads to + // process, so not all threads are processed. This tells you how many threads + // were originally in the minudump. + int original_thread_count_; + // Exception record details: code, flags, address, parameters. ExceptionRecord exception_record_; diff --git a/src/processor/fast_source_line_resolver.cc b/src/processor/fast_source_line_resolver.cc index 79803f2ca..1c329772d 100644 --- a/src/processor/fast_source_line_resolver.cc +++ b/src/processor/fast_source_line_resolver.cc @@ -44,6 +44,7 @@ #include "processor/fast_source_line_resolver_types.h" #include +#include #include #include #include @@ -154,7 +155,7 @@ void FastSourceLineResolver::Module::ConstructInlineFrames( } } - // Use the starting adress of the inlined range as inlined function base. + // Use the starting address of the inlined range as inlined function base. new_frame->function_base = new_frame->module->base_address(); for (const auto& range : in->inline_ranges) { if (address >= range.first && address < range.first + range.second) { @@ -228,21 +229,21 @@ bool FastSourceLineResolver::Module::LoadMapFromMemory( const char* mem_buffer = memory_buffer; mem_buffer = SimpleSerializer::Read(mem_buffer, &is_corrupt_); - const uint32_t* map_sizes = reinterpret_cast(mem_buffer); + const uint64_t* map_sizes = reinterpret_cast(mem_buffer); - unsigned int header_size = kNumberMaps_ * sizeof(unsigned int); + unsigned int header_size = kNumberMaps_ * sizeof(uint64_t); // offsets[]: an array of offset addresses (with respect to mem_buffer), // for each "Static***Map" component of Module. // "Static***Map": static version of std::map or map wrapper, i.e., StaticMap, // StaticAddressMap, StaticContainedRangeMap, and StaticRangeMap. - unsigned int offsets[kNumberMaps_]; + uint64_t offsets[kNumberMaps_]; offsets[0] = header_size; for (int i = 1; i < kNumberMaps_; ++i) { offsets[i] = offsets[i - 1] + map_sizes[i - 1]; } - unsigned int expected_size = sizeof(bool) + offsets[kNumberMaps_ - 1] + - map_sizes[kNumberMaps_ - 1] + 1; + size_t expected_size = sizeof(bool) + offsets[kNumberMaps_ - 1] + + map_sizes[kNumberMaps_ - 1] + 1; if (expected_size != memory_buffer_size && // Allow for having an extra null terminator. expected_size != memory_buffer_size - 1) { diff --git a/src/processor/fast_source_line_resolver_types.h b/src/processor/fast_source_line_resolver_types.h index 75b9004f6..14913518b 100644 --- a/src/processor/fast_source_line_resolver_types.h +++ b/src/processor/fast_source_line_resolver_types.h @@ -107,7 +107,7 @@ struct FastSourceLineResolver::Inline : public SourceLineResolverBase::Inline { // De-serialize the memory data of a Inline. void CopyFrom(const char* raw) { - DESERIALIZE(raw, has_call_site_file_id); + raw = SimpleSerializer::Read(raw, &has_call_site_file_id); DESERIALIZE(raw, inline_nest_level); DESERIALIZE(raw, call_site_line); DESERIALIZE(raw, call_site_file_id); diff --git a/src/processor/logging.h b/src/processor/logging.h index 8c040837f..519c852f5 100644 --- a/src/processor/logging.h +++ b/src/processor/logging.h @@ -57,7 +57,9 @@ #define PROCESSOR_LOGGING_H__ #include +#include #include +#include #include "common/using_std_string.h" #include "google_breakpad/common/breakpad_types.h" @@ -119,9 +121,12 @@ class LogMessageVoidify { }; // Returns number formatted as a hexadecimal string, such as "0x7b". -string HexString(uint32_t number); -string HexString(uint64_t number); -string HexString(int number); +template +string HexString(T number) { + std::stringstream stream; + stream << "0x" << std::hex << number; + return stream.str(); +} // Returns the error code as set in the global errno variable, and sets // error_string, a required argument, to a string describing that error diff --git a/src/processor/map_serializers-inl.h b/src/processor/map_serializers-inl.h index 577b95f7b..5dfb0a752 100644 --- a/src/processor/map_serializers-inl.h +++ b/src/processor/map_serializers-inl.h @@ -54,7 +54,7 @@ template size_t StdMapSerializer::SizeOf( const std::map& m) const { size_t size = 0; - size_t header_size = (1 + m.size()) * sizeof(uint32_t); + size_t header_size = (1 + m.size()) * sizeof(uint64_t); size += header_size; typename std::map::const_iterator iter; @@ -76,19 +76,19 @@ char* StdMapSerializer::Write(const std::map& m, // Write header: // Number of nodes. - dest = SimpleSerializer::Write(m.size(), dest); + dest = SimpleSerializer::Write(m.size(), dest); // Nodes offsets. - uint32_t* offsets = reinterpret_cast(dest); - dest += sizeof(uint32_t) * m.size(); + uint64_t* offsets = reinterpret_cast(dest); + dest += sizeof(uint64_t) * m.size(); char* key_address = dest; dest += sizeof(Key) * m.size(); // Traverse map. typename std::map::const_iterator iter; - int index = 0; + int64_t index = 0; for (iter = m.begin(); iter != m.end(); ++iter, ++index) { - offsets[index] = static_cast(dest - start_address); + offsets[index] = static_cast(dest - start_address); key_address = key_serializer_.Write(iter->first, key_address); dest = value_serializer_.Write(iter->second, dest); } @@ -97,9 +97,9 @@ char* StdMapSerializer::Write(const std::map& m, template char* StdMapSerializer::Serialize( - const std::map& m, unsigned int* size) const { + const std::map& m, uint64_t* size) const { // Compute size of memory to be allocated. - unsigned int size_to_alloc = SizeOf(m); + uint64_t size_to_alloc = SizeOf(m); // Allocate memory. char* serialized_data = new char[size_to_alloc]; if (!serialized_data) { @@ -118,7 +118,7 @@ template size_t RangeMapSerializer::SizeOf( const RangeMap& m) const { size_t size = 0; - size_t header_size = (1 + m.map_.size()) * sizeof(uint32_t); + size_t header_size = (1 + m.map_.size()) * sizeof(uint64_t); size += header_size; typename std::map::const_iterator iter; @@ -144,19 +144,19 @@ char* RangeMapSerializer::Write( // Write header: // Number of nodes. - dest = SimpleSerializer::Write(m.map_.size(), dest); + dest = SimpleSerializer::Write(m.map_.size(), dest); // Nodes offsets. - uint32_t* offsets = reinterpret_cast(dest); - dest += sizeof(uint32_t) * m.map_.size(); + uint64_t* offsets = reinterpret_cast(dest); + dest += sizeof(uint64_t) * m.map_.size(); char* key_address = dest; dest += sizeof(Address) * m.map_.size(); // Traverse map. typename std::map::const_iterator iter; - int index = 0; + int64_t index = 0; for (iter = m.map_.begin(); iter != m.map_.end(); ++iter, ++index) { - offsets[index] = static_cast(dest - start_address); + offsets[index] = static_cast(dest - start_address); key_address = address_serializer_.Write(iter->first, key_address); dest = address_serializer_.Write(iter->second.base(), dest); dest = entry_serializer_.Write(iter->second.entry(), dest); @@ -166,9 +166,9 @@ char* RangeMapSerializer::Write( template char* RangeMapSerializer::Serialize( - const RangeMap& m, unsigned int* size) const { + const RangeMap& m, uint64_t* size) const { // Compute size of memory to be allocated. - unsigned int size_to_alloc = SizeOf(m); + uint64_t size_to_alloc = SizeOf(m); // Allocate memory. char* serialized_data = new char[size_to_alloc]; if (!serialized_data) { @@ -191,12 +191,12 @@ size_t ContainedRangeMapSerializer::SizeOf( size_t size = 0; size_t header_size = addr_serializer_.SizeOf(m->base_) + entry_serializer_.SizeOf(m->entry_) - + sizeof(uint32_t); + + sizeof(uint64_t); size += header_size; // In case m.map_ == NULL, we treat it as an empty map: - size += sizeof(uint32_t); + size += sizeof(uint64_t); if (m->map_) { - size += m->map_->size() * sizeof(uint32_t); + size += m->map_->size() * sizeof(uint64_t); typename Map::const_iterator iter; for (iter = m->map_->begin(); iter != m->map_->end(); ++iter) { size += addr_serializer_.SizeOf(iter->first); @@ -215,27 +215,27 @@ char* ContainedRangeMapSerializer::Write( return NULL; } dest = addr_serializer_.Write(m->base_, dest); - dest = SimpleSerializer::Write(entry_serializer_.SizeOf(m->entry_), + dest = SimpleSerializer::Write(entry_serializer_.SizeOf(m->entry_), dest); dest = entry_serializer_.Write(m->entry_, dest); // Write map<: char* map_address = dest; if (m->map_ == NULL) { - dest = SimpleSerializer::Write(0, dest); + dest = SimpleSerializer::Write(0, dest); } else { - dest = SimpleSerializer::Write(m->map_->size(), dest); - uint32_t* offsets = reinterpret_cast(dest); - dest += sizeof(uint32_t) * m->map_->size(); + dest = SimpleSerializer::Write(m->map_->size(), dest); + uint64_t* offsets = reinterpret_cast(dest); + dest += sizeof(uint64_t) * m->map_->size(); char* key_address = dest; dest += sizeof(AddrType) * m->map_->size(); // Traverse map. typename Map::const_iterator iter; - int index = 0; + int64_t index = 0; for (iter = m->map_->begin(); iter != m->map_->end(); ++iter, ++index) { - offsets[index] = static_cast(dest - map_address); + offsets[index] = static_cast(dest - map_address); key_address = addr_serializer_.Write(iter->first, key_address); // Recursively write. dest = Write(iter->second, dest); @@ -246,8 +246,8 @@ char* ContainedRangeMapSerializer::Write( template char* ContainedRangeMapSerializer::Serialize( - const ContainedRangeMap* m, unsigned int* size) const { - unsigned int size_to_alloc = SizeOf(m); + const ContainedRangeMap* m, uint64_t* size) const { + uint64_t size_to_alloc = SizeOf(m); // Allocating memory. char* serialized_data = new char[size_to_alloc]; if (!serialized_data) { diff --git a/src/processor/map_serializers.h b/src/processor/map_serializers.h index 54153f8a2..933ff6fde 100644 --- a/src/processor/map_serializers.h +++ b/src/processor/map_serializers.h @@ -65,7 +65,7 @@ class StdMapSerializer { // Returns a pointer to the serialized data. If size != NULL, *size is set // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". - char* Serialize(const std::map& m, unsigned int* size) const; + char* Serialize(const std::map& m, uint64_t* size) const; private: SimpleSerializer key_serializer_; @@ -93,7 +93,7 @@ class AddressMapSerializer { // Returns a pointer to the serialized data. If size != NULL, *size is set // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". - char* Serialize(const AddressMap& m, unsigned int* size) const { + char* Serialize(const AddressMap& m, uint64_t* size) const { return std_map_serializer_.Serialize(m.map_, size); } @@ -120,7 +120,7 @@ class RangeMapSerializer { // Returns a pointer to the serialized data. If size != NULL, *size is set // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". - char* Serialize(const RangeMap& m, unsigned int* size) const; + char* Serialize(const RangeMap& m, uint64_t* size) const; private: // Convenient type name for Range. @@ -151,7 +151,7 @@ class ContainedRangeMapSerializer { // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". char* Serialize(const ContainedRangeMap* m, - unsigned int* size) const; + uint64_t* size) const; private: // Convenient type name for the underlying map type. diff --git a/src/processor/map_serializers_unittest.cc b/src/processor/map_serializers_unittest.cc index cd31ddc83..855c4dae1 100644 --- a/src/processor/map_serializers_unittest.cc +++ b/src/processor/map_serializers_unittest.cc @@ -31,6 +31,7 @@ // // Author: Siyang Xie (lambxsy@google.com) +#include #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -49,8 +50,8 @@ #include "processor/range_map-inl.h" #include "processor/contained_range_map-inl.h" -typedef int32_t AddrType; -typedef int32_t EntryType; +typedef int64_t AddrType; +typedef int64_t EntryType; class TestStdMapSerializer : public ::testing::Test { protected: @@ -65,13 +66,13 @@ class TestStdMapSerializer : public ::testing::Test { std::map std_map_; google_breakpad::StdMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestStdMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { 0 }; - uint32_t correct_size = sizeof(correct_data); + const int64_t correct_data[] = { 0 }; + uint64_t correct_size = sizeof(correct_data); // std_map_ is empty. serialized_data_ = serializer_.Serialize(std_map_, &serialized_size_); @@ -81,17 +82,17 @@ TEST_F(TestStdMapSerializer, EmptyMapTestCase) { } TEST_F(TestStdMapSerializer, MapWithTwoElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 2, // Offsets - 20, 24, + 40, 48, // Keys 1, 3, // Values 2, 6 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); std_map_.insert(std::make_pair(1, 2)); std_map_.insert(std::make_pair(3, 6)); @@ -103,17 +104,17 @@ TEST_F(TestStdMapSerializer, MapWithTwoElementsTestCase) { } TEST_F(TestStdMapSerializer, MapWithFiveElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 5, // Offsets - 44, 48, 52, 56, 60, + 88, 96, 104, 112, 120, // Keys 1, 2, 3, 4, 5, // Values 11, 12, 13, 14, 15 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); for (int i = 1; i < 6; ++i) std_map_.insert(std::make_pair(i, 10 + i)); @@ -137,13 +138,13 @@ class TestAddressMapSerializer : public ::testing::Test { google_breakpad::AddressMap address_map_; google_breakpad::AddressMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestAddressMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { 0 }; - uint32_t correct_size = sizeof(correct_data); + const int64_t correct_data[] = { 0 }; + uint64_t correct_size = sizeof(correct_data); // std_map_ is empty. serialized_data_ = serializer_.Serialize(address_map_, &serialized_size_); @@ -153,17 +154,17 @@ TEST_F(TestAddressMapSerializer, EmptyMapTestCase) { } TEST_F(TestAddressMapSerializer, MapWithTwoElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 2, // Offsets - 20, 24, + 40, 48, // Keys 1, 3, // Values 2, 6 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); address_map_.Store(1, 2); address_map_.Store(3, 6); @@ -175,17 +176,17 @@ TEST_F(TestAddressMapSerializer, MapWithTwoElementsTestCase) { } TEST_F(TestAddressMapSerializer, MapWithFourElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 4, // Offsets - 36, 40, 44, 48, + 72, 80, 88, 96, // Keys -6, -4, 8, 123, // Values 2, 3, 5, 8 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); address_map_.Store(-6, 2); address_map_.Store(-4, 3); @@ -212,13 +213,13 @@ class TestRangeMapSerializer : public ::testing::Test { google_breakpad::RangeMap range_map_; google_breakpad::RangeMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestRangeMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { 0 }; - uint32_t correct_size = sizeof(correct_data); + const int64_t correct_data[] = { 0 }; + uint64_t correct_size = sizeof(correct_data); // range_map_ is empty. serialized_data_ = serializer_.Serialize(range_map_, &serialized_size_); @@ -228,17 +229,17 @@ TEST_F(TestRangeMapSerializer, EmptyMapTestCase) { } TEST_F(TestRangeMapSerializer, MapWithOneRangeTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 1, // Offsets - 12, + 24, // Keys: high address 10, // Values: (low address, entry) pairs 1, 6 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); range_map_.StoreRange(1, 10, 6); @@ -249,17 +250,17 @@ TEST_F(TestRangeMapSerializer, MapWithOneRangeTestCase) { } TEST_F(TestRangeMapSerializer, MapWithThreeRangesTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 3, // Offsets - 28, 36, 44, + 56, 72, 88, // Keys: high address - 5, 9, 20, + 5, 9, 20, // Values: (low address, entry) pairs - 2, 1, 6, 2, 10, 3 + 2, 1, 6, 2, 10, 3 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); ASSERT_TRUE(range_map_.StoreRange(2, 4, 1)); ASSERT_TRUE(range_map_.StoreRange(6, 4, 2)); @@ -285,18 +286,18 @@ class TestContainedRangeMapSerializer : public ::testing::Test { google_breakpad::ContainedRangeMap crm_map_; google_breakpad::ContainedRangeMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestContainedRangeMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { 0, // base address of root - 4, // size of entry + 8, // size of entry 0, // entry stored at root 0 // empty map stored at root }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); // crm_map_ is empty. serialized_data_ = serializer_.Serialize(&crm_map_, &serialized_size_); @@ -306,21 +307,21 @@ TEST_F(TestContainedRangeMapSerializer, EmptyMapTestCase) { } TEST_F(TestContainedRangeMapSerializer, MapWithOneRangeTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { 0, // base address of root - 4, // size of entry + 8, // size of entry 0, // entry stored at root // Map stored at root node: 1, // # of nodes - 12, // offset + 24, // offset 9, // key // value: a child ContainedRangeMap 3, // base address of child CRM - 4, // size of entry + 8, // size of entry -1, // entry stored in child CRM 0 // empty sub-map stored in child CRM }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); crm_map_.StoreRange(3, 7, -1); @@ -342,27 +343,27 @@ TEST_F(TestContainedRangeMapSerializer, MapWithTwoLevelsTestCase) { // / \ | // 3~4 6~7 16-20 level 2: grandchild1, grandchild2, grandchild3 - const int32_t correct_data[] = { + const int64_t correct_data[] = { // root: base, entry_size, entry - 0, 4, 0, + 0, 8, 0, // root's map: # of nodes, offset1, offset2, key1, key2 - 2, 20, 84, 8, 20, + 2, 40, 168, 8, 20, // child1: base, entry_size, entry: - 2, 4, -1, + 2, 8, -1, // child1's map: # of nodes, offset1, offset2, key1, key2 - 2, 20, 36, 4, 7, + 2, 40, 72, 4, 7, // grandchild1: base, entry_size, entry, empty_map - 3, 4, -1, 0, + 3, 8, -1, 0, // grandchild2: base, entry_size, entry, empty_map - 6, 4, -1, 0, + 6, 8, -1, 0, // child2: base, entry_size, entry: - 10, 4, -1, + 10, 8, -1, // child2's map: # of nodes, offset1, key1 - 1, 12, 20, + 1, 24, 20, // grandchild3: base, entry_size, entry, empty_map - 16, 4, -1, 0 + 16, 8, -1, 0 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); // Store child1. ASSERT_TRUE(crm_map_.StoreRange(2, 7, -1)); diff --git a/src/processor/minidump.cc b/src/processor/minidump.cc index 83e5a8688..22dce7626 100644 --- a/src/processor/minidump.cc +++ b/src/processor/minidump.cc @@ -32,6 +32,11 @@ // // Author: Mark Mentovai +// For PRI* macros, before anything else might #include it. +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif /* __STDC_FORMAT_MACROS */ + #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -39,6 +44,7 @@ #include "google_breakpad/processor/minidump.h" #include +#include #include #include #include @@ -820,7 +826,7 @@ bool MinidumpContext::Read(uint32_t expected_size) { // Context may include xsave registers and so be larger than // sizeof(MDRawContextX86). For now we skip this extended data. if (context_flags & MD_CONTEXT_X86_XSTATE) { - size_t bytes_left = expected_size - sizeof(MDRawContextX86); + int64_t bytes_left = expected_size - sizeof(MDRawContextX86); if (bytes_left > kMaxXSaveAreaSize) { BPLOG(ERROR) << "MinidumpContext oversized xstate area"; return false; @@ -897,8 +903,12 @@ bool MinidumpContext::Read(uint32_t expected_size) { // Skip extended xstate data if present in X86 context. if (context_flags & MD_CONTEXT_X86_XSTATE) { - minidump_->SeekSet((minidump_->Tell() - sizeof(MDRawContextX86)) + - expected_size); + if (!minidump_->SeekSet( + (minidump_->Tell() - sizeof(MDRawContextX86)) + + expected_size)) { + BPLOG(ERROR) << "MinidumpContext cannot seek to past xstate data"; + return false; + } } break; diff --git a/src/processor/minidump_processor.cc b/src/processor/minidump_processor.cc index 5d2dea6d8..44e69c890 100644 --- a/src/processor/minidump_processor.cc +++ b/src/processor/minidump_processor.cc @@ -35,6 +35,8 @@ #include #include +#include +#include #include #include #include @@ -64,7 +66,8 @@ MinidumpProcessor::MinidumpProcessor(SymbolSupplier* supplier, own_frame_symbolizer_(true), enable_exploitability_(false), enable_objdump_(false), - enable_objdump_for_exploitability_(false) { + enable_objdump_for_exploitability_(false), + max_thread_count_(-1) { } MinidumpProcessor::MinidumpProcessor(SymbolSupplier* supplier, @@ -74,7 +77,8 @@ MinidumpProcessor::MinidumpProcessor(SymbolSupplier* supplier, own_frame_symbolizer_(true), enable_exploitability_(enable_exploitability), enable_objdump_(false), - enable_objdump_for_exploitability_(false) { + enable_objdump_for_exploitability_(false), + max_thread_count_(-1) { } MinidumpProcessor::MinidumpProcessor(StackFrameSymbolizer* frame_symbolizer, @@ -83,7 +87,8 @@ MinidumpProcessor::MinidumpProcessor(StackFrameSymbolizer* frame_symbolizer, own_frame_symbolizer_(false), enable_exploitability_(enable_exploitability), enable_objdump_(false), - enable_objdump_for_exploitability_(false) { + enable_objdump_for_exploitability_(false), + max_thread_count_(-1) { assert(frame_symbolizer_); } @@ -208,29 +213,32 @@ ProcessResult MinidumpProcessor::Process( bool interrupted = false; bool found_requesting_thread = false; unsigned int thread_count = threads->thread_count(); + process_state->original_thread_count_ = thread_count; // Reset frame_symbolizer_ at the beginning of stackwalk for each minidump. frame_symbolizer_->Reset(); - MinidumpThreadNameList* thread_names = dump->GetThreadNameList(); std::map thread_id_to_name; if (thread_names) { const unsigned int thread_name_count = thread_names->thread_name_count(); for (unsigned int thread_name_index = 0; - thread_name_index < thread_name_count; - ++thread_name_index) { - MinidumpThreadName* thread_name = thread_names->GetThreadNameAtIndex(thread_name_index); + thread_name_index < thread_name_count; ++thread_name_index) { + MinidumpThreadName* thread_name = + thread_names->GetThreadNameAtIndex(thread_name_index); if (!thread_name) { - BPLOG(ERROR) << "Could not get thread name for thread at index " << thread_name_index; + BPLOG(ERROR) << "Could not get thread name for thread at index " + << thread_name_index; return PROCESS_ERROR_GETTING_THREAD_NAME; } uint32_t thread_id; if (!thread_name->GetThreadID(&thread_id)) { - BPLOG(ERROR) << "Could not get thread ID for thread at index " << thread_name_index; + BPLOG(ERROR) << "Could not get thread ID for thread at index " + << thread_name_index; return PROCESS_ERROR_GETTING_THREAD_NAME; } - thread_id_to_name.insert(std::make_pair(thread_id, thread_name->GetThreadName())); + thread_id_to_name.insert( + std::make_pair(thread_id, thread_name->GetThreadName())); } } @@ -270,6 +278,7 @@ ProcessResult MinidumpProcessor::Process( // dump of itself (when both its context and its stack are in flux), // processing that stack wouldn't provide much useful data. if (has_dump_thread && thread_id == dump_thread_id) { + process_state->original_thread_count_--; continue; } @@ -290,6 +299,13 @@ ProcessResult MinidumpProcessor::Process( // be the index of the current thread when it's pushed into the // vector. process_state->requesting_thread_ = process_state->threads_.size(); + if (max_thread_count_ >= 0) { + thread_count = + std::min(thread_count, + std::max(static_cast( + process_state->requesting_thread_ + 1), + static_cast(max_thread_count_))); + } found_requesting_thread = true; @@ -823,8 +839,6 @@ static void CalculateFaultAddressFromInstruction(Minidump* dump, DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region, instruction_ptr); - fprintf(stderr, "%s %s %s\n", disassembler.operation().c_str(), - disassembler.src().c_str(), disassembler.dest().c_str()); if (!disassembler.IsValid()) { BPLOG(INFO) << "Disassembling fault instruction failed."; return; @@ -1343,7 +1357,7 @@ string MinidumpProcessor::GetCrashReason(Minidump* dump, uint64_t* address, // an attempt to read data, 1 if it was an attempt to write data, // and 8 if this was a data execution violation. // exception_information[2] contains the underlying NTSTATUS code, - // which is the explanation for why this error occured. + // which is the explanation for why this error occurred. // This information is useful in addition to the code address, which // will be present in the crash thread's instruction field anyway. if (raw_exception->exception_record.number_parameters >= 1) { diff --git a/src/processor/module_serializer.cc b/src/processor/module_serializer.cc index 05519958f..33130353a 100644 --- a/src/processor/module_serializer.cc +++ b/src/processor/module_serializer.cc @@ -38,11 +38,21 @@ #include "processor/module_serializer.h" +#include +#include #include #include +#include "common/scoped_ptr.h" +#include "google_breakpad/processor/basic_source_line_resolver.h" +#include "google_breakpad/processor/code_module.h" +#include "google_breakpad/processor/fast_source_line_resolver.h" #include "processor/basic_code_module.h" +#include "processor/linked_ptr.h" #include "processor/logging.h" +#include "processor/map_serializers.h" +#include "processor/simple_serializer.h" +#include "processor/windows_frame_info.h" namespace google_breakpad { @@ -78,7 +88,7 @@ size_t ModuleSerializer::SizeOf(const BasicSourceLineResolver::Module& module) { inline_origin_serializer_.SizeOf(module.inline_origins_); // Header size. - total_size_alloc_ += kNumberMaps_ * sizeof(uint32_t); + total_size_alloc_ += kNumberMaps_ * sizeof(uint64_t); for (int i = 0; i < kNumberMaps_; ++i) { total_size_alloc_ += map_sizes_[i]; @@ -95,8 +105,8 @@ char* ModuleSerializer::Write(const BasicSourceLineResolver::Module& module, // Write the is_corrupt flag. dest = SimpleSerializer::Write(module.is_corrupt_, dest); // Write header. - memcpy(dest, map_sizes_, kNumberMaps_ * sizeof(uint32_t)); - dest += kNumberMaps_ * sizeof(uint32_t); + memcpy(dest, map_sizes_, kNumberMaps_ * sizeof(uint64_t)); + dest += kNumberMaps_ * sizeof(uint64_t); // Write each map. dest = files_serializer_.Write(module.files_, dest); dest = functions_serializer_.Write(module.functions_, dest); @@ -161,7 +171,7 @@ bool ModuleSerializer::SerializeModuleAndLoadIntoFastResolver( // Copy the data into string. // Must pass string to LoadModuleUsingMapBuffer(), instead of passing char* to - // LoadModuleUsingMemoryBuffer(), becaused of data ownership/lifetime issue. + // LoadModuleUsingMemoryBuffer(), because of data ownership/lifetime issue. string symbol_data_string(symbol_data.get(), size); symbol_data.reset(); @@ -213,7 +223,7 @@ char* ModuleSerializer::SerializeSymbolFileData(const string& symbol_data, return NULL; } buffer.reset(NULL); - return Serialize(*(module.get()), size); + return Serialize(*module, size); } } // namespace google_breakpad diff --git a/src/processor/module_serializer.h b/src/processor/module_serializer.h index fd387cbbc..675d78488 100644 --- a/src/processor/module_serializer.h +++ b/src/processor/module_serializer.h @@ -36,6 +36,7 @@ #ifndef PROCESSOR_MODULE_SERIALIZER_H__ #define PROCESSOR_MODULE_SERIALIZER_H__ +#include #include #include @@ -110,7 +111,7 @@ class ModuleSerializer { FastSourceLineResolver::Module::kNumberMaps_; // Memory sizes required to serialize map components in Module. - uint32_t map_sizes_[kNumberMaps_]; + uint64_t map_sizes_[kNumberMaps_]; // Serializers for each individual map component in Module class. StdMapSerializer files_serializer_; diff --git a/src/processor/proc_maps_linux.cc b/src/processor/proc_maps_linux.cc index 6fcb909a1..5234633e6 100644 --- a/src/processor/proc_maps_linux.cc +++ b/src/processor/proc_maps_linux.cc @@ -2,9 +2,10 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +// For PRI* macros, before anything else might #include it. #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS -#endif +#endif /* __STDC_FORMAT_MACROS */ #ifdef HAVE_CONFIG_H #include // Must come first diff --git a/src/processor/process_state.cc b/src/processor/process_state.cc index c5c38b6cc..54459c653 100644 --- a/src/processor/process_state.cc +++ b/src/processor/process_state.cc @@ -54,6 +54,7 @@ void ProcessState::Clear() { crash_address_ = 0; assertion_.clear(); requesting_thread_ = -1; + original_thread_count_ = 0; for (vector::const_iterator iterator = threads_.begin(); iterator != threads_.end(); ++iterator) { diff --git a/src/processor/range_map-inl.h b/src/processor/range_map-inl.h index d1a194f31..e38fe50a7 100644 --- a/src/processor/range_map-inl.h +++ b/src/processor/range_map-inl.h @@ -250,7 +250,7 @@ bool RangeMap::RetrieveNearestRange( template bool RangeMap::RetrieveRangeAtIndex( - int index, EntryType* entry, AddressType* entry_base, + int64_t index, EntryType* entry, AddressType* entry_base, AddressType* entry_delta, AddressType* entry_size) const { BPLOG_IF(ERROR, !entry) << "RangeMap::RetrieveRangeAtIndex requires |entry|"; assert(entry); @@ -263,7 +263,7 @@ bool RangeMap::RetrieveRangeAtIndex( // Walk through the map. Although it's ordered, it's not a vector, so it // can't be addressed directly by index. MapConstIterator iterator = map_.begin(); - for (int this_index = 0; this_index < index; ++this_index) + for (int64_t this_index = 0; this_index < index; ++this_index) ++iterator; *entry = iterator->second.entry(); @@ -279,8 +279,8 @@ bool RangeMap::RetrieveRangeAtIndex( template -int RangeMap::GetCount() const { - return static_cast(map_.size()); +int64_t RangeMap::GetCount() const { + return static_cast(map_.size()); } diff --git a/src/processor/range_map.h b/src/processor/range_map.h index 578bd1442..05c659758 100644 --- a/src/processor/range_map.h +++ b/src/processor/range_map.h @@ -40,6 +40,7 @@ #define PROCESSOR_RANGE_MAP_H__ +#include #include @@ -109,12 +110,12 @@ class RangeMap { // entry was shrunk down (original start address was increased by delta). // // RetrieveRangeAtIndex is not optimized for speedy operation. - bool RetrieveRangeAtIndex(int index, EntryType* entry, + bool RetrieveRangeAtIndex(int64_t index, EntryType* entry, AddressType* entry_base, AddressType* entry_delta, AddressType* entry_size) const; // Returns the number of ranges stored in the RangeMap. - int GetCount() const; + int64_t GetCount() const; // Empties the range map, restoring it to the state it was when it was // initially created. diff --git a/src/processor/range_map_unittest.cc b/src/processor/range_map_unittest.cc index 8735bb095..f8e3dfbe5 100644 --- a/src/processor/range_map_unittest.cc +++ b/src/processor/range_map_unittest.cc @@ -30,11 +30,16 @@ // // Author: Mark Mentovai +// For PRI* macros, before anything else might #include it. +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif /* __STDC_FORMAT_MACROS */ #ifdef HAVE_CONFIG_H #include // Must come first #endif +#include #include #include @@ -332,11 +337,11 @@ static bool RetrieveIndexTest(TestMap* range_map, int set) { return true; } -// Additional RetriveAtIndex test to expose the bug in RetrieveRangeAtIndex(). +// Additional RetrieveAtIndex test to expose the bug in RetrieveRangeAtIndex(). // Bug info: RetrieveRangeAtIndex() previously retrieves the high address of // entry, however, it is supposed to retrieve the base address of entry as // stated in the comment in range_map.h. -static bool RetriveAtIndexTest2() { +static bool RetrieveAtIndexTest2() { scoped_ptr range_map(new TestMap()); // Store ranges with base address = 2 * object_id: @@ -360,7 +365,7 @@ static bool RetriveAtIndexTest2() { int expected_base = 2 * object->id(); if (base != expected_base) { - fprintf(stderr, "FAILED: RetriveAtIndexTest2 index %d, " + fprintf(stderr, "FAILED: RetrieveAtIndexTest2 index %d, " "expected base %d, observed base %d", object_index, expected_base, base); return false; @@ -504,7 +509,7 @@ static bool RunTests() { // The RangeMap's own count of objects should also match. if (range_map->GetCount() != stored_count) { fprintf(stderr, "FAILED: stored object count doesn't match GetCount, " - "expected %d, observed %d\n", + "expected %d, observed %" PRId64 "\n", stored_count, range_map->GetCount()); return false; @@ -542,7 +547,7 @@ static bool RunTests() { } } - if (!RetriveAtIndexTest2()) { + if (!RetrieveAtIndexTest2()) { fprintf(stderr, "FAILED: did not pass RetrieveAtIndexTest2()\n"); return false; } diff --git a/src/processor/source_line_resolver_base.cc b/src/processor/source_line_resolver_base.cc index da9ff7624..351157cea 100644 --- a/src/processor/source_line_resolver_base.cc +++ b/src/processor/source_line_resolver_base.cc @@ -253,7 +253,6 @@ bool SourceLineResolverBase::LoadModuleUsingMemoryBuffer( // Returning false from here would be an indication that the symbols for // this module are missing which would be wrong. Intentionally fall through // and add the module to both the modules_ and the corrupt_modules_ lists. - assert(basic_module->IsCorrupt()); } modules_->insert(make_pair(module->code_file(), basic_module)); diff --git a/src/processor/stackwalker.cc b/src/processor/stackwalker.cc index 1ff6cf7cb..e7ef42d62 100644 --- a/src/processor/stackwalker.cc +++ b/src/processor/stackwalker.cc @@ -263,6 +263,8 @@ Stackwalker* Stackwalker::StackwalkerForCPU( int fp_register = -1; if (system_info->os_short == "ios") fp_register = MD_CONTEXT_ARM_REG_IOS_FP; + else + fp_register = MD_CONTEXT_ARM_REG_FP; cpu_stackwalker = new StackwalkerARM(system_info, context->GetContextARM(), fp_register, memory, modules, diff --git a/src/processor/stackwalker_amd64.cc b/src/processor/stackwalker_amd64.cc index b934e73b4..f3d7f119e 100644 --- a/src/processor/stackwalker_amd64.cc +++ b/src/processor/stackwalker_amd64.cc @@ -147,7 +147,6 @@ StackFrameAMD64* StackwalkerAMD64::GetCallerByCFIFrameInfo( return NULL; if (!frame->context.rip || !frame->context.rsp) { - BPLOG(ERROR) << "invalid rip/rsp"; return NULL; } diff --git a/src/processor/stackwalker_arm64.cc b/src/processor/stackwalker_arm64.cc index 0fa02e04f..6c3168ecc 100644 --- a/src/processor/stackwalker_arm64.cc +++ b/src/processor/stackwalker_arm64.cc @@ -36,6 +36,7 @@ #include // Must come first #endif +#include #include #include "common/scoped_ptr.h" @@ -269,11 +270,15 @@ void StackwalkerARM64::CorrectRegLRByFramePointer( // Searching for a real callee frame. Skipping inline frames since they // don't contain context (and cannot be downcasted to StackFrameARM64). - size_t last_frame_callee_id = frames.size() - 2; + int64_t last_frame_callee_id = frames.size() - 2; while (last_frame_callee_id >= 0 && frames[last_frame_callee_id]->trust == StackFrame::FRAME_TRUST_INLINE) { last_frame_callee_id--; } + // last_frame_callee_id should not become negative because at the top of the + // stack trace we always have a context frame (FRAME_TRUST_CONTEXT) so the + // above loop should end before last_frame_callee_id gets negative. But we are + // being extra defensive here and bail if it ever becomes negative. if (last_frame_callee_id < 0) return; StackFrameARM64* last_frame_callee = static_cast(frames[last_frame_callee_id]); @@ -284,8 +289,6 @@ void StackwalkerARM64::CorrectRegLRByFramePointer( uint64_t last_fp = 0; if (last_frame_callee_fp && !memory_->GetMemoryAtAddress(last_frame_callee_fp, &last_fp)) { - BPLOG(ERROR) << "Unable to read last_fp from last_last_fp: 0x" << std::hex - << last_frame_callee_fp; return; } // Give up if STACK CFI doesn't agree with frame pointer. diff --git a/src/processor/static_address_map_unittest.cc b/src/processor/static_address_map_unittest.cc index aebab976c..2e8461675 100644 --- a/src/processor/static_address_map_unittest.cc +++ b/src/processor/static_address_map_unittest.cc @@ -124,7 +124,7 @@ class TestStaticAddressMap : public ::testing::Test { srand(time(0)); for (int data_item = 0; data_item < testsize[testcase]; ++data_item) { - // Retrive (aka, search) for target address and compare results from + // Retrieve (aka, search) for target address and compare results from // AddressMap and StaticAddressMap. // First, assign the search target to be one of original testdata that is diff --git a/src/processor/static_contained_range_map-inl.h b/src/processor/static_contained_range_map-inl.h index 60606ddc6..589bce4bf 100644 --- a/src/processor/static_contained_range_map-inl.h +++ b/src/processor/static_contained_range_map-inl.h @@ -45,7 +45,7 @@ template StaticContainedRangeMap::StaticContainedRangeMap( const char *base) : base_(*(reinterpret_cast(base))), - entry_size_(*(reinterpret_cast(base + sizeof(base_)))), + entry_size_(*(reinterpret_cast(base + sizeof(base_)))), entry_ptr_(reinterpret_cast( base + sizeof(base_) + sizeof(entry_size_))), map_(base + sizeof(base_) + sizeof(entry_size_) + entry_size_) { diff --git a/src/processor/static_contained_range_map.h b/src/processor/static_contained_range_map.h index 86e54666d..eea03db72 100644 --- a/src/processor/static_contained_range_map.h +++ b/src/processor/static_contained_range_map.h @@ -86,7 +86,7 @@ class StaticContainedRangeMap { // actually contain an entry, so its entry_ field is meaningless. For // this reason, the entry_ field should only be accessed on child // ContainedRangeMap objects, and never on |this|. - uint32_t entry_size_; + uint64_t entry_size_; const EntryType *entry_ptr_; // The map containing child ranges, keyed by each child range's high diff --git a/src/processor/static_contained_range_map_unittest.cc b/src/processor/static_contained_range_map_unittest.cc index d0507a4b1..c6d78400d 100644 --- a/src/processor/static_contained_range_map_unittest.cc +++ b/src/processor/static_contained_range_map_unittest.cc @@ -31,6 +31,7 @@ // // Author: Siyang Xie (lambxsy@google.com) +#include #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -161,7 +162,7 @@ class TestStaticCRMMap : public ::testing::Test { protected: void SetUp(); - // A referrence map for testing StaticCRMMap. + // A reference map for testing StaticCRMMap. google_breakpad::ContainedRangeMap crm_map_; // Static version of crm_map using serialized data of crm_map. @@ -177,7 +178,7 @@ class TestStaticCRMMap : public ::testing::Test { void TestStaticCRMMap::SetUp() { // First, do the StoreRange tests. This validates the containment // rules. - // We confirm the referrence map correctly stores data during setup. + // We confirm the reference map correctly stores data during setup. ASSERT_TRUE (crm_map_.StoreRange(10, 10, 1)); ASSERT_FALSE(crm_map_.StoreRange(10, 10, 2)); // exactly equal to 1 ASSERT_FALSE(crm_map_.StoreRange(11, 10, 3)); // begins inside 1 and extends up @@ -228,7 +229,7 @@ void TestStaticCRMMap::SetUp() { ASSERT_FALSE(crm_map_.StoreRange(86, 2, 48)); // Serialize crm_map to generate serialized data. - unsigned int size; + uint64_t size; serialized_data_.reset(serializer_.Serialize(&crm_map_, &size)); BPLOG(INFO) << "Serialized data size: " << size << " Bytes."; @@ -239,12 +240,12 @@ void TestStaticCRMMap::SetUp() { TEST_F(TestStaticCRMMap, TestEmptyMap) { CRMMap empty_crm_map; - unsigned int size; + uint64_t size; scoped_array serialized_data; serialized_data.reset(serializer_.Serialize(&empty_crm_map, &size)); scoped_ptr test_map(new TestMap(serialized_data.get())); - const unsigned int kCorrectSizeForEmptyMap = 16; + const unsigned int kCorrectSizeForEmptyMap = 24; ASSERT_EQ(kCorrectSizeForEmptyMap, size); const int *entry_test; @@ -259,12 +260,12 @@ TEST_F(TestStaticCRMMap, TestSingleElementMap) { int entry = 1; crm_map.StoreRange(10, 10, entry); - unsigned int size; + uint64_t size; scoped_array serialized_data; serialized_data.reset(serializer_.Serialize(&crm_map, &size)); scoped_ptr test_map(new TestMap(serialized_data.get())); - const unsigned int kCorrectSizeForSingleElementMap = 40; + const unsigned int kCorrectSizeForSingleElementMap = 60; ASSERT_EQ(kCorrectSizeForSingleElementMap, size); const int *entry_test; @@ -283,7 +284,7 @@ TEST_F(TestStaticCRMMap, TestRetrieveRangeEntries) { crm_map.StoreRange(2, 6, 1); crm_map.StoreRange(2, 7, 2); - unsigned int size; + uint64_t size; scoped_array serialized_data; serialized_data.reset(serializer_.Serialize(&crm_map, &size)); scoped_ptr test_map(new TestMap(serialized_data.get())); diff --git a/src/processor/static_map-inl.h b/src/processor/static_map-inl.h index f9929efe9..2e827b9f7 100644 --- a/src/processor/static_map-inl.h +++ b/src/processor/static_map-inl.h @@ -47,23 +47,23 @@ StaticMap::StaticMap(const char* raw_data) : raw_data_(raw_data), compare_() { // First 4 Bytes store the number of nodes. - num_nodes_ = *(reinterpret_cast(raw_data_)); + num_nodes_ = *(reinterpret_cast(raw_data_)); - offsets_ = reinterpret_cast( + offsets_ = reinterpret_cast( raw_data_ + sizeof(num_nodes_)); keys_ = reinterpret_cast( - raw_data_ + (1 + num_nodes_) * sizeof(uint32_t)); + raw_data_ + (1 + num_nodes_) * sizeof(uint64_t)); } // find(), lower_bound() and upper_bound() implement binary search algorithm. template StaticMapIterator StaticMap::find(const Key& key) const { - int begin = 0; - int end = num_nodes_; - int middle; - int compare_result; + int64_t begin = 0; + int64_t end = num_nodes_; + int64_t middle; + int64_t compare_result; while (begin < end) { middle = begin + (end - begin) / 2; compare_result = compare_(key, GetKeyAtIndex(middle)); @@ -81,10 +81,10 @@ StaticMap::find(const Key& key) const { template StaticMapIterator StaticMap::lower_bound(const Key& key) const { - int begin = 0; - int end = num_nodes_; - int middle; - int comp_result; + int64_t begin = 0; + int64_t end = num_nodes_; + int64_t middle; + int64_t comp_result; while (begin < end) { middle = begin + (end - begin) / 2; comp_result = compare_(key, GetKeyAtIndex(middle)); @@ -102,10 +102,10 @@ StaticMap::lower_bound(const Key& key) const { template StaticMapIterator StaticMap::upper_bound(const Key& key) const { - int begin = 0; - int end = num_nodes_; - int middle; - int compare_result; + int64_t begin = 0; + int64_t end = num_nodes_; + int64_t middle; + int64_t compare_result; while (begin < end) { middle = begin + (end - begin) / 2; compare_result = compare_(key, GetKeyAtIndex(middle)); @@ -124,22 +124,22 @@ template bool StaticMap::ValidateInMemoryStructure() const { // check the number of nodes is non-negative: if (!raw_data_) return false; - int32_t num_nodes = *(reinterpret_cast(raw_data_)); + int64_t num_nodes = *(reinterpret_cast(raw_data_)); if (num_nodes < 0) { BPLOG(INFO) << "StaticMap check failed: negative number of nodes"; return false; } - int node_index = 0; + int64_t node_index = 0; if (num_nodes_) { - uint64_t first_offset = sizeof(int32_t) * (num_nodes_ + 1) + uint64_t first_offset = sizeof(int64_t) * (num_nodes_ + 1) + sizeof(Key) * num_nodes_; // Num_nodes_ is too large. if (first_offset > 0xffffffffUL) { BPLOG(INFO) << "StaticMap check failed: size exceeds limit"; return false; } - if (offsets_[node_index] != static_cast(first_offset)) { + if (offsets_[node_index] != static_cast(first_offset)) { BPLOG(INFO) << "StaticMap check failed: first node offset is incorrect"; return false; } @@ -162,7 +162,7 @@ bool StaticMap::ValidateInMemoryStructure() const { } template -const Key StaticMap::GetKeyAtIndex(int index) const { +const Key StaticMap::GetKeyAtIndex(int64_t index) const { if (index < 0 || index >= num_nodes_) { BPLOG(ERROR) << "Key index out of range error"; // Key type is required to be primitive type. Return 0 if index is invalid. diff --git a/src/processor/static_map.h b/src/processor/static_map.h index a8f495820..be2c7d55a 100644 --- a/src/processor/static_map.h +++ b/src/processor/static_map.h @@ -34,11 +34,11 @@ // // The chunk of memory should contain data with pre-defined pattern: // **************** header *************** -// uint32 (4 bytes): number of nodes -// uint32 (4 bytes): address offset of node1's mapped_value -// uint32 (4 bytes): address offset of node2's mapped_value +// int64 (8 bytes): number of nodes +// uint64 (8 bytes): address offset of node1's mapped_value +// uint64 (8 bytes): address offset of node2's mapped_value // ... -// uint32 (4 bytes): address offset of nodeN's mapped_value +// uint64 (8 bytes): address offset of nodeN's mapped_value // // ************* Key array ************ // (X bytes): node1's key @@ -54,9 +54,6 @@ // // REQUIREMENT: Key type MUST be primitive type or pointers so that: // X = sizeof(typename Key); -// -// Note: since address offset is stored as uint32, user should keep in mind that -// StaticMap only supports up to 4GB size of memory data. // Author: Siyang Xie (lambxsy@google.com) @@ -72,7 +69,7 @@ namespace google_breakpad { template class DefaultCompare { public: - int operator()(const Key& k1, const Key& k2) const { + int64_t operator()(const Key& k1, const Key& k2) const { if (k1 < k2) return -1; if (k1 == k2) return 0; return 1; @@ -93,13 +90,13 @@ class StaticMap { explicit StaticMap(const char* raw_data); inline bool empty() const { return num_nodes_ == 0; } - inline unsigned int size() const { return num_nodes_; } + inline uint64_t size() const { return num_nodes_; } // Return iterators. inline iterator begin() const { return IteratorAtIndex(0); } inline iterator last() const { return IteratorAtIndex(num_nodes_ - 1); } inline iterator end() const { return IteratorAtIndex(num_nodes_); } - inline iterator IteratorAtIndex(int index) const { + inline iterator IteratorAtIndex(int64_t index) const { return iterator(raw_data_, index); } @@ -120,18 +117,18 @@ class StaticMap { bool ValidateInMemoryStructure() const; private: - const Key GetKeyAtIndex(int i) const; + const Key GetKeyAtIndex(int64_t i) const; // Start address of a raw memory chunk with serialized data. const char* raw_data_; // Number of nodes in the static map. - int32_t num_nodes_; + int64_t num_nodes_; // Array of offset addresses for stored values. // For example: // address_of_i-th_node_value = raw_data_ + offsets_[i] - const uint32_t* offsets_; + const uint64_t* offsets_; // keys_[i] = key of i_th node const Key* keys_; diff --git a/src/processor/static_map_iterator-inl.h b/src/processor/static_map_iterator-inl.h index 01a1b7f7b..e24a70963 100644 --- a/src/processor/static_map_iterator-inl.h +++ b/src/processor/static_map_iterator-inl.h @@ -43,12 +43,12 @@ namespace google_breakpad { template StaticMapIterator::StaticMapIterator(const char* base, - const int& index): + int64_t index): index_(index), base_(base) { // See static_map.h for documentation on // bytes format of serialized StaticMap data. - num_nodes_ = *(reinterpret_cast(base_)); - offsets_ = reinterpret_cast(base_ + sizeof(num_nodes_)); + num_nodes_ = *(reinterpret_cast(base_)); + offsets_ = reinterpret_cast(base_ + sizeof(num_nodes_)); keys_ = reinterpret_cast( base_ + (1 + num_nodes_) * sizeof(num_nodes_)); } diff --git a/src/processor/static_map_iterator.h b/src/processor/static_map_iterator.h index 6c190e975..8cfe9e080 100644 --- a/src/processor/static_map_iterator.h +++ b/src/processor/static_map_iterator.h @@ -87,21 +87,21 @@ class StaticMapIterator { friend class StaticMap; // Only StaticMap can call this constructor. - explicit StaticMapIterator(const char* base, const int32_t& index); + explicit StaticMapIterator(const char* base, int64_t index); // Index of node that the iterator is pointing to. - int32_t index_; + int64_t index_; // Beginning address of the serialized map data. const char* base_; // Number of nodes in the map. Use it to identify end() iterator. - int32_t num_nodes_; + int64_t num_nodes_; // offsets_ is an array of offset addresses of mapped values. // For example: // address_of_i-th_node_value = base_ + offsets_[i] - const uint32_t* offsets_; + const uint64_t* offsets_; // keys_[i] = key of i_th node. const Key* keys_; diff --git a/src/processor/static_map_unittest.cc b/src/processor/static_map_unittest.cc index 67b201b63..1fbb9a498 100644 --- a/src/processor/static_map_unittest.cc +++ b/src/processor/static_map_unittest.cc @@ -30,6 +30,7 @@ // // Author: Siyang Xie (lambxsy@google.com) +#include #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -52,8 +53,8 @@ class SimpleMapSerializer { static char* Serialize(const std::map& stdmap, unsigned int* size = NULL) { unsigned int size_per_node = - sizeof(uint32_t) + sizeof(Key) + sizeof(Value); - unsigned int memsize = sizeof(int32_t) + size_per_node * stdmap.size(); + sizeof(uint64_t) + sizeof(Key) + sizeof(Value); + unsigned int memsize = sizeof(int64_t) + size_per_node * stdmap.size(); if (size) *size = memsize; // Allocate memory for serialized data: @@ -61,12 +62,12 @@ class SimpleMapSerializer { char* address = mem; // Writer the number of nodes: - new (address) uint32_t(static_cast(stdmap.size())); - address += sizeof(uint32_t); + new (address) uint64_t(static_cast(stdmap.size())); + address += sizeof(uint64_t); // Nodes' offset: - uint32_t* offsets = reinterpret_cast(address); - address += sizeof(uint32_t) * stdmap.size(); + uint64_t* offsets = reinterpret_cast(address); + address += sizeof(uint64_t) * stdmap.size(); // Keys: Key* keys = reinterpret_cast(address); @@ -98,16 +99,16 @@ class TestInvalidMap : public ::testing::Test { }; TEST_F(TestInvalidMap, TestNegativeNumberNodes) { - memset(data, 0xff, sizeof(uint32_t)); // Set the number of nodes = -1 + memset(data, 0xff, sizeof(uint64_t)); // Set the number of nodes = -1 test_map = TestMap(data); ASSERT_FALSE(test_map.ValidateInMemoryStructure()); } TEST_F(TestInvalidMap, TestWrongOffsets) { - uint32_t* header = reinterpret_cast(data); - const uint32_t kNumNodes = 2; - const uint32_t kHeaderOffset = - sizeof(uint32_t) + kNumNodes * (sizeof(uint32_t) + sizeof(KeyType)); + uint64_t* header = reinterpret_cast(data); + const uint64_t kNumNodes = 2; + const uint64_t kHeaderOffset = + sizeof(uint64_t) + kNumNodes * (sizeof(uint64_t) + sizeof(KeyType)); header[0] = kNumNodes; header[1] = kHeaderOffset + 3; // Wrong offset for first node @@ -121,16 +122,16 @@ TEST_F(TestInvalidMap, TestWrongOffsets) { } TEST_F(TestInvalidMap, TestUnSortedKeys) { - uint32_t* header = reinterpret_cast(data); - const uint32_t kNumNodes = 2; - const uint32_t kHeaderOffset = - sizeof(uint32_t) + kNumNodes * (sizeof(uint32_t) + sizeof(KeyType)); + uint64_t* header = reinterpret_cast(data); + const uint64_t kNumNodes = 2; + const uint64_t kHeaderOffset = + sizeof(uint64_t) + kNumNodes * (sizeof(uint64_t) + sizeof(KeyType)); header[0] = kNumNodes; header[1] = kHeaderOffset; header[2] = kHeaderOffset + sizeof(ValueType); KeyType* keys = reinterpret_cast( - data + (kNumNodes + 1) * sizeof(uint32_t)); + data + (kNumNodes + 1) * sizeof(uint64_t)); // Set keys in non-increasing order. keys[0] = 10; keys[1] = 7; @@ -174,10 +175,10 @@ class TestValidMap : public ::testing::Test { // Set correct size of memory allocation for each test case. unsigned int size_per_node = - sizeof(uint32_t) + sizeof(KeyType) + sizeof(ValueType); + sizeof(uint64_t) + sizeof(KeyType) + sizeof(ValueType); for (testcase = 0; testcase < kNumberTestCases; ++testcase) { correct_size[testcase] = - sizeof(uint32_t) + std_map[testcase].size() * size_per_node; + sizeof(uint64_t) + std_map[testcase].size() * size_per_node; } } diff --git a/src/processor/static_range_map-inl.h b/src/processor/static_range_map-inl.h index b0a327479..3ffeec975 100644 --- a/src/processor/static_range_map-inl.h +++ b/src/processor/static_range_map-inl.h @@ -102,7 +102,7 @@ bool StaticRangeMap::RetrieveNearestRange( template bool StaticRangeMap::RetrieveRangeAtIndex( - int index, const EntryType*& entry, + int64_t index, const EntryType*& entry, AddressType* entry_base, AddressType* entry_size) const { if (index >= GetCount()) { diff --git a/src/processor/static_range_map.h b/src/processor/static_range_map.h index 319085db2..c63d2fa94 100644 --- a/src/processor/static_range_map.h +++ b/src/processor/static_range_map.h @@ -73,12 +73,12 @@ class StaticRangeMap { // range. // // RetrieveRangeAtIndex is not optimized for speedy operation. - bool RetrieveRangeAtIndex(int index, const EntryType*& entry, + bool RetrieveRangeAtIndex(int64_t index, const EntryType*& entry, AddressType* entry_base, AddressType* entry_size) const; // Returns the number of ranges stored in the RangeMap. - inline int GetCount() const { return map_.size(); } + inline int64_t GetCount() const { return map_.size(); } private: friend class ModuleComparer; diff --git a/src/third_party/libdisasm/ia32_modrm.c b/src/third_party/libdisasm/ia32_modrm.c index b0fe2ed3d..c80c1b95e 100644 --- a/src/third_party/libdisasm/ia32_modrm.c +++ b/src/third_party/libdisasm/ia32_modrm.c @@ -2,6 +2,9 @@ #include "ia32_reg.h" #include "x86_imm.h" +#include +#include + /* NOTE: when decoding ModR/M and SIB, we have to add 1 to all register * values obtained from decoding the ModR/M or SIB byte, since they * are encoded with eAX = 0 and the tables in ia32_reg.c use eAX = 1. @@ -72,16 +75,18 @@ static unsigned int imm32_signsized( unsigned char *buf, size_t buf_len, return 0; } + int16_t local_short; switch (size) { case 1: *dest = *((signed char *) buf); break; case 2: - *dest = *((signed short *) buf); + memcpy(&local_short, buf, 2); + *dest = local_short; break; case 4: default: - *dest = *((signed int *) buf); + memcpy(dest, buf, 4); break; } diff --git a/src/third_party/libdisasm/x86_imm.c b/src/third_party/libdisasm/x86_imm.c index cd59bfc9a..11c8a7de9 100644 --- a/src/third_party/libdisasm/x86_imm.c +++ b/src/third_party/libdisasm/x86_imm.c @@ -2,36 +2,15 @@ #include "x86_imm.h" #include +#include unsigned int x86_imm_signsized( unsigned char * buf, size_t buf_len, void *dest, unsigned int size ) { - signed char *cp = (signed char *) dest; - signed short *sp = (signed short *) dest; - int32_t *lp = (int32_t *) dest; - qword_t *qp = (qword_t *) dest; - if ( size > buf_len ) { return 0; } - /* Copy 'size' bytes from *buf to *op - * return number of bytes copied */ - switch (size) { - case 1: /* BYTE */ - *cp = *((signed char *) buf); - break; - case 2: /* WORD */ - *sp = *((signed short *) buf); - break; - case 6: - case 8: /* QWORD */ - *qp = *((qword_t *) buf); - break; - case 4: /* DWORD */ - default: - *lp = *((int32_t *) buf); - break; - } + memcpy(dest, buf, size); return (size); } diff --git a/src/tools/linux/dump_syms/dump_syms.cc b/src/tools/linux/dump_syms/dump_syms.cc index 2fce23c2a..007b46094 100644 --- a/src/tools/linux/dump_syms/dump_syms.cc +++ b/src/tools/linux/dump_syms/dump_syms.cc @@ -52,11 +52,14 @@ int usage(const char* self) { google_breakpad::BaseName(self).c_str()); fprintf(stderr, "Options:\n"); fprintf(stderr, " -i: Output module header information only.\n"); + fprintf(stderr, " -a Preserve the load address - Do not normalize " + "the load address to zero.\n"); fprintf(stderr, " -c Do not generate CFI section\n"); fprintf(stderr, " -d Generate INLINE/INLINE_ORIGIN records\n"); fprintf(stderr, " -r Do not handle inter-compilation " "unit references\n"); fprintf(stderr, " -v Print all warnings to stderr\n"); + fprintf(stderr, " -b Use specified id for the module id\n"); fprintf(stderr, " -n Use specified name for name of the object\n"); fprintf(stderr, " -o Use specified name for the " "operating system\n"); @@ -69,6 +72,7 @@ int usage(const char* self) { int main(int argc, char** argv) { if (argc < 2) return usage(argv[0]); + bool preserve_load_address = false; bool header_only = false; bool cfi = true; bool handle_inlines = false; @@ -76,12 +80,15 @@ int main(int argc, char** argv) { bool log_to_stderr = false; bool enable_multiple_field = false; std::string obj_name; + std::string module_id; const char* obj_os = "Linux"; int arg_index = 1; while (arg_index < argc && strlen(argv[arg_index]) > 0 && argv[arg_index][0] == '-') { if (strcmp("-i", argv[arg_index]) == 0) { header_only = true; + } else if (strcmp("-a", argv[arg_index]) == 0) { + preserve_load_address = true; } else if (strcmp("-c", argv[arg_index]) == 0) { cfi = false; } else if (strcmp("-d", argv[arg_index]) == 0) { @@ -90,6 +97,13 @@ int main(int argc, char** argv) { handle_inter_cu_refs = false; } else if (strcmp("-v", argv[arg_index]) == 0) { log_to_stderr = true; + } else if (strcmp("-b", argv[arg_index]) == 0) { + if (arg_index + 1 >= argc) { + fprintf(stderr, "Missing argument to -b\n"); + return usage(argv[0]); + } + module_id = argv[arg_index + 1]; + ++arg_index; } else if (strcmp("-n", argv[arg_index]) == 0) { if (arg_index + 1 >= argc) { fprintf(stderr, "Missing argument to -n\n"); @@ -135,7 +149,7 @@ int main(int argc, char** argv) { obj_name = binary; if (header_only) { - if (!WriteSymbolFileHeader(binary, obj_name, obj_os, std::cout)) { + if (!WriteSymbolFileHeader(binary, obj_name, obj_os, module_id, std::cout)) { fprintf(saved_stderr, "Failed to process file.\n"); return 1; } @@ -143,8 +157,8 @@ int main(int argc, char** argv) { SymbolData symbol_data = (handle_inlines ? INLINES : NO_DATA) | (cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; google_breakpad::DumpOptions options(symbol_data, handle_inter_cu_refs, - enable_multiple_field); - if (!WriteSymbolFile(binary, obj_name, obj_os, debug_dirs, options, + enable_multiple_field, preserve_load_address); + if (!WriteSymbolFile(binary, obj_name, obj_os, module_id, debug_dirs, options, std::cout)) { fprintf(saved_stderr, "Failed to write symbol file.\n"); return 1; diff --git a/src/tools/linux/md2core/minidump-2-core.cc b/src/tools/linux/md2core/minidump-2-core.cc index 3e310bc7d..29560ed18 100644 --- a/src/tools/linux/md2core/minidump-2-core.cc +++ b/src/tools/linux/md2core/minidump-2-core.cc @@ -349,6 +349,34 @@ struct CrashedProcess { std::vector link_map; }; +/* NT_FILE note as defined by linux kernel in fs/binfmt_elf.c + * is structured as: + * long count -- how many files are mapped + * long page_size -- units for file_ofs + * array of [COUNT] elements of + * long start + * long end + * long file_ofs + * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... + * we can re-use the file mappings info + */ +struct NtFileNote { + NtFileNote() + // XXX: we really should source page size from the minidump itself but + // I cannot find anywhere in the minidump generation code where this + // would be stashed. + : page_sz((unsigned long)getpagesize()), + filename_count(0), + filenames_length(0) { + } + + unsigned long page_sz; + unsigned long filename_count; + std::vector file_mappings; + std::vector filenames; + size_t filenames_length; +}; + #if defined(__i386__) static uint32_t U32(const uint8_t* data) { @@ -1298,6 +1326,8 @@ AugmentMappings(const Options& options, CrashedProcess* crashinfo, } AddDataToMapping(crashinfo, crashinfo->dynamic_data, (uintptr_t)crashinfo->debug.dynamic); + } else { + fprintf(stderr, "dynamic data empty\n"); } } @@ -1417,19 +1447,40 @@ main(int argc, const char* argv[]) { if (!writea(options.out_fd, &ehdr, sizeof(Ehdr))) return 1; + struct NtFileNote nt_file; + for (auto iter = crashinfo.mappings.begin(); + iter != crashinfo.mappings.end(); iter++) { + if (iter->second.filename.empty()) + continue; + nt_file.file_mappings.push_back(iter->second.start_address); + nt_file.file_mappings.push_back(iter->second.end_address); + nt_file.file_mappings.push_back(iter->second.offset); + nt_file.filenames.push_back(iter->second.filename); + nt_file.filenames_length += iter->second.filename.length() + 1; + nt_file.filename_count += 1; + } + // implementation of NT_FILE note seems to pad alignment by 4 bytes but + // keep the header size the true size of the note. so we keep nt_file_align + // as separate field. + size_t nt_file_data_sz = (2 * sizeof(unsigned long)) + + (nt_file.file_mappings.size() * sizeof(unsigned long)) + + nt_file.filenames_length; + size_t nt_file_align = nt_file_data_sz % 4 == 0 ? 0 : 4 - + (nt_file_data_sz % 4); size_t offset = sizeof(Ehdr) + ehdr.e_phnum * sizeof(Phdr); size_t filesz = sizeof(Nhdr) + 8 + sizeof(prpsinfo) + - // sizeof(Nhdr) + 8 + sizeof(user) + - sizeof(Nhdr) + 8 + crashinfo.auxv_length + - crashinfo.threads.size() * ( - (sizeof(Nhdr) + 8 + sizeof(prstatus)) + // sizeof(Nhdr) + 8 + sizeof(user) + + sizeof(Nhdr) + 8 + crashinfo.auxv_length + + sizeof(Nhdr) + 8 + nt_file_data_sz + nt_file_align + + crashinfo.threads.size() * ( + (sizeof(Nhdr) + 8 + sizeof(prstatus)) #if defined(__i386__) || defined(__x86_64__) - + sizeof(Nhdr) + 8 + sizeof(user_fpregs_struct) + + sizeof(Nhdr) + 8 + sizeof(user_fpregs_struct) #endif #if defined(__i386__) - + sizeof(Nhdr) + 8 + sizeof(user_fpxregs_struct) + + sizeof(Nhdr) + 8 + sizeof(user_fpxregs_struct) #endif - ); + ); Phdr phdr; memset(&phdr, 0, sizeof(Phdr)); @@ -1492,6 +1543,28 @@ main(int argc, const char* argv[]) { return 1; } + nhdr.n_descsz = nt_file_data_sz; + nhdr.n_type = NT_FILE; + if (!writea(options.out_fd, &nhdr, sizeof(nhdr)) || + !writea(options.out_fd, "CORE\0\0\0\0", 8) || + !writea(options.out_fd, + &nt_file.filename_count, sizeof(nt_file.filename_count)) || + !writea(options.out_fd, &nt_file.page_sz, sizeof(nt_file.page_sz))) { + return 1; + } + for (auto iter = nt_file.file_mappings.begin(); + iter != nt_file.file_mappings.end(); iter++) { + if (!writea(options.out_fd, &*iter, sizeof(*iter))) + return 1; + } + for (auto iter = nt_file.filenames.begin(); + iter != nt_file.filenames.end(); iter++) { + if (!writea(options.out_fd, iter->c_str(), iter->length() + 1)) + return 1; + } + if (!writea(options.out_fd, "\0\0\0\0", nt_file_align)) + return 1; + for (const auto& current_thread : crashinfo.threads) { if (current_thread.tid == crashinfo.exception.tid) { // Use the exception record's context for the crashed thread instead of diff --git a/src/tools/mac/dump_syms/dump_syms_tool.cc b/src/tools/mac/dump_syms/dump_syms_tool.cc index ded4c10e5..cd8e565d7 100644 --- a/src/tools/mac/dump_syms/dump_syms_tool.cc +++ b/src/tools/mac/dump_syms/dump_syms_tool.cc @@ -55,29 +55,19 @@ using google_breakpad::scoped_ptr; using std::vector; struct Options { - Options() - : srcPath(), - dsymPath(), - arch(), - header_only(false), - cfi(true), - handle_inter_cu_refs(true), - handle_inlines(false), - enable_multiple(false), - module_name(), - prefer_extern_name(false) {} + Options() = default; string srcPath; string dsymPath; std::optional arch; - bool header_only; - bool cfi; - bool handle_inter_cu_refs; - bool handle_inlines; - bool enable_multiple; + bool header_only = false; + bool cfi = true; + bool handle_inter_cu_refs = true; + bool handle_inlines = false; + bool enable_multiple = false; string module_name; - bool prefer_extern_name; - bool report_warnings; + bool prefer_extern_name = false; + bool report_warnings = false; }; static bool StackFrameEntryComparator(const Module::StackFrameEntry* a, diff --git a/src/tools/mac/upload_system_symbols/extract.go b/src/tools/mac/upload_system_symbols/extract.go new file mode 100644 index 000000000..f991e3aa8 --- /dev/null +++ b/src/tools/mac/upload_system_symbols/extract.go @@ -0,0 +1,515 @@ +package main + +// #cgo LDFLAGS: -lParallelCompression +// #include +// #include +// #include +// +// typedef struct +// { +// int64_t unknown1; +// int64_t unknown2; +// char *input; +// char *output; +// char *patch; +// uint32_t not_cryptex_cache; +// uint32_t threads; +// uint32_t verbose; +// } RawImage; +// +// extern int32_t RawImagePatch(RawImage *) __attribute__((weak)); +import "C" + +import ( + "archive/zip" + "bytes" + "errors" + "fmt" + "io" + "log" + "os" + "os/exec" + "path" + "path/filepath" + "strconv" + "strings" + "unsafe" +) + +type ArchiveFormat int + +const ( + IPSW ArchiveFormat = iota + Installer +) + +// ExtractCaches extracts any dyld shared caches from `archive` to `destination`. +func ExtractCaches(format ArchiveFormat, archive string, destination string, verbose bool) error { + opts := ExtractorOptions{Verbose: true, Source: archive, Destination: destination} + var e *Extractor + switch format { + case IPSW: + e = NewIPSWExtractor(opts) + case Installer: + e = NewInstallAssistantExtractor(opts) + default: + return fmt.Errorf("unknown format %v", format) + } + return e.Extract() +} + +// NewIPSWExtractor returns an `Extractor` that can handle `.ipsw` files. +func NewIPSWExtractor(opts ExtractorOptions) *Extractor { + ie := &ipswExtractor{} + ie.Extractor = &Extractor{opts: opts, impl: ie} + return ie.Extractor +} + +// NewInstallAssistantExtractor returns an extractor that can handle Apple installers. +func NewInstallAssistantExtractor(opts ExtractorOptions) *Extractor { + ie := &installAssistantExtractor{} + ie.Extractor = &Extractor{opts: opts, impl: ie} + return ie.Extractor +} + +// ExtractorOptions are provided to an extractor to specify source file, destination, +// and whether verbose logging should be used. +type ExtractorOptions struct { + Verbose bool + Source string + Destination string +} + +// Extractor encapsulates the process of extracting dyld shared caches from an IPSW or installer. +type Extractor struct { + opts ExtractorOptions + impl extractorImpl + + scratchDir string + + // If non-empty, the path at which the DMG was mounted. This will + // be un-mounted at the end of Extract(). + dmgMountPaths []string +} + +// Extract extracts any dyld shared caches in `opts.Source` to `opts.Destination`. +func (e *Extractor) Extract() error { + scratchDir, err := os.MkdirTemp("", "extracted_system") + if err != nil { + return fmt.Errorf("couldn't create scratch directory to extract: %v", err) + } + defer os.RemoveAll(scratchDir) + e.scratchDir = scratchDir + + err = e.impl.doExtract() + + for _, path := range e.dmgMountPaths { + unmountErr := unmountDMG(path) + if unmountErr != nil { + err = errors.Join(err, unmountErr) + } + } + + return err +} + +// vlog logs if `opts.Verbose` is set and is a no-op otherwise. +func (e *Extractor) vlog(format string, args ...interface{}) { + if e.opts.Verbose { + fmt.Printf(format+"\n", args...) + } +} + +// mountDMG mounts the disk image at `dmgPath` to mountpoint and tracks it so that +// it can be unmounted by the end of `Extract` +func (e *Extractor) mountDMG(dmgPath string, mountpoint string) error { + cmd := exec.Command("hdiutil", "attach", dmgPath, "-mountpoint", mountpoint, "-quiet", "-nobrowse", "-readonly") + err := cmd.Run() + if err == nil { + e.dmgMountPaths = append(e.dmgMountPaths, mountpoint) + } + return err +} + +// extractorImpl is a private interface implemented by the backend +// extractors. +type extractorImpl interface { + doExtract() error +} + +// ipswExtractor extracts IPSWs +type ipswExtractor struct { + *Extractor +} + +// doExtract extracts dyld shared caches from an IPSW. +// It: +// Extracts the system disk image from the IPSW and mounts it. +// Copies any dyld shared caches from /System/Library/dyld on the mounted +// image to `opts.Destination`. +func (e *ipswExtractor) doExtract() error { + e.vlog("Extracting and mounting system disk:\n") + system, err := e.mountSystemDMG(e.opts.Source) + if err != nil { + return fmt.Errorf("couldn't mount system DMG: %v", err) + } + e.vlog("System mounted at %v\n", system) + e.vlog("Extracting shared caches:\n") + cachesPath := path.Join(system, "System/Library/dyld") + if !pathExists(cachesPath) { + return errors.New("couldn't find /System/Library/dyld") + } + + caches, err := filepath.Glob(path.Join(cachesPath, "dyld_shared_cache*")) + if err != nil { + // "The only possible returned error is ErrBadPattern" so treat + // this like a programmer error. + log.Fatalf("Failed to glob %v", path.Join(cachesPath, "dyld_shared_cache*")) + } + + for _, cache := range caches { + src, err := os.Open(cache) + if err != nil { + return err + } + defer src.Close() + filename := path.Base(cache) + dst, err := os.Create(path.Join(e.opts.Destination, filename)) + if err != nil { + return err + } + defer dst.Close() + e.vlog("Extracted %v\n", filename) + if _, err := io.Copy(dst, src); err != nil { + return err + } + } + + return nil +} + +// mountSystemDMG finds the name of the system image disk in the build manifest inside the +// IPSW at `ipswPath`, mounts it inside `e.scratchDir` and returns the mountpoint. +func (e *ipswExtractor) mountSystemDMG(ipswPath string) (string, error) { + r, err := zip.OpenReader(ipswPath) + if err != nil { + return "", fmt.Errorf("couldn't open ipsw at %s: %v", ipswPath, err) + } + defer r.Close() + dmgPath := "" + for _, f := range r.File { + if f.Name == "BuildManifest.plist" { + manifest, err := os.Create(path.Join(e.scratchDir, f.Name)) + if err != nil { + return "", err + } + if err := extractFileToPath(f, path.Join(e.scratchDir, f.Name)); err != nil { + return "", err + } + path, err := e.getSystemDMGPath(manifest.Name()) + if err != nil { + return "", err + } + dmgPath = path + } + } + if dmgPath == "" { + return "", errors.New("couldn't find build manifest") + } + for _, f := range r.File { + if filepath.Base(f.Name) == dmgPath { + dmgPath := path.Join(e.scratchDir, f.Name) + if err := extractFileToPath(f, dmgPath); err != nil { + return "", err + } + dmgMountpoint := path.Join(e.scratchDir, "Root") + e.vlog("Mounting %v at %v\n", dmgPath, dmgMountpoint) + if err := e.mountDMG(dmgPath, dmgMountpoint); err != nil { + return "", err + } + return dmgMountpoint, nil + } + } + return "", fmt.Errorf("%v not present in %v", dmgPath, ipswPath) +} + +// getSystemDMGPath finds the system disk image inside a IPSW from the build manifest +// at `manifest`. +func (e *ipswExtractor) getSystemDMGPath(manifest string) (string, error) { + print_cmd := "print :BuildIdentities:1:Manifest:Cryptex1,SystemOS:Info:Path" + result, err := exec.Command("PlistBuddy", "-c", print_cmd, manifest).Output() + if err != nil { + return "", err + } + return strings.TrimSpace(string(result)), nil +} + +// installAssistantExtractor extracts Apple installers. +type installAssistantExtractor struct { + *Extractor +} + +// doExtract extracts dyld shared caches from an Apple installer. +// It: +// 1. Expands the installer to disk +// 2. Finds and mounts SharedSupport.dmg +// 3. Determines based on system version whether this installer contains cryptexes or not. +// 4. If cryptexes are present, extracts them to disk images, mounts them, then copies any shared caches +// to `opts.Destination`. Otherwise, finds any payload zips containing shared caches, extracts them, and +// copies the caches to `opts.Destination` +func (e *installAssistantExtractor) doExtract() error { + expandedPath := path.Join(e.scratchDir, "installer") + e.vlog("Expanding installer to %v\n", expandedPath) + if err := e.expandInstaller(e.opts.Source, expandedPath); err != nil { + return fmt.Errorf("expand installer: %v", err) + } + + dmgPath := path.Join(expandedPath, "SharedSupport.dmg") + if !pathExists(dmgPath) { + return fmt.Errorf("couldn't find SharedSupport.dmg at %v", dmgPath) + } + dmgMountpoint := path.Join(e.scratchDir, "shared_support") + e.vlog("Mounting %v at %v\n", dmgPath, dmgMountpoint) + if err := e.mountDMG(dmgPath, dmgMountpoint); err != nil { + return fmt.Errorf("mount %v: %v", dmgPath, err) + } + + zipsPath := path.Join(dmgMountpoint, "com_apple_MobileAsset_MacSoftwareUpdate") + if !pathExists(zipsPath) { + return fmt.Errorf("couldn't find com_apple_MobileAsset_MacSoftwareUpdate on SharedSupport.dmg") + } + hasCryptexes, err := e.hasCryptexes(path.Join(zipsPath, "com_apple_MobileAsset_MacSoftwareUpdate.xml")) + if err != nil { + return fmt.Errorf("couldn't determine system version: %v", err) + } + zips, err := filepath.Glob(path.Join(zipsPath, "*.zip")) + if err != nil { + // "The only possible returned error is ErrBadPattern" so treat + // this like a programmer error. + log.Fatalf("Failed to glob %v", path.Join(zipsPath, "*.zip")) + } + return e.extractCachesFromZips(zips, e.opts.Destination, hasCryptexes) +} + +// expandInstaller expands the installer at `installerPath` to `destinaton`. +func (e *installAssistantExtractor) expandInstaller(installerPath string, destination string) error { + return exec.Command("pkgutil", "--expand-full", installerPath, destination).Run() +} + +// hasCryptexes returns true if the installer containing the plist at `plistPath` is for +// macOS version 13 or higher, and accordingly stores dyld shared caches inside cryptexes. +func (e *installAssistantExtractor) hasCryptexes(plistPath string) (bool, error) { + print_cmd := "print :Assets:0:OSVersion" + result, err := exec.Command("PlistBuddy", "-c", print_cmd, plistPath).Output() + if err != nil { + return false, fmt.Errorf("couldn't read OS version from %s: %v", plistPath, err) + } + majorVersion := strings.Split(string(result), ".")[0] + if v, err := strconv.Atoi(majorVersion); err != nil { + return false, fmt.Errorf("couldn't parse major version %s:%v", majorVersion, err) + } else { + return v >= 13, nil + } +} + +// extractCachesFromZips extracts zips that contain dyld shared caches, and extracts the dyld shared caches from them. +// The specifics depend on whether this installer uses cryptexes or payload files. +func (e *installAssistantExtractor) extractCachesFromZips(zips []string, destination string, hasCryptexes bool) error { + containerPath := path.Join(e.scratchDir, "container") + if !hasCryptexes { + if err := e.unarchiveZipsMatching(zips, containerPath, "AssetData/payloadv2/payload.0??"); err != nil { + return err + } + if err := e.extractCachesFromPayloads(containerPath, destination); err != nil { + return fmt.Errorf("couldn't extract caches from %v: %v", containerPath, err) + } + } else { + if err := e.unarchiveZipsMatching(zips, containerPath, "AssetData/payloadv2/image_patches/cryptex-system-*"); err != nil { + return err + } + if err := e.extractCachesFromCryptexes(containerPath, destination); err != nil { + return fmt.Errorf("couldn't extract caches from %v: %v", containerPath, err) + } + } + return nil +} + +// unarchiveZipsMatching unarchives all files matching `glob` from the zip files in `zips` to destination. +func (e *installAssistantExtractor) unarchiveZipsMatching(zips []string, destination string, glob string) error { + for _, zipFile := range zips { + archive, err := zip.OpenReader(zipFile) + if err != nil { + return fmt.Errorf("couldn't read %v: %v", zipFile, err) + } + defer archive.Close() + e.vlog("Unarchiving %v\n", zipFile) + if err := e.unarchiveFilesMatching(archive, destination, glob); err != nil { + return fmt.Errorf("couldn't unarchive files matching %v from %v", glob, zipFile) + } + } + return nil +} + +// unarchiveFilesMatching unarchives all files matching `glob` from `r` to destination. +func (e *installAssistantExtractor) unarchiveFilesMatching(r *zip.ReadCloser, destination string, glob string) error { + if err := os.MkdirAll(destination, 0755); err != nil { + return err + } + for _, file := range r.File { + if file.FileInfo().IsDir() { + continue + } + if ok, err := path.Match(glob, file.Name); !ok || err != nil { + continue + } + _, filename := path.Split(file.Name) + writePath := path.Join(destination, filename) + if err := extractFileToPath(file, writePath); err != nil { + return err + } + } + return nil +} + +// extractCachesFromPayloads unarchives any files containing dyld shared caches from `payloadsPath` +// then copies any shared caches to `destination`. +func (e *installAssistantExtractor) extractCachesFromPayloads(payloadsPath string, destination string) error { + scratchDir, err := os.MkdirTemp(e.scratchDir, "payload") + if err != nil { + return err + } + files, err := os.ReadDir(payloadsPath) + if err != nil { + return err + } + for _, f := range files { + payload := path.Join(payloadsPath, f.Name()) + if e.payloadHasSharedCache(payload) { + e.vlog("Extracting %v\n", payload) + e.extractPayload(payload, scratchDir) + } + } + return e.copySharedCaches(scratchDir, destination) +} + +// payloadHasSharedCache returns true if the archive at `payloadPath` contains a dyld shared cache. +func (e *installAssistantExtractor) payloadHasSharedCache(payloadPath string) bool { + out, err := exec.Command("yaa", "list", "-i", payloadPath).Output() + return err == nil && bytes.Contains(out, []byte("/dyld_shared_cache")) +} + +// extractPayload extracts the apple archive at `payloadPath` to `destination`. +func (e *installAssistantExtractor) extractPayload(payloadPath string, destination string) error { + return exec.Command("yaa", "extract", "-i", payloadPath, "-d", destination).Run() +} + +// copySharedCaches copies the contents of `System/Library/dyld` in `from` to `to`. +func (e *installAssistantExtractor) copySharedCaches(from, to string) error { + dyldPath := path.Join(from, "System/Library/dyld") + if !pathExists(dyldPath) { + return fmt.Errorf("couldn't find System/Library/dyld in %s", dyldPath) + } + cacheFiles, err := os.ReadDir(dyldPath) + if err != nil { + return err + } + for _, cacheFile := range cacheFiles { + name := cacheFile.Name() + src := path.Join(dyldPath, name) + dst := path.Join(to, name) + e.vlog("Copying %v to %v\n", src, dst) + if err := copyFile(src, dst); err != nil { + return fmt.Errorf("couldn't copy %s to %s: %v", src, dst, err) + } + } + return nil +} + +// extractCachesFromCryptexes extracts disk images from any cryptexes at `cryptexesPath`, mounts them, +// and extracts any dyld shared caches to `destination`. +func (e *installAssistantExtractor) extractCachesFromCryptexes(cryptexesPath string, destination string) error { + scratchDir, err := os.MkdirTemp(e.scratchDir, "cryptex_dmg") + if err != nil { + return err + } + files, err := os.ReadDir(cryptexesPath) + if err != nil { + return err + } + for _, f := range files { + cryptexMountpoint := path.Join(scratchDir, "cryptex_"+f.Name()) + cryptex := path.Join(cryptexesPath, f.Name()) + dmgPath := path.Join(scratchDir, f.Name()+".dmg") + e.extractCryptexDMG(cryptex, dmgPath) + e.vlog("Mounting %s at %s\n", cryptex, dmgPath) + e.mountDMG(dmgPath, cryptexMountpoint) + if err := e.copySharedCaches(cryptexMountpoint, destination); err != nil { + return err + } + } + return nil +} + +// extractCryptexDMG extracts the cryptex at `cryptexPath` to `dmgPath` using libParallelCompression. +func (e *installAssistantExtractor) extractCryptexDMG(cryptexPath, dmgPath string) error { + inp := C.CString("") + defer C.free(unsafe.Pointer(inp)) + cryptex := C.CString(cryptexPath) + defer C.free(unsafe.Pointer(cryptex)) + result := C.CString(dmgPath) + defer C.free(unsafe.Pointer(result)) + + ri := C.RawImage{unknown1: 0, unknown2: 0, input: inp, output: result, patch: cryptex, not_cryptex_cache: 0, threads: 0, verbose: 1} + if exitCode := C.RawImagePatch(&ri); exitCode != 0 { + return fmt.Errorf("RawImagePatch failed with %d", exitCode) + } + return nil +} + +// pathExists returns true if `path` exists. +func pathExists(path string) bool { + _, err := os.Stat(path) + return !os.IsNotExist(err) +} + +// extractFileToPath extracts the contents of `f` to `path`. +func extractFileToPath(f *zip.File, path string) error { + w, err := os.Create(path) + if err != nil { + return err + } + defer w.Close() + reader, err := f.Open() + if err != nil { + return err + } + + if _, err := io.Copy(w, reader); err != nil { + return err + } + return nil +} + +// copyFile copies `src` to `dst`, which can be on different volumes. +func copyFile(src, dst string) error { + w, err := os.Create(dst) + if err != nil { + return err + } + defer w.Close() + + reader, err := os.Open(src) + if err != nil { + return err + } + defer reader.Close() + if _, err := io.Copy(w, reader); err != nil { + return err + } + return nil +} + +// unmountDMG unmounts the disk image at `mountpoint`. +func unmountDMG(mountpoint string) error { + return exec.Command("hdiutil", "detach", mountpoint).Run() +} diff --git a/src/tools/mac/upload_system_symbols/upload_system_symbols.go b/src/tools/mac/upload_system_symbols/upload_system_symbols.go index f34c288ae..b7b3cb556 100644 --- a/src/tools/mac/upload_system_symbols/upload_system_symbols.go +++ b/src/tools/mac/upload_system_symbols/upload_system_symbols.go @@ -46,11 +46,11 @@ import ( "flag" "fmt" "io" - "io/ioutil" "log" "os" "os/exec" "path" + "path/filepath" "regexp" "strings" "sync" @@ -61,9 +61,11 @@ var ( breakpadTools = flag.String("breakpad-tools", "out/Release/", "Path to the Breakpad tools directory, containing dump_syms and symupload.") uploadOnlyPath = flag.String("upload-from", "", "Upload a directory of symbol files that has been dumped independently.") dumpOnlyPath = flag.String("dump-to", "", "Dump the symbols to the specified directory, but do not upload them.") - systemRoot = flag.String("system-root", "", "Path to the root of the Mac OS X system whose symbols will be dumped.") + systemRoot = flag.String("system-root", "", "Path to the root of the macOS system whose symbols will be dumped. Mutually exclusive with --installer and --ipsw.") dumpArchitecture = flag.String("arch", "", "The CPU architecture for which symbols should be dumped. If not specified, dumps all architectures.") apiKey = flag.String("api-key", "", "API key to use. If this is present, the `sym-upload-v2` protocol is used.\nSee https://chromium.googlesource.com/breakpad/breakpad/+/HEAD/docs/sym_upload_v2_protocol.md or\n`symupload`'s help for more information.") + installer = flag.String("installer", "", "Path to macOS installer. Mutually exclusive with --system-root and --ipsw.") + ipsw = flag.String("ipsw", "", "Path to macOS IPSW. Mutually exclusive with --system-root and --installer.") ) var ( @@ -129,26 +131,13 @@ func main() { return } - if *systemRoot == "" { - log.Fatal("Need a -system-root to dump symbols for") - } - - if *dumpOnlyPath != "" { - // -dump-to specified, so make sure that the path is a directory. - if fi, err := os.Stat(*dumpOnlyPath); err != nil { - log.Fatalf("-dump-to location: %v", err) - } else if !fi.IsDir() { - log.Fatal("-dump-to location is not a directory") - } - } - dumpPath := *dumpOnlyPath if *dumpOnlyPath == "" { // If -dump-to was not specified, then run the upload pipeline and create // a temporary dump output directory. uq = StartUploadQueue() - if p, err := ioutil.TempDir("", "upload_system_symbols"); err != nil { + if p, err := os.MkdirTemp("", "upload_system_symbols"); err != nil { log.Fatalf("Failed to create temporary directory: %v", err) } else { dumpPath = p @@ -156,13 +145,61 @@ func main() { } } - dq := StartDumpQueue(*systemRoot, dumpPath, uq) + tempDir, err := os.MkdirTemp("", "systemRoots") + if err != nil { + log.Fatalf("Failed to create temporary directory: %v", err) + } + defer os.RemoveAll(tempDir) + roots := getSystemRoots(tempDir) + + if *dumpOnlyPath != "" { + // -dump-to specified, so make sure that the path is a directory. + if fi, err := os.Stat(*dumpOnlyPath); err != nil { + log.Fatalf("-dump-to location: %v", err) + } else if !fi.IsDir() { + log.Fatal("-dump-to location is not a directory") + } + } + + dq := StartDumpQueue(roots, dumpPath, uq) dq.Wait() if uq != nil { uq.Wait() } } +// getSystemRoots returns which system roots should be dumped from the parsed +// flags, extracting them if necessary. +func getSystemRoots(tempDir string) []string { + hasInstaller := len(*installer) > 0 + hasIPSW := len(*ipsw) > 0 + hasRoot := len(*systemRoot) > 0 + + if hasInstaller { + if hasIPSW || hasRoot { + log.Fatalf("--installer, --ipsw, and --system-root are mutually exclusive") + } + if rs, err := extractSystems(Installer, *installer, tempDir); err != nil { + log.Fatalf("Couldn't extract installer at %s: %v", *installer, err) + } else { + return rs + } + } else if hasIPSW { + if hasRoot { + log.Fatalf("--installer, --ipsw, and --system-root are mutually exclusive") + } + if rs, err := extractSystems(IPSW, *ipsw, tempDir); err != nil { + log.Fatalf("Couldn't extract IPSW at %s: %v", *ipsw, err) + } else { + return rs + } + } else if hasRoot { + return []string{*systemRoot} + } + log.Fatal("Need a --system-root, --installer, or --ipsw to dump symbols for") + return []string{} +} + // manglePath reduces an absolute filesystem path to a string suitable as the // base for a file name which encodes some of the original path. The result // concatenates the leading initial from each path component except the last to @@ -282,7 +319,7 @@ type dumpRequest struct { // StartDumpQueue creates a new worker pool to find all the Mach-O libraries in // root and dump their symbols to dumpPath. If an UploadQueue is passed, the // path to the symbol file will be enqueued there, too. -func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue { +func StartDumpQueue(roots []string, dumpPath string, uq *UploadQueue) *DumpQueue { dq := &DumpQueue{ dumpPath: dumpPath, queue: make(chan dumpRequest), @@ -290,7 +327,7 @@ func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue { } dq.WorkerPool = StartWorkerPool(12, dq.worker) - findLibsInRoot(root, dq) + findLibsInRoots(roots, dq) return dq } @@ -369,21 +406,22 @@ type findQueue struct { dq *DumpQueue } -// findLibsInRoot looks in all the pathsToScan in the root and manages the +// findLibsInRoot looks in all the pathsToScan in all roots and manages the // interaction between findQueue and DumpQueue. -func findLibsInRoot(root string, dq *DumpQueue) { +func findLibsInRoots(roots []string, dq *DumpQueue) { fq := &findQueue{ queue: make(chan string, 10), dq: dq, } fq.WorkerPool = StartWorkerPool(12, fq.worker) + for _, root := range roots { + for _, p := range pathsToScan { + fq.findLibsInPath(path.Join(root, p), true) + } - for _, p := range pathsToScan { - fq.findLibsInPath(path.Join(root, p), true) - } - - for _, p := range optionalPathsToScan { - fq.findLibsInPath(path.Join(root, p), false) + for _, p := range optionalPathsToScan { + fq.findLibsInPath(path.Join(root, p), false) + } } close(fq.queue) @@ -482,3 +520,45 @@ func (fq *findQueue) dumpMachOFile(fp string, image *macho.File) { fq.dq.DumpSymbols(fp, arch) } } + +// extractSystems extracts any dyld shared caches from `archive`, then extracts the caches +// into macOS system libraries, returning the locations on disk of any systems extracted. +func extractSystems(format ArchiveFormat, archive string, extractPath string) ([]string, error) { + cachesPath := path.Join(extractPath, "caches") + if err := os.MkdirAll(cachesPath, 0755); err != nil { + return nil, err + } + if err := ExtractCaches(format, archive, cachesPath, true); err != nil { + return nil, err + } + files, err := os.ReadDir(cachesPath) + if err != nil { + return nil, err + } + cachePrefix := "dyld_shared_cache_" + extractedDirPath := path.Join(extractPath, "extracted") + roots := make([]string, len(files)) + for _, file := range files { + fileName := file.Name() + if filepath.Ext(fileName) == "" && strings.HasPrefix(fileName, cachePrefix) { + arch := strings.TrimPrefix(fileName, cachePrefix) + extractedSystemPath := path.Join(extractedDirPath, arch) + // XXX: Maybe this shouldn't be fatal? + if err := extractDyldSharedCache(path.Join(cachesPath, fileName), extractedSystemPath); err != nil { + return nil, err + } + roots = append(roots, extractedSystemPath) + } + } + return roots, nil +} + +// extractDyldSharedCache extracts the dyld shared cache at `cachePath` to `destination`. +func extractDyldSharedCache(cachePath string, destination string) error { + dscExtractor := path.Join(*breakpadTools, "dsc_extractor") + cmd := exec.Command(dscExtractor, cachePath, destination) + if output, err := cmd.Output(); err != nil { + return fmt.Errorf("extracting shared cache at %s: %v. dsc_extractor said %v", cachePath, err, output) + } + return nil +} diff --git a/src/tools/windows/converter_exe/winsymconv.cmd b/src/tools/windows/converter_exe/winsymconv.cmd index bea84b589..ca1847847 100644 --- a/src/tools/windows/converter_exe/winsymconv.cmd +++ b/src/tools/windows/converter_exe/winsymconv.cmd @@ -39,6 +39,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/staging_symbol ^ -m https://clients2.google.com/cr/staging_symbol/missingsymbols ^ @@ -58,6 +59,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/symbol ^ -m https://clients2.google.com/cr/symbol/missingsymbols ^ diff --git a/src/tools/windows/converter_exe/winsymconv_test.cmd b/src/tools/windows/converter_exe/winsymconv_test.cmd index c17770660..448244d84 100644 --- a/src/tools/windows/converter_exe/winsymconv_test.cmd +++ b/src/tools/windows/converter_exe/winsymconv_test.cmd @@ -37,6 +37,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/staging_symbol ^ -mf %SCRIPT_LOCATION%missing_symbols_test.txt ^ @@ -56,6 +57,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/symbol ^ -mf %SCRIPT_LOCATION%missing_symbols_test.txt ^ diff --git a/src/tools/windows/symupload/symupload.cc b/src/tools/windows/symupload/symupload.cc index 46ace95ad..20f48a277 100644 --- a/src/tools/windows/symupload/symupload.cc +++ b/src/tools/windows/symupload/symupload.cc @@ -305,12 +305,12 @@ int wmain(int argc, wchar_t* argv[]) { _wunlink(symbol_file.c_str()); - if (success) { - wprintf(L"Uploaded breakpad symbols for windows-%s/%s/%s (%s %s)\n", - pdb_info.cpu.c_str(), pdb_info.debug_file.c_str(), - pdb_info.debug_identifier.c_str(), code_file.c_str(), - file_version.c_str()); - } + fwprintf(success ? stdout : stderr, + L"%S breakpad symbols for windows-%s/%s/%s (%s %s)\n", + success ? "Uploaded" : "Failed to upload", + pdb_info.cpu.c_str(), pdb_info.debug_file.c_str(), + pdb_info.debug_identifier.c_str(), code_file.c_str(), + file_version.c_str()); return success ? 0 : 1; }