From 22f54f197fe5be28abda6e00bdfaaed9e87f9851 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Fri, 5 Jan 2024 12:29:29 -0500 Subject: [PATCH 01/37] [dump_syms] Handle DW_TAG_lexical_block to caputre inline info inside. Bug: b/317143556 Change-Id: Iba82712fedf7d126c2392cfc0f157ded2bca5219 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5171059 Reviewed-by: Ivan Penkov --- src/common/dwarf_cu_to_module.cc | 52 ++++++++++++++++++++++++++++++++ src/common/dwarf_cu_to_module.h | 1 + 2 files changed, 53 insertions(+) diff --git a/src/common/dwarf_cu_to_module.cc b/src/common/dwarf_cu_to_module.cc index 94a0d428e..613f97854 100644 --- a/src/common/dwarf_cu_to_module.cc +++ b/src/common/dwarf_cu_to_module.cc @@ -562,6 +562,29 @@ static bool IsEmptyRange(const vector& ranges) { return size == 0; } +// A handler for DW_TAG_lexical_block DIEs. +class DwarfCUToModule::LexicalBlockHandler : public GenericDIEHandler { + public: + LexicalBlockHandler(CUContext* cu_context, + uint64_t offset, + int inline_nest_level, + vector>& inlines) + : GenericDIEHandler(cu_context, nullptr, offset), + inline_nest_level_(inline_nest_level), + inlines_(inlines) {} + + DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag); + bool EndAttributes() { return true; } + void Finish(); + + private: + int inline_nest_level_; + // A vector of inlines in the same nest level. It's owned by its parent + // function/inline. At Finish(), add this inline into the vector. + vector>& inlines_; + // A vector of child inlines. + vector> child_inlines_; +}; // A handler for DW_TAG_inlined_subroutine DIEs. class DwarfCUToModule::InlineHandler : public GenericDIEHandler { @@ -645,6 +668,9 @@ DIEHandler* DwarfCUToModule::InlineHandler::FindChildHandler( case DW_TAG_inlined_subroutine: return new InlineHandler(cu_context_, nullptr, offset, inline_nest_level_ + 1, child_inlines_); + case DW_TAG_lexical_block: + return new LexicalBlockHandler(cu_context_, offset, + inline_nest_level_ + 1, child_inlines_); default: return NULL; } @@ -715,6 +741,29 @@ void DwarfCUToModule::InlineHandler::Finish() { inlines_.push_back(std::move(in)); } +DIEHandler* DwarfCUToModule::LexicalBlockHandler::FindChildHandler( + uint64_t offset, + enum DwarfTag tag) { + switch (tag) { + case DW_TAG_inlined_subroutine: + return new InlineHandler(cu_context_, nullptr, offset, inline_nest_level_, + child_inlines_); + case DW_TAG_lexical_block: + return new LexicalBlockHandler(cu_context_, offset, inline_nest_level_, + child_inlines_); + default: + return nullptr; + } +} + +void DwarfCUToModule::LexicalBlockHandler::Finish() { + // Insert child inlines inside the lexical block into the inline vector from + // parent as if the block does not exit. + inlines_.insert(inlines_.end(), + std::make_move_iterator(child_inlines_.begin()), + std::make_move_iterator(child_inlines_.end())); +} + // A handler for DIEs that contain functions and contribute a // component to their names: namespaces, classes, etc. class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler { @@ -831,6 +880,9 @@ DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler( case DW_TAG_union_type: return new NamedScopeHandler(cu_context_, &child_context_, offset, handle_inline_); + case DW_TAG_lexical_block: + if (handle_inline_) + return new LexicalBlockHandler(cu_context_, offset, 0, child_inlines_); default: return NULL; } diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h index c7ada306c..d83108437 100644 --- a/src/common/dwarf_cu_to_module.h +++ b/src/common/dwarf_cu_to_module.h @@ -357,6 +357,7 @@ class DwarfCUToModule: public RootDIEHandler { struct Specification; class GenericDIEHandler; class FuncHandler; + class LexicalBlockHandler; class InlineHandler; class NamedScopeHandler; From 62ecd463583d09eb7d15b1d410055f30b2c7bcb4 Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Thu, 11 Jan 2024 11:05:43 -0500 Subject: [PATCH 02/37] Mac: teach upload_system_symbols to extract installers and IPSWs This change introduces two new flags: `--ipsw` and `--installer` which are mutually exclusive with `--system-root` and each other. Each takes a file path as an argument, which is expected to be an IPSW for `--ipsw` and an Apple installer for `--installer`. Calling `upload_system_symbols` with these arguments will cause it to find any dyld shared caches present inside the given IPSW or installer, extract them via `dsc_extractor` in `--breakpad-tools`, then behave as if it had been called with the resulting libraries as `--system-root`. Bug: chromium:1400770 Change-Id: I7f98e0c6ab069a2e960f12773d800d8a5a37221f Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5089008 Reviewed-by: Robert Sesek --- .../mac/upload_system_symbols/extract.go | 514 ++++++++++++++++++ .../upload_system_symbols.go | 134 ++++- 2 files changed, 621 insertions(+), 27 deletions(-) create mode 100644 src/tools/mac/upload_system_symbols/extract.go diff --git a/src/tools/mac/upload_system_symbols/extract.go b/src/tools/mac/upload_system_symbols/extract.go new file mode 100644 index 000000000..acd674ef3 --- /dev/null +++ b/src/tools/mac/upload_system_symbols/extract.go @@ -0,0 +1,514 @@ +package main + +// #cgo LDFLAGS: -lParallelCompression +// #include +// #include +// #include +// +// typedef struct +// { +// int64_t unknown1; +// int64_t unknown2; +// char *input; +// char *output; +// char *patch; +// uint32_t not_cryptex_cache; +// uint32_t threads; +// uint32_t verbose; +// } RawImage; +// +// extern int32_t RawImagePatch(RawImage *); +import "C" + +import ( + "archive/zip" + "bytes" + "errors" + "fmt" + "io" + "log" + "os" + "os/exec" + "path" + "path/filepath" + "strconv" + "strings" + "unsafe" +) + +type ArchiveFormat int + +const ( + IPSW ArchiveFormat = iota + Installer +) + +// ExtractCaches extracts any dyld shared caches from `archive` to `destination`. +func ExtractCaches(format ArchiveFormat, archive string, destination string, verbose bool) error { + opts := ExtractorOptions{Verbose: true, Source: archive, Destination: destination} + var e *Extractor + switch format { + case IPSW: + e = NewIPSWExtractor(opts) + case Installer: + e = NewInstallAssistantExtractor(opts) + default: + return fmt.Errorf("unknown format %v", format) + } + return e.Extract() +} + +// NewIPSWExtractor returns an `Extractor` that can handle `.ipsw` files. +func NewIPSWExtractor(opts ExtractorOptions) *Extractor { + ie := &ipswExtractor{} + ie.Extractor = &Extractor{opts: opts, impl: ie} + return ie.Extractor +} + +// NewInstallAssistantExtractor returns an extractor that can handle Apple installers. +func NewInstallAssistantExtractor(opts ExtractorOptions) *Extractor { + ie := &installAssistantExtractor{} + ie.Extractor = &Extractor{opts: opts, impl: ie} + return ie.Extractor +} + +// ExtractorOptions are provided to an extractor to specify source file, destination, +// and whether verbose logging should be used. +type ExtractorOptions struct { + Verbose bool + Source string + Destination string +} + +// Extractor encapsulates the process of extracting dyld shared caches from an IPSW or installer. +type Extractor struct { + opts ExtractorOptions + impl extractorImpl + + scratchDir string + + // If non-empty, the path at which the DMG was mounted. This will + // be un-mounted at the end of Extract(). + dmgMountPaths []string +} + +// Extract extracts any dyld shared caches in `opts.Source` to `opts.Destination`. +func (e *Extractor) Extract() error { + scratchDir, err := os.MkdirTemp("", "extracted_system") + if err != nil { + return fmt.Errorf("couldn't create scratch directory to extract: %v", err) + } + defer os.RemoveAll(scratchDir) + e.scratchDir = scratchDir + + err = e.impl.doExtract() + + for _, path := range e.dmgMountPaths { + unmountErr := unmountDMG(path) + if unmountErr != nil { + err = errors.Join(err, unmountErr) + } + } + + return err +} + +// vlog logs if `opts.Verbose` is set and is a no-op otherwise. +func (e *Extractor) vlog(format string, args ...interface{}) { + if e.opts.Verbose { + fmt.Printf(format+"\n", args...) + } +} + +// mountDMG mounts the disk image at `dmgPath` to mountpoint and tracks it so that +// it can be unmounted by the end of `Extract` +func (e *Extractor) mountDMG(dmgPath string, mountpoint string) error { + cmd := exec.Command("hdiutil", "attach", dmgPath, "-mountpoint", mountpoint, "-quiet", "-nobrowse", "-readonly") + err := cmd.Run() + if err != nil { + e.dmgMountPaths = append(e.dmgMountPaths, mountpoint) + } + return err +} + +// extractorImpl is a private interface implemented by the backend +// extractors. +type extractorImpl interface { + doExtract() error +} + +// ipswExtractor extracts IPSWs +type ipswExtractor struct { + *Extractor +} + +// doExtract extracts dyld shared caches from an IPSW. +// It: +// Extracts the system disk image from the IPSW and mounts it. +// Copies any dyld shared caches from /System/Library/dyld on the mounted +// image to `opts.Destination`. +func (e *ipswExtractor) doExtract() error { + e.vlog("Extracting and mounting system disk:\n") + system, err := e.mountSystemDMG(e.opts.Source) + if err != nil { + return fmt.Errorf("couldn't mount system DMG: %v", err) + } + e.vlog("System mounted at %v\n", system) + e.vlog("Extracting shared caches:\n") + cachesPath := path.Join(system, "System/Library/dyld") + if !pathExists(cachesPath) { + return errors.New("couldn't find /System/Library/dyld") + } + + caches, err := filepath.Glob(path.Join(cachesPath, "dyld_shared_cache*")) + if err != nil { + // "The only possible returned error is ErrBadPattern" so treat + // this like a programmer error. + log.Fatalf("Failed to glob %v", path.Join(cachesPath, "dyld_shared_cache*")) + } + + for _, cache := range caches { + src, err := os.Open(cache) + if err != nil { + return err + } + defer src.Close() + filename := path.Base(cache) + dst, err := os.Create(path.Join(e.opts.Destination, filename)) + if err != nil { + return err + } + defer dst.Close() + e.vlog("Extracted %v\n", filename) + if _, err := io.Copy(dst, src); err != nil { + return err + } + } + + return nil +} + +// mountSystemDMG finds the name of the system image disk in the build manifest inside the +// IPSW at `ipswPath`, mounts it inside `e.scratchDir` and returns the mountpoint. +func (e *ipswExtractor) mountSystemDMG(ipswPath string) (string, error) { + r, err := zip.OpenReader(ipswPath) + if err != nil { + return "", fmt.Errorf("couldn't open ipsw at %s: %v", ipswPath, err) + } + defer r.Close() + dmgPath := "" + for _, f := range r.File { + if f.Name == "BuildManifest.plist" { + manifest, err := os.Create(path.Join(e.scratchDir, f.Name)) + if err != nil { + return "", err + } + if err := extractFileToPath(f, path.Join(e.scratchDir, f.Name)); err != nil { + return "", err + } + path, err := e.getSystemDMGPath(manifest.Name()) + if err != nil { + return "", err + } + dmgPath = path + } + } + if dmgPath == "" { + return "", errors.New("couldn't find build manifest") + } + for _, f := range r.File { + if filepath.Base(f.Name) == dmgPath { + dmgPath := path.Join(e.scratchDir, f.Name) + if err := extractFileToPath(f, dmgPath); err != nil { + return "", err + } + dmgMountpoint := path.Join(e.scratchDir, "Root") + e.vlog("Mounting %v at %v\n", dmgPath, dmgMountpoint) + if err := e.mountDMG(dmgPath, dmgMountpoint); err != nil { + return "", err + } + return dmgMountpoint, nil + } + } + return "", fmt.Errorf("%v not present in %v", dmgPath, ipswPath) +} + +// getSystemDMGPath finds the system disk image inside a IPSW from the build manifest +// at `manifest`. +func (e *ipswExtractor) getSystemDMGPath(manifest string) (string, error) { + print_cmd := "print :BuildIdentities:1:Manifest:Cryptex1,SystemOS:Info:Path" + result, err := exec.Command("PlistBuddy", "-c", print_cmd, manifest).Output() + if err != nil { + return "", err + } + return strings.TrimSpace(string(result)), nil +} + +// installAssistantExtractor extracts Apple installers. +type installAssistantExtractor struct { + *Extractor +} + +// doExtract extracts dyld shared caches from an Apple installer. +// It: +// 1. Expands the installer to disk +// 2. Finds and mounts SharedSupport.dmg +// 3. Determines based on system version whether this installer contains cryptexes or not. +// 4. If cryptexes are present, extracts them to disk images, mounts them, then copies any shared caches +// to `opts.Destination`. Otherwise, finds any payload zips containing shared caches, extracts them, and +// copies the caches to `opts.Destination` +func (e *installAssistantExtractor) doExtract() error { + expandedPath := path.Join(e.scratchDir, "installer") + e.vlog("Expanding installer to %v\n", expandedPath) + if err := e.expandInstaller(e.opts.Source, expandedPath); err != nil { + return fmt.Errorf("expand installer: %v", err) + } + + dmgPath := path.Join(expandedPath, "SharedSupport.dmg") + if !pathExists(dmgPath) { + return fmt.Errorf("couldn't find SharedSupport.dmg at %v", dmgPath) + } + dmgMountpoint := path.Join(e.scratchDir, "shared_support") + e.vlog("Mounting %v at %v\n", dmgPath, dmgMountpoint) + if err := e.mountDMG(dmgPath, dmgMountpoint); err != nil { + return fmt.Errorf("mount %v: %v", dmgPath, err) + } + + zipsPath := path.Join(dmgMountpoint, "com_apple_MobileAsset_MacSoftwareUpdate") + if !pathExists(zipsPath) { + return fmt.Errorf("couldn't find com_apple_MobileAsset_MacSoftwareUpdate on SharedSupport.dmg") + } + hasCryptexes, err := e.hasCryptexes(path.Join(zipsPath, "com_apple_MobileAsset_MacSoftwareUpdate.xml")) + if err != nil { + return fmt.Errorf("couldn't determine system version: %v", err) + } + zips, err := filepath.Glob(path.Join(zipsPath, "*.zip")) + if err != nil { + // "The only possible returned error is ErrBadPattern" so treat + // this like a programmer error. + log.Fatalf("Failed to glob %v", path.Join(zipsPath, "*.zip")) + } + return e.extractCachesFromZips(zips, e.opts.Destination, hasCryptexes) +} + +// expandInstaller expands the installer at `installerPath` to `destinaton`. +func (e *installAssistantExtractor) expandInstaller(installerPath string, destination string) error { + return exec.Command("pkgutil", "--expand-full", installerPath, destination).Run() +} + +// hasCryptexes returns true if the installer containing the plist at `plistPath` is for +// macOS version 13 or higher, and accordingly stores dyld shared caches inside cryptexes. +func (e *installAssistantExtractor) hasCryptexes(plistPath string) (bool, error) { + print_cmd := "print :Assets:1:OSVersion" + result, err := exec.Command("PlistBuddy", "-c", print_cmd, plistPath).Output() + if err != nil { + return false, fmt.Errorf("couldn't read OS version from %s: %v", plistPath, err) + } + majorVersion := strings.Split(string(result), ".")[0] + if v, err := strconv.Atoi(majorVersion); err != nil { + return false, fmt.Errorf("couldn't parse major version %s:%v", majorVersion, err) + } else { + return v >= 13, nil + } +} + +// extractCachesFromZips extracts zips that contain dyld shared caches, and extracts the dyld shared caches from them. +// The specifics depend on whether this installer uses cryptexes or payload files. +func (e *installAssistantExtractor) extractCachesFromZips(zips []string, destination string, hasCryptexes bool) error { + containerPath := path.Join(e.scratchDir, "container") + if !hasCryptexes { + if err := e.unarchiveZipsMatching(zips, containerPath, "AssetData/payloadv2/payload.0??"); err != nil { + return err + } + if err := e.extractCachesFromPayloads(containerPath, destination); err != nil { + return fmt.Errorf("couldn't extract caches from %v: %v", containerPath, err) + } + } else { + if err := e.unarchiveZipsMatching(zips, containerPath, "AssetData/payloadv2/image_patches/cryptex-system-*"); err != nil { + return err + } + if err := e.extractCachesFromCryptexes(containerPath, destination); err != nil { + return fmt.Errorf("couldn't extract caches from %v: %v", containerPath, err) + } + } + return nil +} + +// unarchiveZipsMatching unarchives all files matching `glob` from the zip files in `zips` to destination. +func (e *installAssistantExtractor) unarchiveZipsMatching(zips []string, destination string, glob string) error { + for _, zipFile := range zips { + archive, err := zip.OpenReader(zipFile) + if err != nil { + return fmt.Errorf("couldn't read %v: %v", zipFile, err) + } + defer archive.Close() + e.vlog("Unarchiving %v\n", zipFile) + if err := e.unarchiveFilesMatching(archive, destination, glob); err != nil { + return fmt.Errorf("couldn't unarchive files matching %v from %v", glob, zipFile) + } + } + return nil +} + +// unarchiveFilesMatching unarchives all files matching `glob` from `r` to destination. +func (e *installAssistantExtractor) unarchiveFilesMatching(r *zip.ReadCloser, destination string, glob string) error { + if err := os.MkdirAll(destination, 0755); err != nil { + return err + } + for _, file := range r.File { + if file.FileInfo().IsDir() { + continue + } + if ok, err := path.Match(glob, file.Name); !ok || err != nil { + continue + } + _, filename := path.Split(file.Name) + writePath := path.Join(destination, filename) + if err := extractFileToPath(file, writePath); err != nil { + return err + } + } + return nil +} + +// extractCachesFromPayloads unarchives any files containing dyld shared caches from `payloadsPath` +// then copies any shared caches to `destination`. +func (e *installAssistantExtractor) extractCachesFromPayloads(payloadsPath string, destination string) error { + scratchDir, err := os.MkdirTemp(e.scratchDir, "payload") + if err != nil { + return err + } + files, err := os.ReadDir(payloadsPath) + if err != nil { + return err + } + for _, f := range files { + payload := path.Join(payloadsPath, f.Name()) + if e.payloadHasSharedCache(payload) { + e.vlog("Extracting %v\n", payload) + e.extractPayload(payload, scratchDir) + } + } + return e.copySharedCaches(scratchDir, destination) +} + +// payloadHasSharedCache returns true if the archive at `payloadPath` contains a dyld shared cache. +func (e *installAssistantExtractor) payloadHasSharedCache(payloadPath string) bool { + out, err := exec.Command("yaa", "list", "-i", payloadPath).Output() + return err == nil && bytes.Contains(out, []byte("/dyld_shared_cache")) +} + +// extractPayload extracts the apple archive at `payloadPath` to `destination`. +func (e *installAssistantExtractor) extractPayload(payloadPath string, destination string) error { + return exec.Command("yaa", "extract", "-i", payloadPath, "-d", destination).Run() +} + +// copySharedCaches copies the contents of `System/Library/dyld` in `from` to `to`. +func (e *installAssistantExtractor) copySharedCaches(from, to string) error { + dyldPath := path.Join(from, "System/Library/dyld") + if !pathExists(dyldPath) { + return fmt.Errorf("couldn't find System/Library/dyld in %s", dyldPath) + } + cacheFiles, err := os.ReadDir(dyldPath) + if err != nil { + return err + } + for _, cacheFile := range cacheFiles { + name := cacheFile.Name() + src := path.Join(dyldPath, name) + dst := path.Join(to, name) + e.vlog("Copying %v to %v\n", src, dst) + if err := copyFile(src, dst); err != nil { + return fmt.Errorf("couldn't copy %s to %s: %v", src, dst, err) + } + } + return nil +} + +// extractCachesFromCryptexes extracts disk images from any cryptexes at `cryptexesPath`, mounts them, +// and extracts any dyld shared caches to `destination`. +func (e *installAssistantExtractor) extractCachesFromCryptexes(cryptexesPath string, destination string) error { + scratchDir, err := os.MkdirTemp(e.scratchDir, "cryptex_dmg") + if err != nil { + return err + } + files, err := os.ReadDir(cryptexesPath) + if err != nil { + return err + } + for _, f := range files { + cryptexMountpoint := path.Join(scratchDir, "cryptex_"+f.Name()) + cryptex := path.Join(cryptexesPath, f.Name()) + dmgPath := path.Join(scratchDir, f.Name()+".dmg") + e.extractCryptexDMG(cryptex, dmgPath) + e.vlog("Mounting %s at %s\n", cryptex, dmgPath) + e.mountDMG(dmgPath, cryptexMountpoint) + if err := e.copySharedCaches(cryptexMountpoint, destination); err != nil { + return err + } + } + return nil +} + +// extractCryptexDMG extracts the cryptex at `cryptexPath` to `dmgPath` using libParallelCompression. +func (e *installAssistantExtractor) extractCryptexDMG(cryptexPath, dmgPath string) error { + inp := C.CString("") + defer C.free(unsafe.Pointer(inp)) + cryptex := C.CString(cryptexPath) + defer C.free(unsafe.Pointer(cryptex)) + result := C.CString(dmgPath) + defer C.free(unsafe.Pointer(result)) + + ri := C.RawImage{unknown1: 0, unknown2: 0, input: inp, output: result, patch: cryptex, not_cryptex_cache: 0, threads: 0, verbose: 1} + if exitCode := C.RawImagePatch(&ri); exitCode != 0 { + return fmt.Errorf("RawImagePatch failed with %d", exitCode) + } + return nil +} + +// pathExists returns true if `path` exists. +func pathExists(path string) bool { + _, err := os.Stat(path) + return !os.IsNotExist(err) +} + +// extractFileToPath extracts the contents of `f` to `path`. +func extractFileToPath(f *zip.File, path string) error { + w, err := os.Create(path) + if err != nil { + return err + } + defer w.Close() + reader, err := f.Open() + if err != nil { + return err + } + + if _, err := io.Copy(w, reader); err != nil { + return err + } + return nil +} + +// copyFile copies `src` to `dst`, which can be on different volumes. +func copyFile(src, dst string) error { + w, err := os.Create(dst) + if err != nil { + return err + } + defer w.Close() + + reader, err := os.Open(src) + if err != nil { + return err + } + if _, err := io.Copy(w, reader); err != nil { + return err + } + return nil +} + +// unmountDMG unmounts the disk image at `mountpoint`. +func unmountDMG(mountpoint string) error { + return exec.Command("hdiutil", "detach", mountpoint).Run() +} diff --git a/src/tools/mac/upload_system_symbols/upload_system_symbols.go b/src/tools/mac/upload_system_symbols/upload_system_symbols.go index f34c288ae..b7b3cb556 100644 --- a/src/tools/mac/upload_system_symbols/upload_system_symbols.go +++ b/src/tools/mac/upload_system_symbols/upload_system_symbols.go @@ -46,11 +46,11 @@ import ( "flag" "fmt" "io" - "io/ioutil" "log" "os" "os/exec" "path" + "path/filepath" "regexp" "strings" "sync" @@ -61,9 +61,11 @@ var ( breakpadTools = flag.String("breakpad-tools", "out/Release/", "Path to the Breakpad tools directory, containing dump_syms and symupload.") uploadOnlyPath = flag.String("upload-from", "", "Upload a directory of symbol files that has been dumped independently.") dumpOnlyPath = flag.String("dump-to", "", "Dump the symbols to the specified directory, but do not upload them.") - systemRoot = flag.String("system-root", "", "Path to the root of the Mac OS X system whose symbols will be dumped.") + systemRoot = flag.String("system-root", "", "Path to the root of the macOS system whose symbols will be dumped. Mutually exclusive with --installer and --ipsw.") dumpArchitecture = flag.String("arch", "", "The CPU architecture for which symbols should be dumped. If not specified, dumps all architectures.") apiKey = flag.String("api-key", "", "API key to use. If this is present, the `sym-upload-v2` protocol is used.\nSee https://chromium.googlesource.com/breakpad/breakpad/+/HEAD/docs/sym_upload_v2_protocol.md or\n`symupload`'s help for more information.") + installer = flag.String("installer", "", "Path to macOS installer. Mutually exclusive with --system-root and --ipsw.") + ipsw = flag.String("ipsw", "", "Path to macOS IPSW. Mutually exclusive with --system-root and --installer.") ) var ( @@ -129,26 +131,13 @@ func main() { return } - if *systemRoot == "" { - log.Fatal("Need a -system-root to dump symbols for") - } - - if *dumpOnlyPath != "" { - // -dump-to specified, so make sure that the path is a directory. - if fi, err := os.Stat(*dumpOnlyPath); err != nil { - log.Fatalf("-dump-to location: %v", err) - } else if !fi.IsDir() { - log.Fatal("-dump-to location is not a directory") - } - } - dumpPath := *dumpOnlyPath if *dumpOnlyPath == "" { // If -dump-to was not specified, then run the upload pipeline and create // a temporary dump output directory. uq = StartUploadQueue() - if p, err := ioutil.TempDir("", "upload_system_symbols"); err != nil { + if p, err := os.MkdirTemp("", "upload_system_symbols"); err != nil { log.Fatalf("Failed to create temporary directory: %v", err) } else { dumpPath = p @@ -156,13 +145,61 @@ func main() { } } - dq := StartDumpQueue(*systemRoot, dumpPath, uq) + tempDir, err := os.MkdirTemp("", "systemRoots") + if err != nil { + log.Fatalf("Failed to create temporary directory: %v", err) + } + defer os.RemoveAll(tempDir) + roots := getSystemRoots(tempDir) + + if *dumpOnlyPath != "" { + // -dump-to specified, so make sure that the path is a directory. + if fi, err := os.Stat(*dumpOnlyPath); err != nil { + log.Fatalf("-dump-to location: %v", err) + } else if !fi.IsDir() { + log.Fatal("-dump-to location is not a directory") + } + } + + dq := StartDumpQueue(roots, dumpPath, uq) dq.Wait() if uq != nil { uq.Wait() } } +// getSystemRoots returns which system roots should be dumped from the parsed +// flags, extracting them if necessary. +func getSystemRoots(tempDir string) []string { + hasInstaller := len(*installer) > 0 + hasIPSW := len(*ipsw) > 0 + hasRoot := len(*systemRoot) > 0 + + if hasInstaller { + if hasIPSW || hasRoot { + log.Fatalf("--installer, --ipsw, and --system-root are mutually exclusive") + } + if rs, err := extractSystems(Installer, *installer, tempDir); err != nil { + log.Fatalf("Couldn't extract installer at %s: %v", *installer, err) + } else { + return rs + } + } else if hasIPSW { + if hasRoot { + log.Fatalf("--installer, --ipsw, and --system-root are mutually exclusive") + } + if rs, err := extractSystems(IPSW, *ipsw, tempDir); err != nil { + log.Fatalf("Couldn't extract IPSW at %s: %v", *ipsw, err) + } else { + return rs + } + } else if hasRoot { + return []string{*systemRoot} + } + log.Fatal("Need a --system-root, --installer, or --ipsw to dump symbols for") + return []string{} +} + // manglePath reduces an absolute filesystem path to a string suitable as the // base for a file name which encodes some of the original path. The result // concatenates the leading initial from each path component except the last to @@ -282,7 +319,7 @@ type dumpRequest struct { // StartDumpQueue creates a new worker pool to find all the Mach-O libraries in // root and dump their symbols to dumpPath. If an UploadQueue is passed, the // path to the symbol file will be enqueued there, too. -func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue { +func StartDumpQueue(roots []string, dumpPath string, uq *UploadQueue) *DumpQueue { dq := &DumpQueue{ dumpPath: dumpPath, queue: make(chan dumpRequest), @@ -290,7 +327,7 @@ func StartDumpQueue(root, dumpPath string, uq *UploadQueue) *DumpQueue { } dq.WorkerPool = StartWorkerPool(12, dq.worker) - findLibsInRoot(root, dq) + findLibsInRoots(roots, dq) return dq } @@ -369,21 +406,22 @@ type findQueue struct { dq *DumpQueue } -// findLibsInRoot looks in all the pathsToScan in the root and manages the +// findLibsInRoot looks in all the pathsToScan in all roots and manages the // interaction between findQueue and DumpQueue. -func findLibsInRoot(root string, dq *DumpQueue) { +func findLibsInRoots(roots []string, dq *DumpQueue) { fq := &findQueue{ queue: make(chan string, 10), dq: dq, } fq.WorkerPool = StartWorkerPool(12, fq.worker) + for _, root := range roots { + for _, p := range pathsToScan { + fq.findLibsInPath(path.Join(root, p), true) + } - for _, p := range pathsToScan { - fq.findLibsInPath(path.Join(root, p), true) - } - - for _, p := range optionalPathsToScan { - fq.findLibsInPath(path.Join(root, p), false) + for _, p := range optionalPathsToScan { + fq.findLibsInPath(path.Join(root, p), false) + } } close(fq.queue) @@ -482,3 +520,45 @@ func (fq *findQueue) dumpMachOFile(fp string, image *macho.File) { fq.dq.DumpSymbols(fp, arch) } } + +// extractSystems extracts any dyld shared caches from `archive`, then extracts the caches +// into macOS system libraries, returning the locations on disk of any systems extracted. +func extractSystems(format ArchiveFormat, archive string, extractPath string) ([]string, error) { + cachesPath := path.Join(extractPath, "caches") + if err := os.MkdirAll(cachesPath, 0755); err != nil { + return nil, err + } + if err := ExtractCaches(format, archive, cachesPath, true); err != nil { + return nil, err + } + files, err := os.ReadDir(cachesPath) + if err != nil { + return nil, err + } + cachePrefix := "dyld_shared_cache_" + extractedDirPath := path.Join(extractPath, "extracted") + roots := make([]string, len(files)) + for _, file := range files { + fileName := file.Name() + if filepath.Ext(fileName) == "" && strings.HasPrefix(fileName, cachePrefix) { + arch := strings.TrimPrefix(fileName, cachePrefix) + extractedSystemPath := path.Join(extractedDirPath, arch) + // XXX: Maybe this shouldn't be fatal? + if err := extractDyldSharedCache(path.Join(cachesPath, fileName), extractedSystemPath); err != nil { + return nil, err + } + roots = append(roots, extractedSystemPath) + } + } + return roots, nil +} + +// extractDyldSharedCache extracts the dyld shared cache at `cachePath` to `destination`. +func extractDyldSharedCache(cachePath string, destination string) error { + dscExtractor := path.Join(*breakpadTools, "dsc_extractor") + cmd := exec.Command(dscExtractor, cachePath, destination) + if output, err := cmd.Output(); err != nil { + return fmt.Errorf("extracting shared cache at %s: %v. dsc_extractor said %v", cachePath, err, output) + } + return nil +} From 255dbbd061a400e7a3f601e82a62e65058b39e5e Mon Sep 17 00:00:00 2001 From: Ivan Penkov Date: Thu, 11 Jan 2024 11:25:57 -0800 Subject: [PATCH 03/37] Fixing a buffer overflow in the Breakpad symbol serialization code caused by an integer overflow. We started getting Breakpad symbol files that exceed some of the 32-bit limits. Change-Id: I1f81905c0b7f88a0c93f10b651e5e160876487fd Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5186360 Reviewed-by: Joshua Peraza --- src/processor/fast_source_line_resolver.cc | 13 +++++++------ src/processor/module_serializer.cc | 20 +++++++++++++++----- src/processor/module_serializer.h | 3 ++- src/processor/source_line_resolver_base.cc | 1 - 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/processor/fast_source_line_resolver.cc b/src/processor/fast_source_line_resolver.cc index 79803f2ca..1c329772d 100644 --- a/src/processor/fast_source_line_resolver.cc +++ b/src/processor/fast_source_line_resolver.cc @@ -44,6 +44,7 @@ #include "processor/fast_source_line_resolver_types.h" #include +#include #include #include #include @@ -154,7 +155,7 @@ void FastSourceLineResolver::Module::ConstructInlineFrames( } } - // Use the starting adress of the inlined range as inlined function base. + // Use the starting address of the inlined range as inlined function base. new_frame->function_base = new_frame->module->base_address(); for (const auto& range : in->inline_ranges) { if (address >= range.first && address < range.first + range.second) { @@ -228,21 +229,21 @@ bool FastSourceLineResolver::Module::LoadMapFromMemory( const char* mem_buffer = memory_buffer; mem_buffer = SimpleSerializer::Read(mem_buffer, &is_corrupt_); - const uint32_t* map_sizes = reinterpret_cast(mem_buffer); + const uint64_t* map_sizes = reinterpret_cast(mem_buffer); - unsigned int header_size = kNumberMaps_ * sizeof(unsigned int); + unsigned int header_size = kNumberMaps_ * sizeof(uint64_t); // offsets[]: an array of offset addresses (with respect to mem_buffer), // for each "Static***Map" component of Module. // "Static***Map": static version of std::map or map wrapper, i.e., StaticMap, // StaticAddressMap, StaticContainedRangeMap, and StaticRangeMap. - unsigned int offsets[kNumberMaps_]; + uint64_t offsets[kNumberMaps_]; offsets[0] = header_size; for (int i = 1; i < kNumberMaps_; ++i) { offsets[i] = offsets[i - 1] + map_sizes[i - 1]; } - unsigned int expected_size = sizeof(bool) + offsets[kNumberMaps_ - 1] + - map_sizes[kNumberMaps_ - 1] + 1; + size_t expected_size = sizeof(bool) + offsets[kNumberMaps_ - 1] + + map_sizes[kNumberMaps_ - 1] + 1; if (expected_size != memory_buffer_size && // Allow for having an extra null terminator. expected_size != memory_buffer_size - 1) { diff --git a/src/processor/module_serializer.cc b/src/processor/module_serializer.cc index 05519958f..33130353a 100644 --- a/src/processor/module_serializer.cc +++ b/src/processor/module_serializer.cc @@ -38,11 +38,21 @@ #include "processor/module_serializer.h" +#include +#include #include #include +#include "common/scoped_ptr.h" +#include "google_breakpad/processor/basic_source_line_resolver.h" +#include "google_breakpad/processor/code_module.h" +#include "google_breakpad/processor/fast_source_line_resolver.h" #include "processor/basic_code_module.h" +#include "processor/linked_ptr.h" #include "processor/logging.h" +#include "processor/map_serializers.h" +#include "processor/simple_serializer.h" +#include "processor/windows_frame_info.h" namespace google_breakpad { @@ -78,7 +88,7 @@ size_t ModuleSerializer::SizeOf(const BasicSourceLineResolver::Module& module) { inline_origin_serializer_.SizeOf(module.inline_origins_); // Header size. - total_size_alloc_ += kNumberMaps_ * sizeof(uint32_t); + total_size_alloc_ += kNumberMaps_ * sizeof(uint64_t); for (int i = 0; i < kNumberMaps_; ++i) { total_size_alloc_ += map_sizes_[i]; @@ -95,8 +105,8 @@ char* ModuleSerializer::Write(const BasicSourceLineResolver::Module& module, // Write the is_corrupt flag. dest = SimpleSerializer::Write(module.is_corrupt_, dest); // Write header. - memcpy(dest, map_sizes_, kNumberMaps_ * sizeof(uint32_t)); - dest += kNumberMaps_ * sizeof(uint32_t); + memcpy(dest, map_sizes_, kNumberMaps_ * sizeof(uint64_t)); + dest += kNumberMaps_ * sizeof(uint64_t); // Write each map. dest = files_serializer_.Write(module.files_, dest); dest = functions_serializer_.Write(module.functions_, dest); @@ -161,7 +171,7 @@ bool ModuleSerializer::SerializeModuleAndLoadIntoFastResolver( // Copy the data into string. // Must pass string to LoadModuleUsingMapBuffer(), instead of passing char* to - // LoadModuleUsingMemoryBuffer(), becaused of data ownership/lifetime issue. + // LoadModuleUsingMemoryBuffer(), because of data ownership/lifetime issue. string symbol_data_string(symbol_data.get(), size); symbol_data.reset(); @@ -213,7 +223,7 @@ char* ModuleSerializer::SerializeSymbolFileData(const string& symbol_data, return NULL; } buffer.reset(NULL); - return Serialize(*(module.get()), size); + return Serialize(*module, size); } } // namespace google_breakpad diff --git a/src/processor/module_serializer.h b/src/processor/module_serializer.h index fd387cbbc..675d78488 100644 --- a/src/processor/module_serializer.h +++ b/src/processor/module_serializer.h @@ -36,6 +36,7 @@ #ifndef PROCESSOR_MODULE_SERIALIZER_H__ #define PROCESSOR_MODULE_SERIALIZER_H__ +#include #include #include @@ -110,7 +111,7 @@ class ModuleSerializer { FastSourceLineResolver::Module::kNumberMaps_; // Memory sizes required to serialize map components in Module. - uint32_t map_sizes_[kNumberMaps_]; + uint64_t map_sizes_[kNumberMaps_]; // Serializers for each individual map component in Module class. StdMapSerializer files_serializer_; diff --git a/src/processor/source_line_resolver_base.cc b/src/processor/source_line_resolver_base.cc index da9ff7624..351157cea 100644 --- a/src/processor/source_line_resolver_base.cc +++ b/src/processor/source_line_resolver_base.cc @@ -253,7 +253,6 @@ bool SourceLineResolverBase::LoadModuleUsingMemoryBuffer( // Returning false from here would be an indication that the symbols for // this module are missing which would be wrong. Intentionally fall through // and add the module to both the modules_ and the corrupt_modules_ lists. - assert(basic_module->IsCorrupt()); } modules_->insert(make_pair(module->code_file(), basic_module)); From 2481554490026ab9386ebc654343132d06194ce5 Mon Sep 17 00:00:00 2001 From: Ian McKellar Date: Tue, 16 Jan 2024 17:21:10 +0000 Subject: [PATCH 04/37] Regenerate Makefile.in for zstd dependency The version of Makefile.in that landed in https://crrev.com/c/4722191 had a Makefile.in that was generated from a previous version of Makefile.am where the libzstd dependency wasn't optional. That's causing some problems (see: https://crrev.com/c/5193965) and is a simple mistake. I generated this CL by simply running `automake`, so it simply makes the checked in generated Makefile.in based on the checked in source Makefile.am Change-Id: Iabb4a99bfac3f5ef6067a140bd373c9fb894878a Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5200626 Reviewed-by: Mike Frysinger --- Makefile.in | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index 0c47bf87d..7637800c0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -2691,11 +2691,13 @@ src_tools_linux_dump_syms_dump_syms_SOURCES = \ src/tools/linux/dump_syms/dump_syms.cc src_tools_linux_dump_syms_dump_syms_CXXFLAGS = \ - $(RUSTC_DEMANGLE_CFLAGS) + $(RUSTC_DEMANGLE_CFLAGS) \ + $(ZSTD_CFLAGS) src_tools_linux_dump_syms_dump_syms_LDADD = \ $(RUSTC_DEMANGLE_LIBS) \ - -lz -lzstd + $(ZSTD_CFLAGS) \ + -lz src_tools_linux_md2core_minidump_2_core_SOURCES = \ src/common/linux/memory_mapped_file.cc \ @@ -2822,13 +2824,15 @@ src_common_dumper_unittest_SOURCES = \ src_common_dumper_unittest_CPPFLAGS = \ $(AM_CPPFLAGS) $(TEST_CFLAGS) \ $(RUSTC_DEMANGLE_CFLAGS) \ - $(PTHREAD_CFLAGS) + $(PTHREAD_CFLAGS) \ + $(ZSTD_CFLAGS) src_common_dumper_unittest_LDADD = \ $(TEST_LIBS) \ $(RUSTC_DEMANGLE_LIBS) \ $(PTHREAD_CFLAGS) $(PTHREAD_LIBS) \ - -lz -lzstd + $(ZSTD_LIBS) \ + -lz src_common_mac_macho_reader_unittest_SOURCES = \ src/common/dwarf_cfi_to_module.cc \ From 38365ae4ec3a5fec5a1ead13bcaa0025b32cbb24 Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Wed, 17 Jan 2024 12:15:06 +0000 Subject: [PATCH 05/37] Process symbols provided by NV Access Change-Id: I862905a164370298ca2f21d2030a0aab860cc08c Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5204606 Reviewed-by: Nelson Billing --- src/tools/windows/converter_exe/winsymconv.cmd | 2 ++ src/tools/windows/converter_exe/winsymconv_test.cmd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/tools/windows/converter_exe/winsymconv.cmd b/src/tools/windows/converter_exe/winsymconv.cmd index bea84b589..ca1847847 100644 --- a/src/tools/windows/converter_exe/winsymconv.cmd +++ b/src/tools/windows/converter_exe/winsymconv.cmd @@ -39,6 +39,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/staging_symbol ^ -m https://clients2.google.com/cr/staging_symbol/missingsymbols ^ @@ -58,6 +59,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/symbol ^ -m https://clients2.google.com/cr/symbol/missingsymbols ^ diff --git a/src/tools/windows/converter_exe/winsymconv_test.cmd b/src/tools/windows/converter_exe/winsymconv_test.cmd index c17770660..448244d84 100644 --- a/src/tools/windows/converter_exe/winsymconv_test.cmd +++ b/src/tools/windows/converter_exe/winsymconv_test.cmd @@ -37,6 +37,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/staging_symbol ^ -mf %SCRIPT_LOCATION%missing_symbols_test.txt ^ @@ -56,6 +57,7 @@ google_converter.exe ^ -n https://download.amd.com/dir/bin ^ -n https://driver-symbols.nvidia.com ^ -n https://software.intel.com/sites/downloads/symbols ^ + -n https://www.nvaccess.org/files/nvda/symbols ^ -l %SYMBOL_DIR% ^ -s https://clients2.google.com/cr/symbol ^ -mf %SCRIPT_LOCATION%missing_symbols_test.txt ^ From 6551ac3632eb7236642366f70a2eb865b87a3329 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Bostr=C3=B6m?= Date: Mon, 29 Jan 2024 09:09:39 -0800 Subject: [PATCH 06/37] Fix uninitialized report_warnings in Mac dump_syms This uninitialized-memory use breaks the Mac ubsan build as dump_syms runs during the build step. Bug: chromium:1324701 Change-Id: Id4e0a7d38893b2ceb49e58d1f5c99a056d84a921 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5243705 Reviewed-by: Lei Zhang --- src/tools/mac/dump_syms/dump_syms_tool.cc | 26 +++++++---------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/tools/mac/dump_syms/dump_syms_tool.cc b/src/tools/mac/dump_syms/dump_syms_tool.cc index ded4c10e5..cd8e565d7 100644 --- a/src/tools/mac/dump_syms/dump_syms_tool.cc +++ b/src/tools/mac/dump_syms/dump_syms_tool.cc @@ -55,29 +55,19 @@ using google_breakpad::scoped_ptr; using std::vector; struct Options { - Options() - : srcPath(), - dsymPath(), - arch(), - header_only(false), - cfi(true), - handle_inter_cu_refs(true), - handle_inlines(false), - enable_multiple(false), - module_name(), - prefer_extern_name(false) {} + Options() = default; string srcPath; string dsymPath; std::optional arch; - bool header_only; - bool cfi; - bool handle_inter_cu_refs; - bool handle_inlines; - bool enable_multiple; + bool header_only = false; + bool cfi = true; + bool handle_inter_cu_refs = true; + bool handle_inlines = false; + bool enable_multiple = false; string module_name; - bool prefer_extern_name; - bool report_warnings; + bool prefer_extern_name = false; + bool report_warnings = false; }; static bool StackFrameEntryComparator(const Module::StackFrameEntry* a, From 5a0e1e34b0c456dec03b9f114d918a6dea7bb553 Mon Sep 17 00:00:00 2001 From: Volodymyr Riazantsev Date: Mon, 29 Jan 2024 18:24:08 -0800 Subject: [PATCH 07/37] linux_ptrace_dumper: Handle ARMEABI binaries on ARM64 kernels For ARM32 binaries running on ARM64 Linux (kernel) reading of FP registers fails. It's guarded for Android as well as softap platforms, but other ARM platforms can suffer from this issue. The ARMEABI linux_ptrace_dumper_unittest fails on system that runs under ARM64 kernel. In order to mitigate the issue, we adding a VFP registers read. The Breakpad does not support include of VFP registers into a minidump file, so that read is noop for the backend, but just a fix for broken systems. Bug: internal 322205293 Test: Run linux_ptrace_dumper_unittest on following [userland:kernel] combinations: armeabi-hardfp:armeabi armeabi-softfp:armeabi armeabi-hardfp:aarch64 aarch64:aarch64 Signed-off-by: Volodymyr Riazantsev Change-Id: I0709ae9a7ff913340ebc89de703ab2cb9c823b14 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5247149 Reviewed-by: Joshua Peraza --- .../minidump_writer/linux_ptrace_dumper.cc | 70 ++++++++++++++++++- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc index 2adc39e12..ba73e3669 100644 --- a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +++ b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc @@ -62,6 +62,20 @@ #include "common/linux/linux_libc_support.h" #include "third_party/lss/linux_syscall_support.h" +#if defined(__arm__) +/* + * https://elixir.bootlin.com/linux/v6.8-rc2/source/arch/arm/include/asm/user.h#L81 + * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 + * are ignored by the ptrace system call and the signal handler. + */ +typedef struct user_vfp { + unsigned long long fpregs[32]; + unsigned long fpscr; +// Kernel just appends fpscr to the copy of fpregs, so we need to force +// compiler to build the same layout. +} __attribute__((packed, aligned(4))) user_vfp_t; +#endif // defined(__arm__) + // Suspends a thread by attaching to it. static bool SuspendThread(pid_t pid) { // This may fail if the thread has just died or debugged. @@ -152,6 +166,24 @@ bool LinuxPtraceDumper::CopyFromProcess(void* dest, pid_t child, return true; } +// This read VFP registers via either PTRACE_GETREGSET or PTRACE_GETREGS +#if defined(__arm__) +static bool ReadVFPRegistersArm32(pid_t tid, struct iovec* io) { +#ifdef PTRACE_GETREGSET + if (sys_ptrace(PTRACE_GETREGSET, tid, reinterpret_cast(NT_ARM_VFP), + io) == 0 && io->iov_len == sizeof(user_vfp_t)) { + return true; + } +#endif // PTRACE_GETREGSET +#ifdef PTRACE_GETVFPREGS + if (sys_ptrace(PTRACE_GETVFPREGS, tid, nullptr, io->iov_base) == 0) { + return true; + } +#endif // PTRACE_GETVFPREGS + return false; +} +#endif // defined(__arm__) + bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) { #ifdef PTRACE_GETREGSET @@ -163,7 +195,24 @@ bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) info->GetFloatingPointRegisters(&io.iov_base, &io.iov_len); if (sys_ptrace(PTRACE_GETREGSET, tid, (void*)NT_FPREGSET, (void*)&io) == -1) { - return false; + // We are going to check if we can read VFP registers on ARM32. + // Currently breakpad does not support VFP registers to be a part of minidump, + // so this is only to confirm that we can actually read FP registers. + // That is needed to prevent a false-positive minidumps failures with ARM32 + // binaries running on top of ARM64 Linux kernels. +#if defined(__arm__) + switch (errno) { + case EIO: + case EINVAL: + user_vfp vfp; + struct iovec io; + io.iov_base = &vfp; + io.iov_len = sizeof(vfp); + return ReadVFPRegistersArm32(tid, &io); + default: + return false; + } +#endif // defined(__arm__) } return true; #else @@ -194,7 +243,24 @@ bool LinuxPtraceDumper::ReadRegisters(ThreadInfo* info, pid_t tid) { void* fp_addr; info->GetFloatingPointRegisters(&fp_addr, NULL); if (sys_ptrace(PTRACE_GETFPREGS, tid, NULL, fp_addr) == -1) { - return false; + // We are going to check if we can read VFP registers on ARM32. + // Currently breakpad does not support VFP registers to be a part of minidump, + // so this is only to confirm that we can actually read FP registers. + // That is needed to prevent a false-positive minidumps failures with ARM32 + // binaries running on top of ARM64 Linux kernels. +#if defined(__arm__) + switch (errno) { + case EIO: + case EINVAL: + user_vfp vfp; + struct iovec io; + io.iov_base = &vfp; + io.iov_len = sizeof(vfp); + return ReadVFPRegistersArm32(tid, &io); + default: + return false; + } +#endif // defined(__arm__) } #endif // !(defined(__ANDROID__) && defined(__ARM_EABI__)) #endif // !defined(__SOFTFP__) From 38ac9ae56f9a46919f3898aee4c1a8e848e4e197 Mon Sep 17 00:00:00 2001 From: Volodymyr Riazantsev Date: Tue, 30 Jan 2024 15:32:04 -0800 Subject: [PATCH 08/37] linux_ptrace_dumper: Fix type definition Fix a type definition, so it doesn't collide if the similar type is already defined in the sysroot headers. Test: Build for Android/Linux. Bug: Internal 322205293 Change-Id: I3453de725083b01f2e69a61a7fc948f9f8ca5eca Signed-off-by: Volodymyr Riazantsev Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5251488 Reviewed-by: Joshua Peraza --- src/client/linux/minidump_writer/linux_ptrace_dumper.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc index ba73e3669..e5850063c 100644 --- a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +++ b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc @@ -68,7 +68,7 @@ * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 * are ignored by the ptrace system call and the signal handler. */ -typedef struct user_vfp { +typedef struct { unsigned long long fpregs[32]; unsigned long fpscr; // Kernel just appends fpscr to the copy of fpregs, so we need to force @@ -204,7 +204,7 @@ bool LinuxPtraceDumper::ReadRegisterSet(ThreadInfo* info, pid_t tid) switch (errno) { case EIO: case EINVAL: - user_vfp vfp; + user_vfp_t vfp; struct iovec io; io.iov_base = &vfp; io.iov_len = sizeof(vfp); @@ -252,7 +252,7 @@ bool LinuxPtraceDumper::ReadRegisters(ThreadInfo* info, pid_t tid) { switch (errno) { case EIO: case EINVAL: - user_vfp vfp; + user_vfp_t vfp; struct iovec io; io.iov_base = &vfp; io.iov_len = sizeof(vfp); From 6b871f4bd8b6e28191289d19e8ef074a11648a18 Mon Sep 17 00:00:00 2001 From: Nathan Moinvaziri Date: Mon, 5 Feb 2024 13:44:35 -0800 Subject: [PATCH 09/37] Code cleanup for DwarfCUToModule::NullWarningReporter changes. Updated code to use Google's modern C++ style. * Use std::unique_ptr to allocate DwarfCUToModule::WarningReporter. * Fixed reference alignment in NullWarningReporter. Change-Id: I230dac445a07b4023a64284b907010f31eadcdf4 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5265662 Reviewed-by: Lei Zhang Reviewed-by: Ivan Penkov --- src/common/dwarf_cu_to_module.h | 32 ++++++++++++++++---------------- src/common/mac/dump_syms.cc | 11 +++++------ 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/common/dwarf_cu_to_module.h b/src/common/dwarf_cu_to_module.h index d83108437..82c25091d 100644 --- a/src/common/dwarf_cu_to_module.h +++ b/src/common/dwarf_cu_to_module.h @@ -254,61 +254,61 @@ class DwarfCUToModule: public RootDIEHandler { void UncoveredHeading(); }; - class NullWarningReporter: public WarningReporter { + class NullWarningReporter : public WarningReporter { public: - NullWarningReporter(const string &filename, uint64_t cu_offset): - WarningReporter(filename, cu_offset) { } + NullWarningReporter(const string& filename, uint64_t cu_offset) + : WarningReporter(filename, cu_offset) {} // Set the name of the compilation unit we're processing to NAME. - void SetCUName(const string &name) { } + void SetCUName(const string& name) {} // Accessor and setter for uncovered_warnings_enabled_. // UncoveredFunction and UncoveredLine only report a problem if that is // true. By default, these warnings are disabled, because those // conditions occur occasionally in healthy code. - void set_uncovered_warnings_enabled(bool value) { } + void set_uncovered_warnings_enabled(bool value) {} // A DW_AT_specification in the DIE at OFFSET refers to a DIE we // haven't processed yet, or that wasn't marked as a declaration, // at TARGET. - void UnknownSpecification(uint64_t offset, uint64_t target) { } + void UnknownSpecification(uint64_t offset, uint64_t target) {} // A DW_AT_abstract_origin in the DIE at OFFSET refers to a DIE we // haven't processed yet, or that wasn't marked as inline, at TARGET. - void UnknownAbstractOrigin(uint64_t offset, uint64_t target) { } + void UnknownAbstractOrigin(uint64_t offset, uint64_t target) {} // We were unable to find the DWARF section named SECTION_NAME. - void MissingSection(const string §ion_name) { } + void MissingSection(const string& section_name) {} // The CU's DW_AT_stmt_list offset OFFSET is bogus. - void BadLineInfoOffset(uint64_t offset) { } + void BadLineInfoOffset(uint64_t offset) {} // FUNCTION includes code covered by no line number data. - void UncoveredFunction(const Module::Function &function) { } + void UncoveredFunction(const Module::Function& function) {} // Line number NUMBER in LINE_FILE, of length LENGTH, includes code // covered by no function. - void UncoveredLine(const Module::Line &line) { } + void UncoveredLine(const Module::Line& line) {} // The DW_TAG_subprogram DIE at OFFSET has no name specified directly // in the DIE, nor via a DW_AT_specification or DW_AT_abstract_origin // link. - void UnnamedFunction(uint64_t offset) { } + void UnnamedFunction(uint64_t offset) {} // __cxa_demangle() failed to demangle INPUT. - void DemangleError(const string &input) { } + void DemangleError(const string& input) {} // The DW_FORM_ref_addr at OFFSET to TARGET was not handled because // FilePrivate did not retain the inter-CU specification data. - void UnhandledInterCUReference(uint64_t offset, uint64_t target) { } + void UnhandledInterCUReference(uint64_t offset, uint64_t target) {} // The DW_AT_ranges at offset is malformed (truncated or outside of the // .debug_ranges section's bound). - void MalformedRangeList(uint64_t offset) { } + void MalformedRangeList(uint64_t offset) {} // A DW_AT_ranges attribute was encountered but the no .debug_ranges // section was found. - void MissingRanges() { } + void MissingRanges() {} }; // Create a DWARF debugging info handler for a compilation unit diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index bcb62413f..3c56734d6 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -528,13 +528,13 @@ void DumpSymbols::ReadDwarf(google_breakpad::Module* module, for (uint64_t offset = 0; offset < debug_info_length;) { // Make a handler for the root DIE that populates MODULE with the // debug info. - DwarfCUToModule::WarningReporter *reporter = nullptr; + std::unique_ptr reporter; if (report_warnings_) { - reporter = new DwarfCUToModule::WarningReporter( - selected_object_name_, offset); + reporter = std::make_unique( + selected_object_name_, offset); } else { - reporter = new DwarfCUToModule::NullWarningReporter( - selected_object_name_, offset); + reporter = std::make_unique( + selected_object_name_, offset); } DwarfCUToModule root_handler(&file_context, &line_to_module, &ranges_handler, reporter, @@ -554,7 +554,6 @@ void DumpSymbols::ReadDwarf(google_breakpad::Module* module, StartProcessSplitDwarf(&dwarf_reader, module, endianness, handle_inter_cu_refs, handle_inline); } - delete reporter; } } From 55036ddccf04ba096c2d133f80a7baf9c0b91e1d Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Tue, 13 Feb 2024 17:09:01 -0500 Subject: [PATCH 10/37] Mac: Installer extraction bug fixes - Mark `RawImagePatch` weak - Fix reversed condition in disk image unmounting - Ensure source files are closed when copying - Use 0-based indexing when determining installer OS version Bug: None Change-Id: I015f2b0d9c88a5ec128822d55c974e22723a1a6e Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5291963 Reviewed-by: Mark Mentovai --- src/tools/mac/upload_system_symbols/extract.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tools/mac/upload_system_symbols/extract.go b/src/tools/mac/upload_system_symbols/extract.go index acd674ef3..f991e3aa8 100644 --- a/src/tools/mac/upload_system_symbols/extract.go +++ b/src/tools/mac/upload_system_symbols/extract.go @@ -17,7 +17,7 @@ package main // uint32_t verbose; // } RawImage; // -// extern int32_t RawImagePatch(RawImage *); +// extern int32_t RawImagePatch(RawImage *) __attribute__((weak)); import "C" import ( @@ -125,7 +125,7 @@ func (e *Extractor) vlog(format string, args ...interface{}) { func (e *Extractor) mountDMG(dmgPath string, mountpoint string) error { cmd := exec.Command("hdiutil", "attach", dmgPath, "-mountpoint", mountpoint, "-quiet", "-nobrowse", "-readonly") err := cmd.Run() - if err != nil { + if err == nil { e.dmgMountPaths = append(e.dmgMountPaths, mountpoint) } return err @@ -299,7 +299,7 @@ func (e *installAssistantExtractor) expandInstaller(installerPath string, destin // hasCryptexes returns true if the installer containing the plist at `plistPath` is for // macOS version 13 or higher, and accordingly stores dyld shared caches inside cryptexes. func (e *installAssistantExtractor) hasCryptexes(plistPath string) (bool, error) { - print_cmd := "print :Assets:1:OSVersion" + print_cmd := "print :Assets:0:OSVersion" result, err := exec.Command("PlistBuddy", "-c", print_cmd, plistPath).Output() if err != nil { return false, fmt.Errorf("couldn't read OS version from %s: %v", plistPath, err) @@ -502,6 +502,7 @@ func copyFile(src, dst string) error { if err != nil { return err } + defer reader.Close() if _, err := io.Copy(w, reader); err != nil { return err } From f80f2888031b0570783011476a5e862fb7d848a1 Mon Sep 17 00:00:00 2001 From: Ivan Penkov Date: Thu, 15 Feb 2024 17:29:51 -0800 Subject: [PATCH 11/37] Fixing integer overflows in Breakpad Processor map/range code. Recently, Breakpad symbol files have exceeded the various 32-bit limits in these utils and we started seeing integer overflows. This is also fixing a build issue in src/common/mac/dump_syms.cc. Change-Id: Ibd913816c3b2b1171ac9991718c8911ac31eda86 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5299472 Reviewed-by: Ivan Penkov Reviewed-by: Joshua Peraza --- src/common/mac/dump_syms.cc | 4 +- .../fast_source_line_resolver_types.h | 2 +- src/processor/logging.h | 11 +- src/processor/map_serializers-inl.h | 56 +++++----- src/processor/map_serializers.h | 8 +- src/processor/map_serializers_unittest.cc | 103 +++++++++--------- src/processor/range_map-inl.h | 8 +- src/processor/range_map.h | 5 +- src/processor/range_map_unittest.cc | 10 +- src/processor/static_address_map_unittest.cc | 2 +- .../static_contained_range_map-inl.h | 2 +- src/processor/static_contained_range_map.h | 2 +- .../static_contained_range_map_unittest.cc | 17 +-- src/processor/static_map-inl.h | 40 +++---- src/processor/static_map.h | 23 ++-- src/processor/static_map_iterator-inl.h | 6 +- src/processor/static_map_iterator.h | 8 +- src/processor/static_map_unittest.cc | 37 ++++--- src/processor/static_range_map-inl.h | 2 +- src/processor/static_range_map.h | 4 +- 20 files changed, 178 insertions(+), 172 deletions(-) diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index 3c56734d6..c3ea1b82c 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -406,7 +406,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr& module) { selected_object_name_ = object_filename_; if (object_files_.size() > 1) { selected_object_name_ += ", architecture "; - selected_object_name_ + selected_arch_name; + selected_object_name_ += selected_arch_name; } // Compute a module name, to appear in the MODULE record. @@ -537,7 +537,7 @@ void DumpSymbols::ReadDwarf(google_breakpad::Module* module, selected_object_name_, offset); } DwarfCUToModule root_handler(&file_context, &line_to_module, - &ranges_handler, reporter, + &ranges_handler, reporter.get(), handle_inline); // Make a Dwarf2Handler that drives our DIEHandler. DIEDispatcher die_dispatcher(&root_handler); diff --git a/src/processor/fast_source_line_resolver_types.h b/src/processor/fast_source_line_resolver_types.h index 75b9004f6..14913518b 100644 --- a/src/processor/fast_source_line_resolver_types.h +++ b/src/processor/fast_source_line_resolver_types.h @@ -107,7 +107,7 @@ struct FastSourceLineResolver::Inline : public SourceLineResolverBase::Inline { // De-serialize the memory data of a Inline. void CopyFrom(const char* raw) { - DESERIALIZE(raw, has_call_site_file_id); + raw = SimpleSerializer::Read(raw, &has_call_site_file_id); DESERIALIZE(raw, inline_nest_level); DESERIALIZE(raw, call_site_line); DESERIALIZE(raw, call_site_file_id); diff --git a/src/processor/logging.h b/src/processor/logging.h index 8c040837f..519c852f5 100644 --- a/src/processor/logging.h +++ b/src/processor/logging.h @@ -57,7 +57,9 @@ #define PROCESSOR_LOGGING_H__ #include +#include #include +#include #include "common/using_std_string.h" #include "google_breakpad/common/breakpad_types.h" @@ -119,9 +121,12 @@ class LogMessageVoidify { }; // Returns number formatted as a hexadecimal string, such as "0x7b". -string HexString(uint32_t number); -string HexString(uint64_t number); -string HexString(int number); +template +string HexString(T number) { + std::stringstream stream; + stream << "0x" << std::hex << number; + return stream.str(); +} // Returns the error code as set in the global errno variable, and sets // error_string, a required argument, to a string describing that error diff --git a/src/processor/map_serializers-inl.h b/src/processor/map_serializers-inl.h index 577b95f7b..5dfb0a752 100644 --- a/src/processor/map_serializers-inl.h +++ b/src/processor/map_serializers-inl.h @@ -54,7 +54,7 @@ template size_t StdMapSerializer::SizeOf( const std::map& m) const { size_t size = 0; - size_t header_size = (1 + m.size()) * sizeof(uint32_t); + size_t header_size = (1 + m.size()) * sizeof(uint64_t); size += header_size; typename std::map::const_iterator iter; @@ -76,19 +76,19 @@ char* StdMapSerializer::Write(const std::map& m, // Write header: // Number of nodes. - dest = SimpleSerializer::Write(m.size(), dest); + dest = SimpleSerializer::Write(m.size(), dest); // Nodes offsets. - uint32_t* offsets = reinterpret_cast(dest); - dest += sizeof(uint32_t) * m.size(); + uint64_t* offsets = reinterpret_cast(dest); + dest += sizeof(uint64_t) * m.size(); char* key_address = dest; dest += sizeof(Key) * m.size(); // Traverse map. typename std::map::const_iterator iter; - int index = 0; + int64_t index = 0; for (iter = m.begin(); iter != m.end(); ++iter, ++index) { - offsets[index] = static_cast(dest - start_address); + offsets[index] = static_cast(dest - start_address); key_address = key_serializer_.Write(iter->first, key_address); dest = value_serializer_.Write(iter->second, dest); } @@ -97,9 +97,9 @@ char* StdMapSerializer::Write(const std::map& m, template char* StdMapSerializer::Serialize( - const std::map& m, unsigned int* size) const { + const std::map& m, uint64_t* size) const { // Compute size of memory to be allocated. - unsigned int size_to_alloc = SizeOf(m); + uint64_t size_to_alloc = SizeOf(m); // Allocate memory. char* serialized_data = new char[size_to_alloc]; if (!serialized_data) { @@ -118,7 +118,7 @@ template size_t RangeMapSerializer::SizeOf( const RangeMap& m) const { size_t size = 0; - size_t header_size = (1 + m.map_.size()) * sizeof(uint32_t); + size_t header_size = (1 + m.map_.size()) * sizeof(uint64_t); size += header_size; typename std::map::const_iterator iter; @@ -144,19 +144,19 @@ char* RangeMapSerializer::Write( // Write header: // Number of nodes. - dest = SimpleSerializer::Write(m.map_.size(), dest); + dest = SimpleSerializer::Write(m.map_.size(), dest); // Nodes offsets. - uint32_t* offsets = reinterpret_cast(dest); - dest += sizeof(uint32_t) * m.map_.size(); + uint64_t* offsets = reinterpret_cast(dest); + dest += sizeof(uint64_t) * m.map_.size(); char* key_address = dest; dest += sizeof(Address) * m.map_.size(); // Traverse map. typename std::map::const_iterator iter; - int index = 0; + int64_t index = 0; for (iter = m.map_.begin(); iter != m.map_.end(); ++iter, ++index) { - offsets[index] = static_cast(dest - start_address); + offsets[index] = static_cast(dest - start_address); key_address = address_serializer_.Write(iter->first, key_address); dest = address_serializer_.Write(iter->second.base(), dest); dest = entry_serializer_.Write(iter->second.entry(), dest); @@ -166,9 +166,9 @@ char* RangeMapSerializer::Write( template char* RangeMapSerializer::Serialize( - const RangeMap& m, unsigned int* size) const { + const RangeMap& m, uint64_t* size) const { // Compute size of memory to be allocated. - unsigned int size_to_alloc = SizeOf(m); + uint64_t size_to_alloc = SizeOf(m); // Allocate memory. char* serialized_data = new char[size_to_alloc]; if (!serialized_data) { @@ -191,12 +191,12 @@ size_t ContainedRangeMapSerializer::SizeOf( size_t size = 0; size_t header_size = addr_serializer_.SizeOf(m->base_) + entry_serializer_.SizeOf(m->entry_) - + sizeof(uint32_t); + + sizeof(uint64_t); size += header_size; // In case m.map_ == NULL, we treat it as an empty map: - size += sizeof(uint32_t); + size += sizeof(uint64_t); if (m->map_) { - size += m->map_->size() * sizeof(uint32_t); + size += m->map_->size() * sizeof(uint64_t); typename Map::const_iterator iter; for (iter = m->map_->begin(); iter != m->map_->end(); ++iter) { size += addr_serializer_.SizeOf(iter->first); @@ -215,27 +215,27 @@ char* ContainedRangeMapSerializer::Write( return NULL; } dest = addr_serializer_.Write(m->base_, dest); - dest = SimpleSerializer::Write(entry_serializer_.SizeOf(m->entry_), + dest = SimpleSerializer::Write(entry_serializer_.SizeOf(m->entry_), dest); dest = entry_serializer_.Write(m->entry_, dest); // Write map<: char* map_address = dest; if (m->map_ == NULL) { - dest = SimpleSerializer::Write(0, dest); + dest = SimpleSerializer::Write(0, dest); } else { - dest = SimpleSerializer::Write(m->map_->size(), dest); - uint32_t* offsets = reinterpret_cast(dest); - dest += sizeof(uint32_t) * m->map_->size(); + dest = SimpleSerializer::Write(m->map_->size(), dest); + uint64_t* offsets = reinterpret_cast(dest); + dest += sizeof(uint64_t) * m->map_->size(); char* key_address = dest; dest += sizeof(AddrType) * m->map_->size(); // Traverse map. typename Map::const_iterator iter; - int index = 0; + int64_t index = 0; for (iter = m->map_->begin(); iter != m->map_->end(); ++iter, ++index) { - offsets[index] = static_cast(dest - map_address); + offsets[index] = static_cast(dest - map_address); key_address = addr_serializer_.Write(iter->first, key_address); // Recursively write. dest = Write(iter->second, dest); @@ -246,8 +246,8 @@ char* ContainedRangeMapSerializer::Write( template char* ContainedRangeMapSerializer::Serialize( - const ContainedRangeMap* m, unsigned int* size) const { - unsigned int size_to_alloc = SizeOf(m); + const ContainedRangeMap* m, uint64_t* size) const { + uint64_t size_to_alloc = SizeOf(m); // Allocating memory. char* serialized_data = new char[size_to_alloc]; if (!serialized_data) { diff --git a/src/processor/map_serializers.h b/src/processor/map_serializers.h index 54153f8a2..933ff6fde 100644 --- a/src/processor/map_serializers.h +++ b/src/processor/map_serializers.h @@ -65,7 +65,7 @@ class StdMapSerializer { // Returns a pointer to the serialized data. If size != NULL, *size is set // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". - char* Serialize(const std::map& m, unsigned int* size) const; + char* Serialize(const std::map& m, uint64_t* size) const; private: SimpleSerializer key_serializer_; @@ -93,7 +93,7 @@ class AddressMapSerializer { // Returns a pointer to the serialized data. If size != NULL, *size is set // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". - char* Serialize(const AddressMap& m, unsigned int* size) const { + char* Serialize(const AddressMap& m, uint64_t* size) const { return std_map_serializer_.Serialize(m.map_, size); } @@ -120,7 +120,7 @@ class RangeMapSerializer { // Returns a pointer to the serialized data. If size != NULL, *size is set // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". - char* Serialize(const RangeMap& m, unsigned int* size) const; + char* Serialize(const RangeMap& m, uint64_t* size) const; private: // Convenient type name for Range. @@ -151,7 +151,7 @@ class ContainedRangeMapSerializer { // to the size of serialized data, i.e., SizeOf(m). // Caller has the ownership of memory allocated as "new char[]". char* Serialize(const ContainedRangeMap* m, - unsigned int* size) const; + uint64_t* size) const; private: // Convenient type name for the underlying map type. diff --git a/src/processor/map_serializers_unittest.cc b/src/processor/map_serializers_unittest.cc index cd31ddc83..855c4dae1 100644 --- a/src/processor/map_serializers_unittest.cc +++ b/src/processor/map_serializers_unittest.cc @@ -31,6 +31,7 @@ // // Author: Siyang Xie (lambxsy@google.com) +#include #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -49,8 +50,8 @@ #include "processor/range_map-inl.h" #include "processor/contained_range_map-inl.h" -typedef int32_t AddrType; -typedef int32_t EntryType; +typedef int64_t AddrType; +typedef int64_t EntryType; class TestStdMapSerializer : public ::testing::Test { protected: @@ -65,13 +66,13 @@ class TestStdMapSerializer : public ::testing::Test { std::map std_map_; google_breakpad::StdMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestStdMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { 0 }; - uint32_t correct_size = sizeof(correct_data); + const int64_t correct_data[] = { 0 }; + uint64_t correct_size = sizeof(correct_data); // std_map_ is empty. serialized_data_ = serializer_.Serialize(std_map_, &serialized_size_); @@ -81,17 +82,17 @@ TEST_F(TestStdMapSerializer, EmptyMapTestCase) { } TEST_F(TestStdMapSerializer, MapWithTwoElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 2, // Offsets - 20, 24, + 40, 48, // Keys 1, 3, // Values 2, 6 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); std_map_.insert(std::make_pair(1, 2)); std_map_.insert(std::make_pair(3, 6)); @@ -103,17 +104,17 @@ TEST_F(TestStdMapSerializer, MapWithTwoElementsTestCase) { } TEST_F(TestStdMapSerializer, MapWithFiveElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 5, // Offsets - 44, 48, 52, 56, 60, + 88, 96, 104, 112, 120, // Keys 1, 2, 3, 4, 5, // Values 11, 12, 13, 14, 15 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); for (int i = 1; i < 6; ++i) std_map_.insert(std::make_pair(i, 10 + i)); @@ -137,13 +138,13 @@ class TestAddressMapSerializer : public ::testing::Test { google_breakpad::AddressMap address_map_; google_breakpad::AddressMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestAddressMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { 0 }; - uint32_t correct_size = sizeof(correct_data); + const int64_t correct_data[] = { 0 }; + uint64_t correct_size = sizeof(correct_data); // std_map_ is empty. serialized_data_ = serializer_.Serialize(address_map_, &serialized_size_); @@ -153,17 +154,17 @@ TEST_F(TestAddressMapSerializer, EmptyMapTestCase) { } TEST_F(TestAddressMapSerializer, MapWithTwoElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 2, // Offsets - 20, 24, + 40, 48, // Keys 1, 3, // Values 2, 6 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); address_map_.Store(1, 2); address_map_.Store(3, 6); @@ -175,17 +176,17 @@ TEST_F(TestAddressMapSerializer, MapWithTwoElementsTestCase) { } TEST_F(TestAddressMapSerializer, MapWithFourElementsTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 4, // Offsets - 36, 40, 44, 48, + 72, 80, 88, 96, // Keys -6, -4, 8, 123, // Values 2, 3, 5, 8 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); address_map_.Store(-6, 2); address_map_.Store(-4, 3); @@ -212,13 +213,13 @@ class TestRangeMapSerializer : public ::testing::Test { google_breakpad::RangeMap range_map_; google_breakpad::RangeMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestRangeMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { 0 }; - uint32_t correct_size = sizeof(correct_data); + const int64_t correct_data[] = { 0 }; + uint64_t correct_size = sizeof(correct_data); // range_map_ is empty. serialized_data_ = serializer_.Serialize(range_map_, &serialized_size_); @@ -228,17 +229,17 @@ TEST_F(TestRangeMapSerializer, EmptyMapTestCase) { } TEST_F(TestRangeMapSerializer, MapWithOneRangeTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 1, // Offsets - 12, + 24, // Keys: high address 10, // Values: (low address, entry) pairs 1, 6 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); range_map_.StoreRange(1, 10, 6); @@ -249,17 +250,17 @@ TEST_F(TestRangeMapSerializer, MapWithOneRangeTestCase) { } TEST_F(TestRangeMapSerializer, MapWithThreeRangesTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { // # of nodes 3, // Offsets - 28, 36, 44, + 56, 72, 88, // Keys: high address - 5, 9, 20, + 5, 9, 20, // Values: (low address, entry) pairs - 2, 1, 6, 2, 10, 3 + 2, 1, 6, 2, 10, 3 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); ASSERT_TRUE(range_map_.StoreRange(2, 4, 1)); ASSERT_TRUE(range_map_.StoreRange(6, 4, 2)); @@ -285,18 +286,18 @@ class TestContainedRangeMapSerializer : public ::testing::Test { google_breakpad::ContainedRangeMap crm_map_; google_breakpad::ContainedRangeMapSerializer serializer_; - uint32_t serialized_size_; + uint64_t serialized_size_; char* serialized_data_; }; TEST_F(TestContainedRangeMapSerializer, EmptyMapTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { 0, // base address of root - 4, // size of entry + 8, // size of entry 0, // entry stored at root 0 // empty map stored at root }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); // crm_map_ is empty. serialized_data_ = serializer_.Serialize(&crm_map_, &serialized_size_); @@ -306,21 +307,21 @@ TEST_F(TestContainedRangeMapSerializer, EmptyMapTestCase) { } TEST_F(TestContainedRangeMapSerializer, MapWithOneRangeTestCase) { - const int32_t correct_data[] = { + const int64_t correct_data[] = { 0, // base address of root - 4, // size of entry + 8, // size of entry 0, // entry stored at root // Map stored at root node: 1, // # of nodes - 12, // offset + 24, // offset 9, // key // value: a child ContainedRangeMap 3, // base address of child CRM - 4, // size of entry + 8, // size of entry -1, // entry stored in child CRM 0 // empty sub-map stored in child CRM }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); crm_map_.StoreRange(3, 7, -1); @@ -342,27 +343,27 @@ TEST_F(TestContainedRangeMapSerializer, MapWithTwoLevelsTestCase) { // / \ | // 3~4 6~7 16-20 level 2: grandchild1, grandchild2, grandchild3 - const int32_t correct_data[] = { + const int64_t correct_data[] = { // root: base, entry_size, entry - 0, 4, 0, + 0, 8, 0, // root's map: # of nodes, offset1, offset2, key1, key2 - 2, 20, 84, 8, 20, + 2, 40, 168, 8, 20, // child1: base, entry_size, entry: - 2, 4, -1, + 2, 8, -1, // child1's map: # of nodes, offset1, offset2, key1, key2 - 2, 20, 36, 4, 7, + 2, 40, 72, 4, 7, // grandchild1: base, entry_size, entry, empty_map - 3, 4, -1, 0, + 3, 8, -1, 0, // grandchild2: base, entry_size, entry, empty_map - 6, 4, -1, 0, + 6, 8, -1, 0, // child2: base, entry_size, entry: - 10, 4, -1, + 10, 8, -1, // child2's map: # of nodes, offset1, key1 - 1, 12, 20, + 1, 24, 20, // grandchild3: base, entry_size, entry, empty_map - 16, 4, -1, 0 + 16, 8, -1, 0 }; - uint32_t correct_size = sizeof(correct_data); + uint64_t correct_size = sizeof(correct_data); // Store child1. ASSERT_TRUE(crm_map_.StoreRange(2, 7, -1)); diff --git a/src/processor/range_map-inl.h b/src/processor/range_map-inl.h index d1a194f31..e38fe50a7 100644 --- a/src/processor/range_map-inl.h +++ b/src/processor/range_map-inl.h @@ -250,7 +250,7 @@ bool RangeMap::RetrieveNearestRange( template bool RangeMap::RetrieveRangeAtIndex( - int index, EntryType* entry, AddressType* entry_base, + int64_t index, EntryType* entry, AddressType* entry_base, AddressType* entry_delta, AddressType* entry_size) const { BPLOG_IF(ERROR, !entry) << "RangeMap::RetrieveRangeAtIndex requires |entry|"; assert(entry); @@ -263,7 +263,7 @@ bool RangeMap::RetrieveRangeAtIndex( // Walk through the map. Although it's ordered, it's not a vector, so it // can't be addressed directly by index. MapConstIterator iterator = map_.begin(); - for (int this_index = 0; this_index < index; ++this_index) + for (int64_t this_index = 0; this_index < index; ++this_index) ++iterator; *entry = iterator->second.entry(); @@ -279,8 +279,8 @@ bool RangeMap::RetrieveRangeAtIndex( template -int RangeMap::GetCount() const { - return static_cast(map_.size()); +int64_t RangeMap::GetCount() const { + return static_cast(map_.size()); } diff --git a/src/processor/range_map.h b/src/processor/range_map.h index 578bd1442..1499ff296 100644 --- a/src/processor/range_map.h +++ b/src/processor/range_map.h @@ -40,6 +40,7 @@ #define PROCESSOR_RANGE_MAP_H__ +#include #include @@ -109,12 +110,12 @@ class RangeMap { // entry was shrunk down (original start address was increased by delta). // // RetrieveRangeAtIndex is not optimized for speedy operation. - bool RetrieveRangeAtIndex(int index, EntryType* entry, + bool RetrieveRangeAtIndex(int64_t index, EntryType* entry, AddressType* entry_base, AddressType* entry_delta, AddressType* entry_size) const; // Returns the number of ranges stored in the RangeMap. - int GetCount() const; + int64_t GetCount() const; // Empties the range map, restoring it to the state it was when it was // initially created. diff --git a/src/processor/range_map_unittest.cc b/src/processor/range_map_unittest.cc index 8735bb095..1bf9af24e 100644 --- a/src/processor/range_map_unittest.cc +++ b/src/processor/range_map_unittest.cc @@ -332,11 +332,11 @@ static bool RetrieveIndexTest(TestMap* range_map, int set) { return true; } -// Additional RetriveAtIndex test to expose the bug in RetrieveRangeAtIndex(). +// Additional RetrieveAtIndex test to expose the bug in RetrieveRangeAtIndex(). // Bug info: RetrieveRangeAtIndex() previously retrieves the high address of // entry, however, it is supposed to retrieve the base address of entry as // stated in the comment in range_map.h. -static bool RetriveAtIndexTest2() { +static bool RetrieveAtIndexTest2() { scoped_ptr range_map(new TestMap()); // Store ranges with base address = 2 * object_id: @@ -360,7 +360,7 @@ static bool RetriveAtIndexTest2() { int expected_base = 2 * object->id(); if (base != expected_base) { - fprintf(stderr, "FAILED: RetriveAtIndexTest2 index %d, " + fprintf(stderr, "FAILED: RetrieveAtIndexTest2 index %d, " "expected base %d, observed base %d", object_index, expected_base, base); return false; @@ -504,7 +504,7 @@ static bool RunTests() { // The RangeMap's own count of objects should also match. if (range_map->GetCount() != stored_count) { fprintf(stderr, "FAILED: stored object count doesn't match GetCount, " - "expected %d, observed %d\n", + "expected %d, observed %ld\n", stored_count, range_map->GetCount()); return false; @@ -542,7 +542,7 @@ static bool RunTests() { } } - if (!RetriveAtIndexTest2()) { + if (!RetrieveAtIndexTest2()) { fprintf(stderr, "FAILED: did not pass RetrieveAtIndexTest2()\n"); return false; } diff --git a/src/processor/static_address_map_unittest.cc b/src/processor/static_address_map_unittest.cc index aebab976c..2e8461675 100644 --- a/src/processor/static_address_map_unittest.cc +++ b/src/processor/static_address_map_unittest.cc @@ -124,7 +124,7 @@ class TestStaticAddressMap : public ::testing::Test { srand(time(0)); for (int data_item = 0; data_item < testsize[testcase]; ++data_item) { - // Retrive (aka, search) for target address and compare results from + // Retrieve (aka, search) for target address and compare results from // AddressMap and StaticAddressMap. // First, assign the search target to be one of original testdata that is diff --git a/src/processor/static_contained_range_map-inl.h b/src/processor/static_contained_range_map-inl.h index 60606ddc6..589bce4bf 100644 --- a/src/processor/static_contained_range_map-inl.h +++ b/src/processor/static_contained_range_map-inl.h @@ -45,7 +45,7 @@ template StaticContainedRangeMap::StaticContainedRangeMap( const char *base) : base_(*(reinterpret_cast(base))), - entry_size_(*(reinterpret_cast(base + sizeof(base_)))), + entry_size_(*(reinterpret_cast(base + sizeof(base_)))), entry_ptr_(reinterpret_cast( base + sizeof(base_) + sizeof(entry_size_))), map_(base + sizeof(base_) + sizeof(entry_size_) + entry_size_) { diff --git a/src/processor/static_contained_range_map.h b/src/processor/static_contained_range_map.h index 86e54666d..eea03db72 100644 --- a/src/processor/static_contained_range_map.h +++ b/src/processor/static_contained_range_map.h @@ -86,7 +86,7 @@ class StaticContainedRangeMap { // actually contain an entry, so its entry_ field is meaningless. For // this reason, the entry_ field should only be accessed on child // ContainedRangeMap objects, and never on |this|. - uint32_t entry_size_; + uint64_t entry_size_; const EntryType *entry_ptr_; // The map containing child ranges, keyed by each child range's high diff --git a/src/processor/static_contained_range_map_unittest.cc b/src/processor/static_contained_range_map_unittest.cc index d0507a4b1..c6d78400d 100644 --- a/src/processor/static_contained_range_map_unittest.cc +++ b/src/processor/static_contained_range_map_unittest.cc @@ -31,6 +31,7 @@ // // Author: Siyang Xie (lambxsy@google.com) +#include #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -161,7 +162,7 @@ class TestStaticCRMMap : public ::testing::Test { protected: void SetUp(); - // A referrence map for testing StaticCRMMap. + // A reference map for testing StaticCRMMap. google_breakpad::ContainedRangeMap crm_map_; // Static version of crm_map using serialized data of crm_map. @@ -177,7 +178,7 @@ class TestStaticCRMMap : public ::testing::Test { void TestStaticCRMMap::SetUp() { // First, do the StoreRange tests. This validates the containment // rules. - // We confirm the referrence map correctly stores data during setup. + // We confirm the reference map correctly stores data during setup. ASSERT_TRUE (crm_map_.StoreRange(10, 10, 1)); ASSERT_FALSE(crm_map_.StoreRange(10, 10, 2)); // exactly equal to 1 ASSERT_FALSE(crm_map_.StoreRange(11, 10, 3)); // begins inside 1 and extends up @@ -228,7 +229,7 @@ void TestStaticCRMMap::SetUp() { ASSERT_FALSE(crm_map_.StoreRange(86, 2, 48)); // Serialize crm_map to generate serialized data. - unsigned int size; + uint64_t size; serialized_data_.reset(serializer_.Serialize(&crm_map_, &size)); BPLOG(INFO) << "Serialized data size: " << size << " Bytes."; @@ -239,12 +240,12 @@ void TestStaticCRMMap::SetUp() { TEST_F(TestStaticCRMMap, TestEmptyMap) { CRMMap empty_crm_map; - unsigned int size; + uint64_t size; scoped_array serialized_data; serialized_data.reset(serializer_.Serialize(&empty_crm_map, &size)); scoped_ptr test_map(new TestMap(serialized_data.get())); - const unsigned int kCorrectSizeForEmptyMap = 16; + const unsigned int kCorrectSizeForEmptyMap = 24; ASSERT_EQ(kCorrectSizeForEmptyMap, size); const int *entry_test; @@ -259,12 +260,12 @@ TEST_F(TestStaticCRMMap, TestSingleElementMap) { int entry = 1; crm_map.StoreRange(10, 10, entry); - unsigned int size; + uint64_t size; scoped_array serialized_data; serialized_data.reset(serializer_.Serialize(&crm_map, &size)); scoped_ptr test_map(new TestMap(serialized_data.get())); - const unsigned int kCorrectSizeForSingleElementMap = 40; + const unsigned int kCorrectSizeForSingleElementMap = 60; ASSERT_EQ(kCorrectSizeForSingleElementMap, size); const int *entry_test; @@ -283,7 +284,7 @@ TEST_F(TestStaticCRMMap, TestRetrieveRangeEntries) { crm_map.StoreRange(2, 6, 1); crm_map.StoreRange(2, 7, 2); - unsigned int size; + uint64_t size; scoped_array serialized_data; serialized_data.reset(serializer_.Serialize(&crm_map, &size)); scoped_ptr test_map(new TestMap(serialized_data.get())); diff --git a/src/processor/static_map-inl.h b/src/processor/static_map-inl.h index f9929efe9..2e827b9f7 100644 --- a/src/processor/static_map-inl.h +++ b/src/processor/static_map-inl.h @@ -47,23 +47,23 @@ StaticMap::StaticMap(const char* raw_data) : raw_data_(raw_data), compare_() { // First 4 Bytes store the number of nodes. - num_nodes_ = *(reinterpret_cast(raw_data_)); + num_nodes_ = *(reinterpret_cast(raw_data_)); - offsets_ = reinterpret_cast( + offsets_ = reinterpret_cast( raw_data_ + sizeof(num_nodes_)); keys_ = reinterpret_cast( - raw_data_ + (1 + num_nodes_) * sizeof(uint32_t)); + raw_data_ + (1 + num_nodes_) * sizeof(uint64_t)); } // find(), lower_bound() and upper_bound() implement binary search algorithm. template StaticMapIterator StaticMap::find(const Key& key) const { - int begin = 0; - int end = num_nodes_; - int middle; - int compare_result; + int64_t begin = 0; + int64_t end = num_nodes_; + int64_t middle; + int64_t compare_result; while (begin < end) { middle = begin + (end - begin) / 2; compare_result = compare_(key, GetKeyAtIndex(middle)); @@ -81,10 +81,10 @@ StaticMap::find(const Key& key) const { template StaticMapIterator StaticMap::lower_bound(const Key& key) const { - int begin = 0; - int end = num_nodes_; - int middle; - int comp_result; + int64_t begin = 0; + int64_t end = num_nodes_; + int64_t middle; + int64_t comp_result; while (begin < end) { middle = begin + (end - begin) / 2; comp_result = compare_(key, GetKeyAtIndex(middle)); @@ -102,10 +102,10 @@ StaticMap::lower_bound(const Key& key) const { template StaticMapIterator StaticMap::upper_bound(const Key& key) const { - int begin = 0; - int end = num_nodes_; - int middle; - int compare_result; + int64_t begin = 0; + int64_t end = num_nodes_; + int64_t middle; + int64_t compare_result; while (begin < end) { middle = begin + (end - begin) / 2; compare_result = compare_(key, GetKeyAtIndex(middle)); @@ -124,22 +124,22 @@ template bool StaticMap::ValidateInMemoryStructure() const { // check the number of nodes is non-negative: if (!raw_data_) return false; - int32_t num_nodes = *(reinterpret_cast(raw_data_)); + int64_t num_nodes = *(reinterpret_cast(raw_data_)); if (num_nodes < 0) { BPLOG(INFO) << "StaticMap check failed: negative number of nodes"; return false; } - int node_index = 0; + int64_t node_index = 0; if (num_nodes_) { - uint64_t first_offset = sizeof(int32_t) * (num_nodes_ + 1) + uint64_t first_offset = sizeof(int64_t) * (num_nodes_ + 1) + sizeof(Key) * num_nodes_; // Num_nodes_ is too large. if (first_offset > 0xffffffffUL) { BPLOG(INFO) << "StaticMap check failed: size exceeds limit"; return false; } - if (offsets_[node_index] != static_cast(first_offset)) { + if (offsets_[node_index] != static_cast(first_offset)) { BPLOG(INFO) << "StaticMap check failed: first node offset is incorrect"; return false; } @@ -162,7 +162,7 @@ bool StaticMap::ValidateInMemoryStructure() const { } template -const Key StaticMap::GetKeyAtIndex(int index) const { +const Key StaticMap::GetKeyAtIndex(int64_t index) const { if (index < 0 || index >= num_nodes_) { BPLOG(ERROR) << "Key index out of range error"; // Key type is required to be primitive type. Return 0 if index is invalid. diff --git a/src/processor/static_map.h b/src/processor/static_map.h index a8f495820..be2c7d55a 100644 --- a/src/processor/static_map.h +++ b/src/processor/static_map.h @@ -34,11 +34,11 @@ // // The chunk of memory should contain data with pre-defined pattern: // **************** header *************** -// uint32 (4 bytes): number of nodes -// uint32 (4 bytes): address offset of node1's mapped_value -// uint32 (4 bytes): address offset of node2's mapped_value +// int64 (8 bytes): number of nodes +// uint64 (8 bytes): address offset of node1's mapped_value +// uint64 (8 bytes): address offset of node2's mapped_value // ... -// uint32 (4 bytes): address offset of nodeN's mapped_value +// uint64 (8 bytes): address offset of nodeN's mapped_value // // ************* Key array ************ // (X bytes): node1's key @@ -54,9 +54,6 @@ // // REQUIREMENT: Key type MUST be primitive type or pointers so that: // X = sizeof(typename Key); -// -// Note: since address offset is stored as uint32, user should keep in mind that -// StaticMap only supports up to 4GB size of memory data. // Author: Siyang Xie (lambxsy@google.com) @@ -72,7 +69,7 @@ namespace google_breakpad { template class DefaultCompare { public: - int operator()(const Key& k1, const Key& k2) const { + int64_t operator()(const Key& k1, const Key& k2) const { if (k1 < k2) return -1; if (k1 == k2) return 0; return 1; @@ -93,13 +90,13 @@ class StaticMap { explicit StaticMap(const char* raw_data); inline bool empty() const { return num_nodes_ == 0; } - inline unsigned int size() const { return num_nodes_; } + inline uint64_t size() const { return num_nodes_; } // Return iterators. inline iterator begin() const { return IteratorAtIndex(0); } inline iterator last() const { return IteratorAtIndex(num_nodes_ - 1); } inline iterator end() const { return IteratorAtIndex(num_nodes_); } - inline iterator IteratorAtIndex(int index) const { + inline iterator IteratorAtIndex(int64_t index) const { return iterator(raw_data_, index); } @@ -120,18 +117,18 @@ class StaticMap { bool ValidateInMemoryStructure() const; private: - const Key GetKeyAtIndex(int i) const; + const Key GetKeyAtIndex(int64_t i) const; // Start address of a raw memory chunk with serialized data. const char* raw_data_; // Number of nodes in the static map. - int32_t num_nodes_; + int64_t num_nodes_; // Array of offset addresses for stored values. // For example: // address_of_i-th_node_value = raw_data_ + offsets_[i] - const uint32_t* offsets_; + const uint64_t* offsets_; // keys_[i] = key of i_th node const Key* keys_; diff --git a/src/processor/static_map_iterator-inl.h b/src/processor/static_map_iterator-inl.h index 01a1b7f7b..e24a70963 100644 --- a/src/processor/static_map_iterator-inl.h +++ b/src/processor/static_map_iterator-inl.h @@ -43,12 +43,12 @@ namespace google_breakpad { template StaticMapIterator::StaticMapIterator(const char* base, - const int& index): + int64_t index): index_(index), base_(base) { // See static_map.h for documentation on // bytes format of serialized StaticMap data. - num_nodes_ = *(reinterpret_cast(base_)); - offsets_ = reinterpret_cast(base_ + sizeof(num_nodes_)); + num_nodes_ = *(reinterpret_cast(base_)); + offsets_ = reinterpret_cast(base_ + sizeof(num_nodes_)); keys_ = reinterpret_cast( base_ + (1 + num_nodes_) * sizeof(num_nodes_)); } diff --git a/src/processor/static_map_iterator.h b/src/processor/static_map_iterator.h index 6c190e975..8cfe9e080 100644 --- a/src/processor/static_map_iterator.h +++ b/src/processor/static_map_iterator.h @@ -87,21 +87,21 @@ class StaticMapIterator { friend class StaticMap; // Only StaticMap can call this constructor. - explicit StaticMapIterator(const char* base, const int32_t& index); + explicit StaticMapIterator(const char* base, int64_t index); // Index of node that the iterator is pointing to. - int32_t index_; + int64_t index_; // Beginning address of the serialized map data. const char* base_; // Number of nodes in the map. Use it to identify end() iterator. - int32_t num_nodes_; + int64_t num_nodes_; // offsets_ is an array of offset addresses of mapped values. // For example: // address_of_i-th_node_value = base_ + offsets_[i] - const uint32_t* offsets_; + const uint64_t* offsets_; // keys_[i] = key of i_th node. const Key* keys_; diff --git a/src/processor/static_map_unittest.cc b/src/processor/static_map_unittest.cc index 67b201b63..1fbb9a498 100644 --- a/src/processor/static_map_unittest.cc +++ b/src/processor/static_map_unittest.cc @@ -30,6 +30,7 @@ // // Author: Siyang Xie (lambxsy@google.com) +#include #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -52,8 +53,8 @@ class SimpleMapSerializer { static char* Serialize(const std::map& stdmap, unsigned int* size = NULL) { unsigned int size_per_node = - sizeof(uint32_t) + sizeof(Key) + sizeof(Value); - unsigned int memsize = sizeof(int32_t) + size_per_node * stdmap.size(); + sizeof(uint64_t) + sizeof(Key) + sizeof(Value); + unsigned int memsize = sizeof(int64_t) + size_per_node * stdmap.size(); if (size) *size = memsize; // Allocate memory for serialized data: @@ -61,12 +62,12 @@ class SimpleMapSerializer { char* address = mem; // Writer the number of nodes: - new (address) uint32_t(static_cast(stdmap.size())); - address += sizeof(uint32_t); + new (address) uint64_t(static_cast(stdmap.size())); + address += sizeof(uint64_t); // Nodes' offset: - uint32_t* offsets = reinterpret_cast(address); - address += sizeof(uint32_t) * stdmap.size(); + uint64_t* offsets = reinterpret_cast(address); + address += sizeof(uint64_t) * stdmap.size(); // Keys: Key* keys = reinterpret_cast(address); @@ -98,16 +99,16 @@ class TestInvalidMap : public ::testing::Test { }; TEST_F(TestInvalidMap, TestNegativeNumberNodes) { - memset(data, 0xff, sizeof(uint32_t)); // Set the number of nodes = -1 + memset(data, 0xff, sizeof(uint64_t)); // Set the number of nodes = -1 test_map = TestMap(data); ASSERT_FALSE(test_map.ValidateInMemoryStructure()); } TEST_F(TestInvalidMap, TestWrongOffsets) { - uint32_t* header = reinterpret_cast(data); - const uint32_t kNumNodes = 2; - const uint32_t kHeaderOffset = - sizeof(uint32_t) + kNumNodes * (sizeof(uint32_t) + sizeof(KeyType)); + uint64_t* header = reinterpret_cast(data); + const uint64_t kNumNodes = 2; + const uint64_t kHeaderOffset = + sizeof(uint64_t) + kNumNodes * (sizeof(uint64_t) + sizeof(KeyType)); header[0] = kNumNodes; header[1] = kHeaderOffset + 3; // Wrong offset for first node @@ -121,16 +122,16 @@ TEST_F(TestInvalidMap, TestWrongOffsets) { } TEST_F(TestInvalidMap, TestUnSortedKeys) { - uint32_t* header = reinterpret_cast(data); - const uint32_t kNumNodes = 2; - const uint32_t kHeaderOffset = - sizeof(uint32_t) + kNumNodes * (sizeof(uint32_t) + sizeof(KeyType)); + uint64_t* header = reinterpret_cast(data); + const uint64_t kNumNodes = 2; + const uint64_t kHeaderOffset = + sizeof(uint64_t) + kNumNodes * (sizeof(uint64_t) + sizeof(KeyType)); header[0] = kNumNodes; header[1] = kHeaderOffset; header[2] = kHeaderOffset + sizeof(ValueType); KeyType* keys = reinterpret_cast( - data + (kNumNodes + 1) * sizeof(uint32_t)); + data + (kNumNodes + 1) * sizeof(uint64_t)); // Set keys in non-increasing order. keys[0] = 10; keys[1] = 7; @@ -174,10 +175,10 @@ class TestValidMap : public ::testing::Test { // Set correct size of memory allocation for each test case. unsigned int size_per_node = - sizeof(uint32_t) + sizeof(KeyType) + sizeof(ValueType); + sizeof(uint64_t) + sizeof(KeyType) + sizeof(ValueType); for (testcase = 0; testcase < kNumberTestCases; ++testcase) { correct_size[testcase] = - sizeof(uint32_t) + std_map[testcase].size() * size_per_node; + sizeof(uint64_t) + std_map[testcase].size() * size_per_node; } } diff --git a/src/processor/static_range_map-inl.h b/src/processor/static_range_map-inl.h index b0a327479..3ffeec975 100644 --- a/src/processor/static_range_map-inl.h +++ b/src/processor/static_range_map-inl.h @@ -102,7 +102,7 @@ bool StaticRangeMap::RetrieveNearestRange( template bool StaticRangeMap::RetrieveRangeAtIndex( - int index, const EntryType*& entry, + int64_t index, const EntryType*& entry, AddressType* entry_base, AddressType* entry_size) const { if (index >= GetCount()) { diff --git a/src/processor/static_range_map.h b/src/processor/static_range_map.h index 319085db2..c63d2fa94 100644 --- a/src/processor/static_range_map.h +++ b/src/processor/static_range_map.h @@ -73,12 +73,12 @@ class StaticRangeMap { // range. // // RetrieveRangeAtIndex is not optimized for speedy operation. - bool RetrieveRangeAtIndex(int index, const EntryType*& entry, + bool RetrieveRangeAtIndex(int64_t index, const EntryType*& entry, AddressType* entry_base, AddressType* entry_size) const; // Returns the number of ranges stored in the RangeMap. - inline int GetCount() const { return map_.size(); } + inline int64_t GetCount() const { return map_.size(); } private: friend class ModuleComparer; From a1169061a81adc88a326261294b0af9d53caf18a Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Fri, 16 Feb 2024 12:08:14 -0500 Subject: [PATCH 12/37] Fix bad pointer comparison Bug: None Change-Id: I7f3709ee7e8b7e9e938850b1bbe24925e3e03c9b Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5300127 Reviewed-by: Mark Mentovai --- src/common/mac/dump_syms.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/mac/dump_syms.cc b/src/common/mac/dump_syms.cc index c3ea1b82c..3d3413a2b 100644 --- a/src/common/mac/dump_syms.cc +++ b/src/common/mac/dump_syms.cc @@ -394,7 +394,7 @@ bool DumpSymbols::CreateEmptyModule(scoped_ptr& module) { // In certain cases, it is possible that architecture info can't be reliably // determined, e.g. new architectures that breakpad is unware of. In that // case, avoid crashing and return false instead. - if (selected_arch_name == kUnknownArchName) { + if (strcmp(selected_arch_name, kUnknownArchName) == 0) { return false; } From b48a2d4a8ed40024d38502db5aeabd3ab6876687 Mon Sep 17 00:00:00 2001 From: Leonard Grey Date: Fri, 16 Feb 2024 12:19:44 -0500 Subject: [PATCH 13/37] Fix string format specifier Split out from https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5300127/1..4/src/processor/range_map_unittest.cc#b507 Bug: None Change-Id: Iedc8508b6c123a54fdd1de2e2719dcd70adb03a6 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5300128 Reviewed-by: Ivan Penkov --- src/processor/range_map.h | 2 +- src/processor/range_map_unittest.cc | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/processor/range_map.h b/src/processor/range_map.h index 1499ff296..05c659758 100644 --- a/src/processor/range_map.h +++ b/src/processor/range_map.h @@ -40,7 +40,7 @@ #define PROCESSOR_RANGE_MAP_H__ -#include +#include #include diff --git a/src/processor/range_map_unittest.cc b/src/processor/range_map_unittest.cc index 1bf9af24e..568df3673 100644 --- a/src/processor/range_map_unittest.cc +++ b/src/processor/range_map_unittest.cc @@ -35,6 +35,7 @@ #include // Must come first #endif +#include #include #include @@ -504,7 +505,7 @@ static bool RunTests() { // The RangeMap's own count of objects should also match. if (range_map->GetCount() != stored_count) { fprintf(stderr, "FAILED: stored object count doesn't match GetCount, " - "expected %d, observed %ld\n", + "expected %d, observed %" PRId64 "\n", stored_count, range_map->GetCount()); return false; From f032e4c3b4eaef64432482cb00eb0c3df33cde48 Mon Sep 17 00:00:00 2001 From: Ivan Penkov Date: Wed, 21 Feb 2024 15:38:10 -0800 Subject: [PATCH 14/37] Addressing a potential integer underflow in minidump.cc and stackwalker_arm64.cc Also, defining __STDC_FORMAT_MACROS before including Change-Id: Ia25c4353412ca70512efef5e98670687ab575750 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5310977 Reviewed-by: Joshua Peraza --- src/processor/minidump.cc | 8 +++++++- src/processor/proc_maps_linux.cc | 3 ++- src/processor/range_map_unittest.cc | 4 ++++ src/processor/stackwalker_arm64.cc | 7 ++++++- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/processor/minidump.cc b/src/processor/minidump.cc index 83e5a8688..3de36784e 100644 --- a/src/processor/minidump.cc +++ b/src/processor/minidump.cc @@ -32,6 +32,11 @@ // // Author: Mark Mentovai +// For PRI* macros, before anything else might #include it. +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif /* __STDC_FORMAT_MACROS */ + #ifdef HAVE_CONFIG_H #include // Must come first #endif @@ -39,6 +44,7 @@ #include "google_breakpad/processor/minidump.h" #include +#include #include #include #include @@ -820,7 +826,7 @@ bool MinidumpContext::Read(uint32_t expected_size) { // Context may include xsave registers and so be larger than // sizeof(MDRawContextX86). For now we skip this extended data. if (context_flags & MD_CONTEXT_X86_XSTATE) { - size_t bytes_left = expected_size - sizeof(MDRawContextX86); + int64_t bytes_left = expected_size - sizeof(MDRawContextX86); if (bytes_left > kMaxXSaveAreaSize) { BPLOG(ERROR) << "MinidumpContext oversized xstate area"; return false; diff --git a/src/processor/proc_maps_linux.cc b/src/processor/proc_maps_linux.cc index 6fcb909a1..5234633e6 100644 --- a/src/processor/proc_maps_linux.cc +++ b/src/processor/proc_maps_linux.cc @@ -2,9 +2,10 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +// For PRI* macros, before anything else might #include it. #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS -#endif +#endif /* __STDC_FORMAT_MACROS */ #ifdef HAVE_CONFIG_H #include // Must come first diff --git a/src/processor/range_map_unittest.cc b/src/processor/range_map_unittest.cc index 568df3673..f8e3dfbe5 100644 --- a/src/processor/range_map_unittest.cc +++ b/src/processor/range_map_unittest.cc @@ -30,6 +30,10 @@ // // Author: Mark Mentovai +// For PRI* macros, before anything else might #include it. +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif /* __STDC_FORMAT_MACROS */ #ifdef HAVE_CONFIG_H #include // Must come first diff --git a/src/processor/stackwalker_arm64.cc b/src/processor/stackwalker_arm64.cc index 0fa02e04f..8ab22ba94 100644 --- a/src/processor/stackwalker_arm64.cc +++ b/src/processor/stackwalker_arm64.cc @@ -36,6 +36,7 @@ #include // Must come first #endif +#include #include #include "common/scoped_ptr.h" @@ -269,11 +270,15 @@ void StackwalkerARM64::CorrectRegLRByFramePointer( // Searching for a real callee frame. Skipping inline frames since they // don't contain context (and cannot be downcasted to StackFrameARM64). - size_t last_frame_callee_id = frames.size() - 2; + int64_t last_frame_callee_id = frames.size() - 2; while (last_frame_callee_id >= 0 && frames[last_frame_callee_id]->trust == StackFrame::FRAME_TRUST_INLINE) { last_frame_callee_id--; } + // last_frame_callee_id should not become negative because at the top of the + // stack trace we always have a context frame (FRAME_TRUST_CONTEXT) so the + // above loop should end before last_frame_callee_id gets negative. But we are + // being extra defensive here and bail if it ever becomes negative. if (last_frame_callee_id < 0) return; StackFrameARM64* last_frame_callee = static_cast(frames[last_frame_callee_id]); From 26666b80bac3254787976bb7be52db3f4a303663 Mon Sep 17 00:00:00 2001 From: Nathan Moinvaziri Date: Fri, 8 Mar 2024 18:19:33 -0800 Subject: [PATCH 15/37] Check for failed seek when skipping x86 xstate data After reading minidump context with x86 xstate data, we seek past it, so there needs to be a check to see that the seek operation did not fail. Change-Id: I8ed4394e452c435234116d97fd65856345cb618a Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5324600 Reviewed-by: Mike Frysinger --- src/processor/minidump.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/processor/minidump.cc b/src/processor/minidump.cc index 3de36784e..22dce7626 100644 --- a/src/processor/minidump.cc +++ b/src/processor/minidump.cc @@ -903,8 +903,12 @@ bool MinidumpContext::Read(uint32_t expected_size) { // Skip extended xstate data if present in X86 context. if (context_flags & MD_CONTEXT_X86_XSTATE) { - minidump_->SeekSet((minidump_->Tell() - sizeof(MDRawContextX86)) + - expected_size); + if (!minidump_->SeekSet( + (minidump_->Tell() - sizeof(MDRawContextX86)) + + expected_size)) { + BPLOG(ERROR) << "MinidumpContext cannot seek to past xstate data"; + return false; + } } break; From 76788faa4ef163081f82273bfca7fae8a734b971 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Tue, 12 Mar 2024 13:25:00 -0700 Subject: [PATCH 16/37] Print better error messages in parts of CompilationUnit Change CompilationUnit::SkipAttribute() and CompilationUnit::ProcessOffsetBaseAttribute() to print the unknown form types they encounter. Along the way, fix various formatting issues, stray trailing spaces, and update some NULLs to nullptrs. Change-Id: I5b3e72c9c6c9cb31e8a930e54418adb74b02f6c2 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5366242 Reviewed-by: Joshua Peraza --- src/common/dwarf/dwarf2reader.cc | 24 ++++++++++----------- src/common/dwarf/dwarf2reader.h | 36 ++++++++++++++++---------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc index 8a8bf6f86..51228f745 100644 --- a/src/common/dwarf/dwarf2reader.cc +++ b/src/common/dwarf/dwarf2reader.cc @@ -285,8 +285,8 @@ const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start, case DW_FORM_sec_offset: return start + reader_->OffsetSize(); } - fprintf(stderr,"Unhandled form type"); - return NULL; + fprintf(stderr,"Unhandled form type 0x%x\n", form); + return nullptr; } // Read the abbreviation offset from a compilation unit header. @@ -369,7 +369,7 @@ void CompilationUnit::ReadHeader() { break; case DW_UT_type: case DW_UT_split_type: - is_type_unit_ = true; + is_type_unit_ = true; headerptr += ReadTypeSignature(headerptr); headerptr += ReadTypeOffset(headerptr); break; @@ -512,7 +512,7 @@ const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( &len)); start += len; return ProcessOffsetBaseAttribute(dieoffset, start, attr, form, - implicit_const); + implicit_const); case DW_FORM_flag_present: return start; @@ -568,10 +568,10 @@ const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( // offset size. assert(header_.version >= 2); if (header_.version == 2) { - reader_->ReadAddress(start); + reader_->ReadAddress(start); return start + reader_->AddressSize(); } else if (header_.version >= 3) { - reader_->ReadOffset(start); + reader_->ReadOffset(start); return start + reader_->OffsetSize(); } break; @@ -647,8 +647,8 @@ const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute( reader_->ReadUnsignedLEB128(start, &len); return start + len; } - fprintf(stderr, "Unhandled form type\n"); - return NULL; + fprintf(stderr,"Unhandled form type 0x%x\n", form); + return nullptr; } // If one really wanted, you could merge SkipAttribute and @@ -896,11 +896,11 @@ const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset, uint64_t dieoffset_copy = dieoffset; const uint8_t* start_copy = start; for (AttributeList::const_iterator i = abbrev.attributes.begin(); - i != abbrev.attributes.end(); - i++) { + i != abbrev.attributes.end(); + i++) { start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy, - i->attr_, i->form_, - i->value_); + i->attr_, i->form_, + i->value_); } } diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h index b6bd2f31a..c023211e7 100644 --- a/src/common/dwarf/dwarf2reader.h +++ b/src/common/dwarf/dwarf2reader.h @@ -570,10 +570,10 @@ class CompilationUnit { // Special version of ProcessAttribute, for finding str_offsets_base and // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, - const uint8_t* start, - enum DwarfAttribute attr, - enum DwarfForm form, - uint64_t implicit_const); + const uint8_t* start, + enum DwarfAttribute attr, + enum DwarfForm form, + uint64_t implicit_const); // Called when we have an attribute with unsigned data to give to // our handler. The attribute is for the DIE at OFFSET from the @@ -909,11 +909,11 @@ class DwpReader { // // For example, here is a complete (uncompressed) table describing the // function above: -// +// // insn cfa r0 r1 ... ra // ======================================= // func+0: sp cfa[0] -// func+1: sp+16 cfa[0] +// func+1: sp+16 cfa[0] // func+2: sp+16 cfa[-4] cfa[0] // func+11: sp+20 cfa[-4] cfa[0] // func+21: sp+20 cfa[0] @@ -947,7 +947,7 @@ class DwpReader { // save them, caller-saves registers are probably dead in the caller // anyway, so compilers usually don't generate CFA for caller-saves // registers.) -// +// // - Exactly where the CFA points is a matter of convention that // depends on the architecture and ABI in use. In the example, the // CFA is the value the stack pointer had upon entry to the @@ -968,7 +968,7 @@ class DwpReader { // reduces the size of the data by mentioning only the addresses and // columns at which changes take place. So for the above, DWARF CFI // data would only actually mention the following: -// +// // insn cfa r0 r1 ... ra // ======================================= // func+0: sp cfa[0] @@ -976,7 +976,7 @@ class DwpReader { // func+2: cfa[-4] // func+11: sp+20 // func+21: r0 -// func+22: sp +// func+22: sp // // In fact, this is the way the parser reports CFI to the consumer: as // a series of statements of the form, "At address X, column Y changed @@ -1094,7 +1094,7 @@ class CallFrameInfo { // handling are described here, rather poorly: // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html - // + // // The mechanics of C++ exception handling, personality routines, // and language-specific data areas are described here, rather nicely: // http://www.codesourcery.com/public/cxx-abi/abi-eh.html @@ -1127,7 +1127,7 @@ class CallFrameInfo { // The start of this entry in the buffer. const uint8_t* start; - + // Which kind of entry this is. // // We want to be able to use this for error reporting even while we're @@ -1161,13 +1161,13 @@ class CallFrameInfo { struct CIE: public Entry { uint8_t version; // CFI data version number string augmentation; // vendor format extension markers - uint64_t code_alignment_factor; // scale for code address adjustments + uint64_t code_alignment_factor; // scale for code address adjustments int data_alignment_factor; // scale for stack pointer adjustments unsigned return_address_register; // which register holds the return addr // True if this CIE includes Linux C++ ABI 'z' augmentation data. bool has_z_augmentation; - + // Parsed 'z' augmentation data. These are meaningful only if // has_z_augmentation is true. bool has_z_lsda; // The 'z' augmentation included 'L'. @@ -1221,7 +1221,7 @@ class CallFrameInfo { class ValExpressionRule; class RuleMap; class State; - + // Parse the initial length and id of a CFI entry, either a CIE, an FDE, // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the // data to parse. On success, populate ENTRY as appropriate, and return @@ -1308,7 +1308,7 @@ class CallFrameInfo::Handler { // Immediately after a call to Entry, the handler should assume that // the rule for each callee-saves register is "unchanged" --- that // is, that the register still has the value it had in the caller. - // + // // If a *Rule function returns true, we continue processing this entry's // instructions. If a *Rule function returns false, we stop evaluating // instructions, and skip to the next entry. Either way, we call End @@ -1497,13 +1497,13 @@ class CallFrameInfo::Reporter { // The instruction at INSN_OFFSET in the entry at OFFSET, of kind // KIND, establishes a rule that cites the CFA, but we have not // established a CFA rule yet. - virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, + virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, uint64_t insn_offset); // The instruction at INSN_OFFSET in the entry at OFFSET, of kind // KIND, is a DW_CFA_restore_state instruction, but the stack of // saved states is empty. - virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, + virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, uint64_t insn_offset); // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry @@ -1511,7 +1511,7 @@ class CallFrameInfo::Reporter { // rule, whereas the current state does have a CFA rule. This is // bogus input, which the CallFrameInfo::Handler interface doesn't // (and shouldn't) have any way to report. - virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, + virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, uint64_t insn_offset); protected: From a86123f1e7cc27d0e3de185467b3495cf0851f54 Mon Sep 17 00:00:00 2001 From: Joshua Peraza Date: Tue, 19 Mar 2024 09:04:19 -0700 Subject: [PATCH 17/37] Fix some undefined behavior It is undefined behavior to access mis-aligned pointers. Change-Id: I06f676c6a4b92a4bb58db76b8f23750d18148940 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5380937 Reviewed-by: Ivan Penkov --- src/third_party/libdisasm/ia32_modrm.c | 9 +++++++-- src/third_party/libdisasm/x86_imm.c | 25 ++----------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/src/third_party/libdisasm/ia32_modrm.c b/src/third_party/libdisasm/ia32_modrm.c index b0fe2ed3d..c80c1b95e 100644 --- a/src/third_party/libdisasm/ia32_modrm.c +++ b/src/third_party/libdisasm/ia32_modrm.c @@ -2,6 +2,9 @@ #include "ia32_reg.h" #include "x86_imm.h" +#include +#include + /* NOTE: when decoding ModR/M and SIB, we have to add 1 to all register * values obtained from decoding the ModR/M or SIB byte, since they * are encoded with eAX = 0 and the tables in ia32_reg.c use eAX = 1. @@ -72,16 +75,18 @@ static unsigned int imm32_signsized( unsigned char *buf, size_t buf_len, return 0; } + int16_t local_short; switch (size) { case 1: *dest = *((signed char *) buf); break; case 2: - *dest = *((signed short *) buf); + memcpy(&local_short, buf, 2); + *dest = local_short; break; case 4: default: - *dest = *((signed int *) buf); + memcpy(dest, buf, 4); break; } diff --git a/src/third_party/libdisasm/x86_imm.c b/src/third_party/libdisasm/x86_imm.c index cd59bfc9a..11c8a7de9 100644 --- a/src/third_party/libdisasm/x86_imm.c +++ b/src/third_party/libdisasm/x86_imm.c @@ -2,36 +2,15 @@ #include "x86_imm.h" #include +#include unsigned int x86_imm_signsized( unsigned char * buf, size_t buf_len, void *dest, unsigned int size ) { - signed char *cp = (signed char *) dest; - signed short *sp = (signed short *) dest; - int32_t *lp = (int32_t *) dest; - qword_t *qp = (qword_t *) dest; - if ( size > buf_len ) { return 0; } - /* Copy 'size' bytes from *buf to *op - * return number of bytes copied */ - switch (size) { - case 1: /* BYTE */ - *cp = *((signed char *) buf); - break; - case 2: /* WORD */ - *sp = *((signed short *) buf); - break; - case 6: - case 8: /* QWORD */ - *qp = *((qword_t *) buf); - break; - case 4: /* DWORD */ - default: - *lp = *((int32_t *) buf); - break; - } + memcpy(dest, buf, size); return (size); } From e5dc1f86b2defc8f76b56fb473b804e42e75c41d Mon Sep 17 00:00:00 2001 From: Joshua Peraza Date: Wed, 27 Mar 2024 13:46:24 -0700 Subject: [PATCH 18/37] Reduce logging during minidump processing Change-Id: Ie62795137770cff7cda7494c5527457b1e355897 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5402921 Reviewed-by: Ivan Penkov --- src/processor/minidump_processor.cc | 2 -- src/processor/stackwalker_amd64.cc | 1 - src/processor/stackwalker_arm64.cc | 2 -- 3 files changed, 5 deletions(-) diff --git a/src/processor/minidump_processor.cc b/src/processor/minidump_processor.cc index 5d2dea6d8..c8d4d4c9a 100644 --- a/src/processor/minidump_processor.cc +++ b/src/processor/minidump_processor.cc @@ -823,8 +823,6 @@ static void CalculateFaultAddressFromInstruction(Minidump* dump, DisassemblerObjdump disassembler(context->GetContextCPU(), memory_region, instruction_ptr); - fprintf(stderr, "%s %s %s\n", disassembler.operation().c_str(), - disassembler.src().c_str(), disassembler.dest().c_str()); if (!disassembler.IsValid()) { BPLOG(INFO) << "Disassembling fault instruction failed."; return; diff --git a/src/processor/stackwalker_amd64.cc b/src/processor/stackwalker_amd64.cc index b934e73b4..f3d7f119e 100644 --- a/src/processor/stackwalker_amd64.cc +++ b/src/processor/stackwalker_amd64.cc @@ -147,7 +147,6 @@ StackFrameAMD64* StackwalkerAMD64::GetCallerByCFIFrameInfo( return NULL; if (!frame->context.rip || !frame->context.rsp) { - BPLOG(ERROR) << "invalid rip/rsp"; return NULL; } diff --git a/src/processor/stackwalker_arm64.cc b/src/processor/stackwalker_arm64.cc index 8ab22ba94..6c3168ecc 100644 --- a/src/processor/stackwalker_arm64.cc +++ b/src/processor/stackwalker_arm64.cc @@ -289,8 +289,6 @@ void StackwalkerARM64::CorrectRegLRByFramePointer( uint64_t last_fp = 0; if (last_frame_callee_fp && !memory_->GetMemoryAtAddress(last_frame_callee_fp, &last_fp)) { - BPLOG(ERROR) << "Unable to read last_fp from last_last_fp: 0x" << std::hex - << last_frame_callee_fp; return; } // Give up if STACK CFI doesn't agree with frame pointer. From a8a43bad6fdb2d63b623862bea8f6bc959d8edc9 Mon Sep 17 00:00:00 2001 From: Sim Sun Date: Wed, 17 Apr 2024 16:52:25 -0700 Subject: [PATCH 19/37] Prefer to use .note.gnu.build-id section as ElfBuildId Summary: Android prefers `.note.gnu-build-id`[0] as BuildId than anyother PT_NOTE phdrs. This CL tries to read ElfBuildId from `.note.gnu.build-id` first to fix the BuildId mismatch issue between Android tombstone report and breakpad symbol file. e.g ``` Section Headers: [Nr] Name Type Address Offset Size EntSize Flags Link Info Align [ 0] NULL 0000000000000000 00000000 0000000000000000 0000000000000000 0 0 0 [ 1] .note.androi[...] NOTE 0000000000000270 00000270 0000000000000018 0000000000000000 A 0 0 4 [ 2] .note.hwasan[...] NOTE 0000000000000288 00000288 000000000000001c 0000000000000000 A 0 0 4 [ 3] .note.gnu.bu[...] NOTE 00000000000002a4 000002a4 0000000000000020 0000000000000000 A 0 0 4 $ readelf -x .note.hwasan.globals Hex dump of section '.note.hwasan.globals': 0x00000288 08000000 08000000 03000000 4c4c564d ............LLVM 0x00000298 00000000 98890100 409d0100 ........@... $ readelf -x .note.gnu.build-id Hex dump of section '.note.gnu.build-id': 0x000002a4 04000000 10000000 03000000 474e5500 ............GNU. 0x000002b4 a4eb3625 c1db6452 1e881973 bfdff9bd ..6%..dR...s.... ``` The BuildId in tombstone: ``` libartbase.so (BuildId: c7e463b51b0898d442269a421353bdbd) ``` But the breakpad dump_syms got: ``` MODULE Linux arm64 000189989D40000100000000000000000 libartbase.so INFO CODE_ID 98890100409D0100 ````` [0] https://cs.android.com/android/platform/superproject/main/+/main:system/unwinding/libunwindstack/ElfInterface.cpp;l=423-427;drc=3d19fbcc09b1b44928639b06cd0b88f735cd988d Change-Id: I01e3514e0e2a1ea163c03093055284448ed4e89c Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5463566 Reviewed-by: Joshua Peraza --- src/common/linux/file_id.cc | 14 +++++----- src/common/linux/file_id_unittest.cc | 39 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/common/linux/file_id.cc b/src/common/linux/file_id.cc index d8fcbd8d6..296531608 100644 --- a/src/common/linux/file_id.cc +++ b/src/common/linux/file_id.cc @@ -99,6 +99,13 @@ static bool ElfClassBuildIDNoteIdentifier(const void* section, size_t length, // and copy it into |identifier|. static bool FindElfBuildIDNote(const void* elf_mapped_base, wasteful_vector& identifier) { + void* note_section; + size_t note_size; + if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, + (const void**)¬e_section, ¬e_size)) { + return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); + } + PageAllocator allocator; // lld normally creates 2 PT_NOTEs, gold normally creates 1. auto_wasteful_vector segs(&allocator); @@ -110,13 +117,6 @@ static bool FindElfBuildIDNote(const void* elf_mapped_base, } } - void* note_section; - size_t note_size; - if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE, - (const void**)¬e_section, ¬e_size)) { - return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier); - } - return false; } diff --git a/src/common/linux/file_id_unittest.cc b/src/common/linux/file_id_unittest.cc index 0ef453532..7be239f60 100644 --- a/src/common/linux/file_id_unittest.cc +++ b/src/common/linux/file_id_unittest.cc @@ -326,6 +326,45 @@ TYPED_TEST(FileIDTest, BuildIDMultiplePH) { EXPECT_EQ(expected_identifier_string, identifier_string); } +TYPED_TEST(FileIDTest, BuildIDMultiplePHPreferGNU) { + const uint8_t kExpectedIdentifierBytes[] = + {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13}; + const string expected_identifier_string = + this->get_file_id(kExpectedIdentifierBytes); + + ELF elf(EM_386, TypeParam::kClass, kLittleEndian); + Section text(kLittleEndian); + text.Append(4096, 0); + elf.AddSection(".text", text, SHT_PROGBITS); + Notes notes1(kLittleEndian); + notes1.AddNote(0, "Linux", + reinterpret_cast("\0x42\0x02\0\0"), 4); + Notes notes2(kLittleEndian); + notes2.AddNote(NT_GNU_BUILD_ID, "GNU", + reinterpret_cast("\0x42\0x02\0\0"), 4); + Notes notes3(kLittleEndian); + notes3.AddNote(NT_GNU_BUILD_ID, "GNU", kExpectedIdentifierBytes, + sizeof(kExpectedIdentifierBytes)); + int note1_idx = elf.AddSection(".note1", notes1, SHT_NOTE); + int note2_idx = elf.AddSection(".note2", notes2, SHT_NOTE); + int note3_idx = elf.AddSection(".note.gnu.build-id", notes3, SHT_NOTE); + elf.AddSegment(note1_idx, note1_idx, PT_NOTE); + elf.AddSegment(note2_idx, note2_idx, PT_NOTE); + elf.AddSegment(note3_idx, note3_idx, PT_NOTE); + elf.Finish(); + this->GetElfContents(elf); + + id_vector identifier(this->make_vector()); + EXPECT_TRUE(FileID::ElfFileIdentifierFromMappedFile(this->elfdata, + identifier)); + EXPECT_EQ(sizeof(kExpectedIdentifierBytes), identifier.size()); + + string identifier_string = FileID::ConvertIdentifierToUUIDString(identifier); + EXPECT_EQ(expected_identifier_string, identifier_string); +} + // Test to make sure two files with different text sections produce // different hashes when not using a build id. TYPED_TEST(FileIDTest, UniqueHashes) { From f88a1aa2af1d1fd795716365245761b89041139d Mon Sep 17 00:00:00 2001 From: Konstantin Mandrika Date: Wed, 24 Apr 2024 12:48:55 -0400 Subject: [PATCH 20/37] Use zlib's uncompress() to uncompress headers Change-Id: Ib785633b229d3f17534da9b0de93255e80fddd70 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5484086 Reviewed-by: Joshua Peraza --- src/common/linux/dump_symbols.cc | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index b693fc9e1..8e3b97420 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -315,31 +315,17 @@ uint32_t GetCompressionHeader( std::pair UncompressZlibSectionContents( const uint8_t* compressed_buffer, uint64_t compressed_size, uint64_t uncompressed_size) { - z_stream stream; - memset(&stream, 0, sizeof stream); - - stream.avail_in = compressed_size; - stream.avail_out = uncompressed_size; - stream.next_in = const_cast(compressed_buffer); - google_breakpad::scoped_array uncompressed_buffer( new uint8_t[uncompressed_size]); - int status = inflateInit(&stream); - while (stream.avail_in != 0 && status == Z_OK) { - stream.next_out = - uncompressed_buffer.get() + uncompressed_size - stream.avail_out; + uLongf size = static_cast(uncompressed_size); - if ((status = inflate(&stream, Z_FINISH)) != Z_STREAM_END) { - break; - } - - status = inflateReset(&stream); - } + int status = uncompress( + uncompressed_buffer.get(), &size, compressed_buffer, compressed_size); - return inflateEnd(&stream) != Z_OK || status != Z_OK || stream.avail_out != 0 - ? std::make_pair(nullptr, 0) - : std::make_pair(uncompressed_buffer.release(), uncompressed_size); + return status != Z_OK + ? std::make_pair(nullptr, 0) + : std::make_pair(uncompressed_buffer.release(), uncompressed_size); } #ifdef HAVE_LIBZSTD From 6fe410dcd5ddacec9554e8c217f3c37d0362da21 Mon Sep 17 00:00:00 2001 From: Sean Carpenter Date: Tue, 7 May 2024 16:01:37 -0700 Subject: [PATCH 21/37] Adds a Flag to Perserve Address Offset in dump_syms By default, the .code section start address is normalized to zero, and subsequently the address of all functions and lines are adjusted accordingly. This change adds a command line flag that perserves the original addresses. This is a requirement for those that may wish to use the crash resolver, but do not have a minidump file to work with (most commonly in firmware). TEST=make check BUG=b:328511413 Change-Id: Id7c477196ffed7fd319029b28ed2607192249c6c Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5519154 Reviewed-by: Rob Barnes Reviewed-by: Ivan Penkov Reviewed-by: Ivan Penkov --- src/common/linux/dump_symbols.cc | 2 +- src/common/linux/dump_symbols.h | 7 ++-- src/common/linux/dump_symbols_unittest.cc | 6 ++-- src/common/module.cc | 23 +++++++----- src/common/module.h | 5 +-- src/common/module_unittest.cc | 43 +++++++++++++++++++++++ src/tools/linux/dump_syms/dump_syms.cc | 7 +++- 7 files changed, 76 insertions(+), 17 deletions(-) diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 8e3b97420..2877c9ca8 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -1280,7 +1280,7 @@ bool WriteSymbolFile(const string& load_path, &module)) return false; - bool result = module->Write(sym_stream, options.symbol_data); + bool result = module->Write(sym_stream, options.symbol_data, options.preserve_load_address); delete module; return result; } diff --git a/src/common/linux/dump_symbols.h b/src/common/linux/dump_symbols.h index f1802ecc3..d38ba6fe9 100644 --- a/src/common/linux/dump_symbols.h +++ b/src/common/linux/dump_symbols.h @@ -48,14 +48,17 @@ class Module; struct DumpOptions { DumpOptions(SymbolData symbol_data, bool handle_inter_cu_refs, - bool enable_multiple_field) + bool enable_multiple_field, + bool preserve_load_address) : symbol_data(symbol_data), handle_inter_cu_refs(handle_inter_cu_refs), - enable_multiple_field(enable_multiple_field) {} + enable_multiple_field(enable_multiple_field), + preserve_load_address(preserve_load_address) {} SymbolData symbol_data; bool handle_inter_cu_refs; bool enable_multiple_field; + bool preserve_load_address; }; // Find all the debugging information in OBJ_FILE, an ELF executable diff --git a/src/common/linux/dump_symbols_unittest.cc b/src/common/linux/dump_symbols_unittest.cc index 55dcdeed8..df3cca529 100644 --- a/src/common/linux/dump_symbols_unittest.cc +++ b/src/common/linux/dump_symbols_unittest.cc @@ -95,7 +95,7 @@ TYPED_TEST(DumpSymbols, Invalid) { Elf32_Ehdr header; memset(&header, 0, sizeof(header)); Module* module; - DumpOptions options(ALL_SYMBOL_DATA, true, false); + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); EXPECT_FALSE(ReadSymbolDataInternal(reinterpret_cast(&header), "foo", "Linux", @@ -132,7 +132,7 @@ TYPED_TEST(DumpSymbols, SimplePublic) { this->GetElfContents(elf); Module* module; - DumpOptions options(ALL_SYMBOL_DATA, true, false); + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, "foo", "Linux", @@ -189,7 +189,7 @@ TYPED_TEST(DumpSymbols, SimpleBuildID) { this->GetElfContents(elf); Module* module; - DumpOptions options(ALL_SYMBOL_DATA, true, false); + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, "foo", "Linux", diff --git a/src/common/module.cc b/src/common/module.cc index b6f5da7e6..6f81b5119 100644 --- a/src/common/module.cc +++ b/src/common/module.cc @@ -380,7 +380,7 @@ bool Module::AddressIsInModule(Address address) const { return false; } -bool Module::Write(std::ostream& stream, SymbolData symbol_data) { +bool Module::Write(std::ostream& stream, SymbolData symbol_data, bool preserve_load_address) { stream << "MODULE " << os_ << " " << architecture_ << " " << id_ << " " << name_ << "\n"; if (!stream.good()) @@ -390,6 +390,13 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { stream << "INFO CODE_ID " << code_id_ << "\n"; } + // load_address is subtracted from each line. If we use zero instead, we + // preserve the original addresses present in the ELF binary. + Address load_offset = load_address_; + if (preserve_load_address) { + load_offset = 0; + } + if (symbol_data & SYMBOLS_AND_FILES) { // Get all referenced inline origins. set inline_origins; @@ -406,13 +413,13 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { return ReportError(); } } + // Write out inline origins. for (InlineOrigin* origin : inline_origins) { stream << "INLINE_ORIGIN " << origin->id << " " << origin->name << "\n"; if (!stream.good()) return ReportError(); } - // Write out functions and their inlines and lines. for (FunctionSet::const_iterator func_it = functions_.begin(); func_it != functions_.end(); ++func_it) { @@ -421,7 +428,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { for (auto range_it = func->ranges.cbegin(); range_it != func->ranges.cend(); ++range_it) { stream << "FUNC " << (func->is_multiple ? "m " : "") << hex - << (range_it->address - load_address_) << " " << range_it->size + << (range_it->address - load_offset) << " " << range_it->size << " " << func->parameter_size << " " << func->name << dec << "\n"; @@ -434,7 +441,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { stream << in->inline_nest_level << " " << in->call_site_line << " " << in->getCallSiteFileID() << " " << in->origin->id << hex; for (const Range& r : in->ranges) - stream << " " << (r.address - load_address_) << " " << r.size; + stream << " " << (r.address - load_offset) << " " << r.size; stream << dec << "\n"; }; Module::Inline::InlineDFS(func->inlines, write_inline); @@ -445,7 +452,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { (line_it->address >= range_it->address) && (line_it->address < (range_it->address + range_it->size))) { stream << hex - << (line_it->address - load_address_) << " " + << (line_it->address - load_offset) << " " << line_it->size << " " << dec << line_it->number << " " @@ -464,7 +471,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { extern_it != externs_.end(); ++extern_it) { Extern* ext = extern_it->get(); stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex - << (ext->address - load_address_) << " 0 " << ext->name << dec + << (ext->address - load_offset) << " 0 " << ext->name << dec << "\n"; } } @@ -475,7 +482,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { frame_it != stack_frame_entries_.end(); ++frame_it) { StackFrameEntry* entry = frame_it->get(); stream << "STACK CFI INIT " << hex - << (entry->address - load_address_) << " " + << (entry->address - load_offset) << " " << entry->size << " " << dec; if (!stream.good() || !WriteRuleMap(entry->initial_rules, stream)) @@ -487,7 +494,7 @@ bool Module::Write(std::ostream& stream, SymbolData symbol_data) { for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin(); delta_it != entry->rule_changes.end(); ++delta_it) { stream << "STACK CFI " << hex - << (delta_it->first - load_address_) << " " << dec; + << (delta_it->first - load_offset) << " " << dec; if (!stream.good() || !WriteRuleMap(delta_it->second, stream)) return ReportError(); diff --git a/src/common/module.h b/src/common/module.h index 28e8e9c50..69eec36de 100644 --- a/src/common/module.h +++ b/src/common/module.h @@ -421,8 +421,9 @@ class Module { // If symbol_data is CFI then: // - all CFI records. // Addresses in the output are all relative to the load address - // established by SetLoadAddress. - bool Write(std::ostream& stream, SymbolData symbol_data); + // established by SetLoadAddress, unless preserve_load_address + // is equal to true, in which case each address will remain unchanged. + bool Write(std::ostream& stream, SymbolData symbol_data, bool preserve_load_address = false); // Place the name in the global set of strings. Return a StringView points to // a string inside the pool. diff --git a/src/common/module_unittest.cc b/src/common/module_unittest.cc index c51162e52..6b2c49a41 100644 --- a/src/common/module_unittest.cc +++ b/src/common/module_unittest.cc @@ -175,6 +175,49 @@ TEST(Module, WriteRelativeLoadAddress) { contents.c_str()); } +TEST(Module, WritePreserveLoadAddress) { + stringstream s; + Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID); + // Set the load address to something. Doesn't matter what. + // The goal of this test is to demonstrate that the load + // address does not impact any of the generated addresses + // when the preserve_load_address option is equal to true. + m.SetLoadAddress(0x1337ULL); + + Module::File* file = m.FindFile("filename-a.cc"); + Module::Function* function = new Module::Function( + "do_stuff", 0x110ULL); + Module::Range range(0x110ULL, 0x210ULL); + function->ranges.push_back(range); + function->parameter_size = 0x50ULL; + Module::Line line1 = { 0x110ULL, 0x1ULL, + file, 20ULL }; + function->lines.push_back(line1); + m.AddFunction(function); + + // Some stack information. + auto entry = std::make_unique(); + entry->address = 0x200ULL; + entry->size = 0x55ULL; + entry->initial_rules[".cfa"] = "some call frame info"; + entry->rule_changes[0x201ULL][".s0"] = + "some rules change call frame info"; + m.AddStackFrameEntry(std::move(entry)); + + bool preserve_load_address = true; + m.Write(s, ALL_SYMBOL_DATA, preserve_load_address); + string contents = s.str(); + EXPECT_STREQ("MODULE os-name architecture id-string name with spaces\n" + "FILE 0 filename-a.cc\n" + "FUNC 110 210 50 do_stuff\n" + "110 1 20 0\n" + "STACK CFI INIT 200 55" + " .cfa: some call frame info\n" + "STACK CFI 201" + " .s0: some rules change call frame info\n", + contents.c_str()); +} + TEST(Module, WriteOmitUnusedFiles) { Module m(MODULE_NAME, MODULE_OS, MODULE_ARCH, MODULE_ID); diff --git a/src/tools/linux/dump_syms/dump_syms.cc b/src/tools/linux/dump_syms/dump_syms.cc index 2fce23c2a..99deb958f 100644 --- a/src/tools/linux/dump_syms/dump_syms.cc +++ b/src/tools/linux/dump_syms/dump_syms.cc @@ -52,6 +52,8 @@ int usage(const char* self) { google_breakpad::BaseName(self).c_str()); fprintf(stderr, "Options:\n"); fprintf(stderr, " -i: Output module header information only.\n"); + fprintf(stderr, " -a Preserve the load address - Do not normalize " + "the load address to zero.\n"); fprintf(stderr, " -c Do not generate CFI section\n"); fprintf(stderr, " -d Generate INLINE/INLINE_ORIGIN records\n"); fprintf(stderr, " -r Do not handle inter-compilation " @@ -69,6 +71,7 @@ int usage(const char* self) { int main(int argc, char** argv) { if (argc < 2) return usage(argv[0]); + bool preserve_load_address = false; bool header_only = false; bool cfi = true; bool handle_inlines = false; @@ -82,6 +85,8 @@ int main(int argc, char** argv) { argv[arg_index][0] == '-') { if (strcmp("-i", argv[arg_index]) == 0) { header_only = true; + } else if (strcmp("-a", argv[arg_index]) == 0) { + preserve_load_address = true; } else if (strcmp("-c", argv[arg_index]) == 0) { cfi = false; } else if (strcmp("-d", argv[arg_index]) == 0) { @@ -143,7 +148,7 @@ int main(int argc, char** argv) { SymbolData symbol_data = (handle_inlines ? INLINES : NO_DATA) | (cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; google_breakpad::DumpOptions options(symbol_data, handle_inter_cu_refs, - enable_multiple_field); + enable_multiple_field, preserve_load_address); if (!WriteSymbolFile(binary, obj_name, obj_os, debug_dirs, options, std::cout)) { fprintf(saved_stderr, "Failed to write symbol file.\n"); From 54986d34d4611983243f8401563ff022b649afbf Mon Sep 17 00:00:00 2001 From: Sean Carpenter Date: Wed, 8 May 2024 16:25:28 -0700 Subject: [PATCH 22/37] Adds a custom Module ID flag to dump_syms TEST=make check BUG=b:328511413 Change-Id: I09d4c5e92213501b647311b804e65f272772bbaf Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5523975 Reviewed-by: Ivan Penkov Reviewed-by: Ivan Penkov --- src/common/linux/dump_symbols.cc | 34 +++++++++------ src/common/linux/dump_symbols.h | 3 ++ src/common/linux/dump_symbols_unittest.cc | 52 +++++++++++++++++++++++ src/tools/linux/dump_syms/dump_syms.cc | 13 +++++- 4 files changed, 88 insertions(+), 14 deletions(-) diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 2877c9ca8..846757d4e 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -1148,6 +1148,7 @@ template bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header, const string& obj_filename, const string& obj_os, + const string& module_id, scoped_ptr& module, bool enable_multiple_field) { PageAllocator allocator; @@ -1171,10 +1172,14 @@ bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header, ? name_buf : google_breakpad::BaseName(obj_filename); - // Add an extra "0" at the end. PDB files on Windows have an 'age' - // number appended to the end of the file identifier; this isn't - // really used or necessary on other platforms, but be consistent. - string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0"; + // Use the provided module_id + string id = module_id.empty() + // Add an extra "0" at the end. PDB files on Windows have an 'age' + // number appended to the end of the file identifier; this isn't + // really used or necessary on other platforms, but be consistent. + ? FileID::ConvertIdentifierToUUIDString(identifier) + "0" + : module_id; + // This is just the raw Build ID in hex. string code_id = FileID::ConvertIdentifierToString(identifier); @@ -1188,6 +1193,7 @@ template bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, const string& obj_filename, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** out_module) { @@ -1196,8 +1202,8 @@ bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header, *out_module = NULL; scoped_ptr module; - if (!InitModuleForElfClass(elf_header, obj_filename, obj_os, module, - options.enable_multiple_field)) { + if (!InitModuleForElfClass(elf_header, obj_filename, obj_os, module_id, + module, options.enable_multiple_field)) { return false; } @@ -1246,6 +1252,7 @@ namespace google_breakpad { bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** module) { @@ -1258,12 +1265,12 @@ bool ReadSymbolDataInternal(const uint8_t* obj_file, if (elfclass == ELFCLASS32) { return ReadSymbolDataElfClass( reinterpret_cast(obj_file), obj_filename, obj_os, - debug_dirs, options, module); + module_id, debug_dirs, options, module); } if (elfclass == ELFCLASS64) { return ReadSymbolDataElfClass( reinterpret_cast(obj_file), obj_filename, obj_os, - debug_dirs, options, module); + module_id, debug_dirs, options, module); } return false; @@ -1272,11 +1279,12 @@ bool ReadSymbolDataInternal(const uint8_t* obj_file, bool WriteSymbolFile(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, std::ostream& sym_stream) { Module* module; - if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options, + if (!ReadSymbolData(load_path, obj_file, obj_os, module_id, debug_dirs, options, &module)) return false; @@ -1291,6 +1299,7 @@ bool WriteSymbolFile(const string& load_path, bool WriteSymbolFileHeader(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, std::ostream& sym_stream) { MmapWrapper map_wrapper; void* elf_header = NULL; @@ -1309,14 +1318,14 @@ bool WriteSymbolFileHeader(const string& load_path, if (elfclass == ELFCLASS32) { if (!InitModuleForElfClass( reinterpret_cast(elf_header), obj_file, obj_os, - module, /*enable_multiple_field=*/false)) { + module_id, module, /*enable_multiple_field=*/false)) { fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str()); return false; } } else if (elfclass == ELFCLASS64) { if (!InitModuleForElfClass( reinterpret_cast(elf_header), obj_file, obj_os, - module, /*enable_multiple_field=*/false)) { + module_id, module, /*enable_multiple_field=*/false)) { fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str()); return false; } @@ -1331,6 +1340,7 @@ bool WriteSymbolFileHeader(const string& load_path, bool ReadSymbolData(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** module) { @@ -1340,7 +1350,7 @@ bool ReadSymbolData(const string& load_path, return false; return ReadSymbolDataInternal(reinterpret_cast(elf_header), - obj_file, obj_os, debug_dirs, options, module); + obj_file, obj_os, module_id, debug_dirs, options, module); } } // namespace google_breakpad diff --git a/src/common/linux/dump_symbols.h b/src/common/linux/dump_symbols.h index d38ba6fe9..25ede3e0f 100644 --- a/src/common/linux/dump_symbols.h +++ b/src/common/linux/dump_symbols.h @@ -70,6 +70,7 @@ struct DumpOptions { bool WriteSymbolFile(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, std::ostream& sym_stream); @@ -81,6 +82,7 @@ bool WriteSymbolFile(const string& load_path, bool WriteSymbolFileHeader(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, std::ostream& sym_stream); // As above, but simply return the debugging information in MODULE @@ -89,6 +91,7 @@ bool WriteSymbolFileHeader(const string& load_path, bool ReadSymbolData(const string& load_path, const string& obj_file, const string& obj_os, + const string& module_id, const std::vector& debug_dirs, const DumpOptions& options, Module** module); diff --git a/src/common/linux/dump_symbols_unittest.cc b/src/common/linux/dump_symbols_unittest.cc index df3cca529..f20ac0dfc 100644 --- a/src/common/linux/dump_symbols_unittest.cc +++ b/src/common/linux/dump_symbols_unittest.cc @@ -55,6 +55,7 @@ namespace google_breakpad { bool ReadSymbolDataInternal(const uint8_t* obj_file, const string& obj_filename, const string& obj_os, + const string& module_id, const std::vector& debug_dir, const DumpOptions& options, Module** module); @@ -99,6 +100,7 @@ TYPED_TEST(DumpSymbols, Invalid) { EXPECT_FALSE(ReadSymbolDataInternal(reinterpret_cast(&header), "foo", "Linux", + "", vector(), options, &module)); @@ -136,6 +138,7 @@ TYPED_TEST(DumpSymbols, SimplePublic) { EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, "foo", "Linux", + "", vector(), options, &module)); @@ -151,6 +154,54 @@ TYPED_TEST(DumpSymbols, SimplePublic) { delete module; } +TYPED_TEST(DumpSymbols, ModuleIdOverride) { + ELF elf(TypeParam::kMachine, TypeParam::kClass, kLittleEndian); + // Zero out text section for simplicity. + Section text(kLittleEndian); + text.Append(4096, 0); + elf.AddSection(".text", text, SHT_PROGBITS); + + // Add a public symbol. + StringTable table(kLittleEndian); + SymbolTable syms(kLittleEndian, TypeParam::kAddrSize, table); + syms.AddSymbol("superfunc", + (typename TypeParam::Addr)0x1000, + (typename TypeParam::Addr)0x10, + // ELF32_ST_INFO works for 32-or 64-bit. + ELF32_ST_INFO(STB_GLOBAL, STT_FUNC), + SHN_UNDEF + 1); + int index = elf.AddSection(".dynstr", table, SHT_STRTAB); + elf.AddSection(".dynsym", syms, + SHT_DYNSYM, // type + SHF_ALLOC, // flags + 0, // addr + index, // link + sizeof(typename TypeParam::Sym)); // entsize + + elf.Finish(); + this->GetElfContents(elf); + + Module* module; + DumpOptions options(ALL_SYMBOL_DATA, true, false, false); + EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, + "foo", + "Linux", + "some_module_id", + vector(), + options, + &module)); + + stringstream s; + module->Write(s, ALL_SYMBOL_DATA); + const string expected = + string("MODULE Linux ") + TypeParam::kMachineName + + " some_module_id foo\n" + "INFO CODE_ID 00000000000000000000000000000000\n" + "PUBLIC 1000 0 superfunc\n"; + EXPECT_EQ(expected, s.str()); + delete module; +} + TYPED_TEST(DumpSymbols, SimpleBuildID) { ELF elf(TypeParam::kMachine, TypeParam::kClass, kLittleEndian); // Zero out text section for simplicity. @@ -193,6 +244,7 @@ TYPED_TEST(DumpSymbols, SimpleBuildID) { EXPECT_TRUE(ReadSymbolDataInternal(this->elfdata, "foo", "Linux", + "", vector(), options, &module)); diff --git a/src/tools/linux/dump_syms/dump_syms.cc b/src/tools/linux/dump_syms/dump_syms.cc index 99deb958f..007b46094 100644 --- a/src/tools/linux/dump_syms/dump_syms.cc +++ b/src/tools/linux/dump_syms/dump_syms.cc @@ -59,6 +59,7 @@ int usage(const char* self) { fprintf(stderr, " -r Do not handle inter-compilation " "unit references\n"); fprintf(stderr, " -v Print all warnings to stderr\n"); + fprintf(stderr, " -b Use specified id for the module id\n"); fprintf(stderr, " -n Use specified name for name of the object\n"); fprintf(stderr, " -o Use specified name for the " "operating system\n"); @@ -79,6 +80,7 @@ int main(int argc, char** argv) { bool log_to_stderr = false; bool enable_multiple_field = false; std::string obj_name; + std::string module_id; const char* obj_os = "Linux"; int arg_index = 1; while (arg_index < argc && strlen(argv[arg_index]) > 0 && @@ -95,6 +97,13 @@ int main(int argc, char** argv) { handle_inter_cu_refs = false; } else if (strcmp("-v", argv[arg_index]) == 0) { log_to_stderr = true; + } else if (strcmp("-b", argv[arg_index]) == 0) { + if (arg_index + 1 >= argc) { + fprintf(stderr, "Missing argument to -b\n"); + return usage(argv[0]); + } + module_id = argv[arg_index + 1]; + ++arg_index; } else if (strcmp("-n", argv[arg_index]) == 0) { if (arg_index + 1 >= argc) { fprintf(stderr, "Missing argument to -n\n"); @@ -140,7 +149,7 @@ int main(int argc, char** argv) { obj_name = binary; if (header_only) { - if (!WriteSymbolFileHeader(binary, obj_name, obj_os, std::cout)) { + if (!WriteSymbolFileHeader(binary, obj_name, obj_os, module_id, std::cout)) { fprintf(saved_stderr, "Failed to process file.\n"); return 1; } @@ -149,7 +158,7 @@ int main(int argc, char** argv) { (cfi ? CFI : NO_DATA) | SYMBOLS_AND_FILES; google_breakpad::DumpOptions options(symbol_data, handle_inter_cu_refs, enable_multiple_field, preserve_load_address); - if (!WriteSymbolFile(binary, obj_name, obj_os, debug_dirs, options, + if (!WriteSymbolFile(binary, obj_name, obj_os, module_id, debug_dirs, options, std::cout)) { fprintf(saved_stderr, "Failed to write symbol file.\n"); return 1; From 417f5dbd0af9f96ca3e5faa99f41c064b89b40fd Mon Sep 17 00:00:00 2001 From: Jacob Hinton Date: Thu, 2 May 2024 15:19:16 -0700 Subject: [PATCH 23/37] preserve NT_FILE note converting md to core breakpad produced Minidumps have all the information needed to preserve the NT_FILE note that linux coredumps normally have, but are not currently included when converting using minidump-2-core. This adds a step to coredump generation to add the note to the coredump. This change allows for two improvements when loading coredumps into gdb: 1. for PIE code (default compilation now for gcc) gdb is now able to properly validate the PIE displacement and apply it to the debugging session, providing better stack trace quality. 2. gdb can now pick up so symbol files automatically without relying on the scripting normally recommended by google for breakpad where add-symbol-file is used to manually load symbols. Bug: https://crbug.com/google-breakpad/766 Change-Id: I8cb25246dce0ae3492eedd6d3a4efcf1783d414d Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5463435 Reviewed-by: Mike Frysinger --- src/tools/linux/md2core/minidump-2-core.cc | 86 ++++++++++++++++++++-- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/src/tools/linux/md2core/minidump-2-core.cc b/src/tools/linux/md2core/minidump-2-core.cc index 3e310bc7d..00384c405 100644 --- a/src/tools/linux/md2core/minidump-2-core.cc +++ b/src/tools/linux/md2core/minidump-2-core.cc @@ -349,6 +349,34 @@ struct CrashedProcess { std::vector link_map; }; +/* NT_FILE note as defined by linux kernel in fs/binfmt_elf.c + * is structured as: + * long count -- how many files are mapped + * long page_size -- units for file_ofs + * array of [COUNT] elements of + * long start + * long end + * long file_ofs + * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... + * we can re-use the file mappings info + */ +struct NtFileNote { + NtFileNote() + // XXX: we really should source page size from the minidump itself but + // I cannot find anywhere in the minidump generation code where this + // would be stashed. + : page_sz((unsigned long)getpagesize()), + filename_count(0), + filenames_length(0) { + } + + unsigned long page_sz; + unsigned long filename_count; + std::vector file_mappings; + std::vector filenames; + size_t filenames_length; +}; + #if defined(__i386__) static uint32_t U32(const uint8_t* data) { @@ -1298,6 +1326,8 @@ AugmentMappings(const Options& options, CrashedProcess* crashinfo, } AddDataToMapping(crashinfo, crashinfo->dynamic_data, (uintptr_t)crashinfo->debug.dynamic); + } else { + fprintf(stderr, "dynamic data empty\n"); } } @@ -1417,19 +1447,40 @@ main(int argc, const char* argv[]) { if (!writea(options.out_fd, &ehdr, sizeof(Ehdr))) return 1; + struct NtFileNote nt_file; + for (auto iter = crashinfo.mappings.begin(); + iter != crashinfo.mappings.end(); iter++) { + if (iter->second.filename.empty()) + continue; + nt_file.file_mappings.push_back(iter->second.start_address); + nt_file.file_mappings.push_back(iter->second.end_address); + nt_file.file_mappings.push_back(iter->second.offset); + nt_file.filenames.push_back(iter->second.filename); + nt_file.filenames_length += iter->second.filename.length() + 1; + nt_file.filename_count += 1; + } + // implementation of NT_FILE note seems to pad alignment by 4 bytes but + // keep the header size the true size of the note. so we keep nt_file_align + // as separate field. + size_t nt_file_data_sz = (2 * sizeof(unsigned long)) + + (nt_file.file_mappings.size() * sizeof(unsigned long)) + + nt_file.filenames_length; + size_t nt_file_align = nt_file_data_sz % 4 == 0 ? 0 : 4 - + (nt_file_data_sz % 4); size_t offset = sizeof(Ehdr) + ehdr.e_phnum * sizeof(Phdr); size_t filesz = sizeof(Nhdr) + 8 + sizeof(prpsinfo) + - // sizeof(Nhdr) + 8 + sizeof(user) + - sizeof(Nhdr) + 8 + crashinfo.auxv_length + - crashinfo.threads.size() * ( - (sizeof(Nhdr) + 8 + sizeof(prstatus)) + // sizeof(Nhdr) + 8 + sizeof(user) + + sizeof(Nhdr) + 8 + crashinfo.auxv_length + + sizeof(Nhdr) + 8 + nt_file_data_sz + nt_file_align + + crashinfo.threads.size() * ( + (sizeof(Nhdr) + 8 + sizeof(prstatus)) #if defined(__i386__) || defined(__x86_64__) - + sizeof(Nhdr) + 8 + sizeof(user_fpregs_struct) + + sizeof(Nhdr) + 8 + sizeof(user_fpregs_struct) #endif #if defined(__i386__) - + sizeof(Nhdr) + 8 + sizeof(user_fpxregs_struct) + + sizeof(Nhdr) + 8 + sizeof(user_fpxregs_struct) #endif - ); + ); Phdr phdr; memset(&phdr, 0, sizeof(Phdr)); @@ -1492,6 +1543,27 @@ main(int argc, const char* argv[]) { return 1; } + nhdr.n_descsz = nt_file_data_sz; + nhdr.n_type = NT_FILE; + if (!writea(options.out_fd, &nhdr, sizeof(nhdr)) || + !writea(options.out_fd, "CORE\0\0\0\0", 8) || + !writea(options.out_fd, + &nt_file.filename_count, sizeof(nt_file.filename_count)) || + !writea(options.out_fd, &nt_file.page_sz, sizeof(nt_file.page_sz))) { + return 1; + } + for (auto iter = nt_file.file_mappings.begin(); + iter != nt_file.file_mappings.end(); iter++) { + if (!writea(options.out_fd, &*iter, sizeof(*iter))) + return 1; + } + for (auto iter = nt_file.filenames.begin(); + iter != nt_file.filenames.end(); iter++) { + if (!writea(options.out_fd, iter->c_str(), iter->length() + 1)) + return 1; + } + writea(options.out_fd, "\0\0\0\0", nt_file_align); + for (const auto& current_thread : crashinfo.threads) { if (current_thread.tid == crashinfo.exception.tid) { // Use the exception record's context for the crashed thread instead of From 9dd7d3497e7f6d58e9459e0e6be28b1d4b41d35f Mon Sep 17 00:00:00 2001 From: Tyrel Russell Date: Thu, 23 May 2024 12:05:02 -0400 Subject: [PATCH 24/37] Add error handling for common failure case seen processing debug symbols Propagates error from ProcessDIEs to the top level where it can be handled. Change-Id: Ib6ba171ff642a898c75233a3f70995837c6c0552 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5564423 Reviewed-by: Joshua Peraza --- src/common/dwarf/dwarf2reader.cc | 20 +++++++++++++++++--- src/common/dwarf/dwarf2reader.h | 2 +- src/common/linux/dump_symbols.cc | 21 +++++++++++++++------ 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc index 51228f745..8bd1d860b 100644 --- a/src/common/dwarf/dwarf2reader.cc +++ b/src/common/dwarf/dwarf2reader.cc @@ -463,7 +463,11 @@ uint64_t CompilationUnit::Start() { } // Now that we have our abbreviations, start processing DIE's. - ProcessDIEs(); + if (!ProcessDIEs()) { + // If ProcessDIEs fails return 0, ourlength must be non-zero + // as it is equal to header_.length + (12 or 4) + return 0; + } // If this is a skeleton compilation unit generated with split DWARF, // and the client needs the full debug info, we need to find the full @@ -922,7 +926,7 @@ const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset, return start; } -void CompilationUnit::ProcessDIEs() { +bool CompilationUnit::ProcessDIEs() { const uint8_t* dieptr = after_header_; size_t len; @@ -953,13 +957,22 @@ void CompilationUnit::ProcessDIEs() { if (abbrev_num == 0) { if (die_stack.size() == 0) // If it is padding, then we are done with the compilation unit's DIEs. - return; + return true; const uint64_t offset = die_stack.top(); die_stack.pop(); handler_->EndDIE(offset); continue; } + // Abbrev > abbrev_.size() indicates a corruption in the dwarf file + // We attempt to recover + if (abbrev_num > abbrevs_->size()) { + fprintf(stderr, "An invalid abbrev was referenced %d / %d. Stopped " + "procesing following DIEs in this CU.", abbrev_num, + abbrevs_->size()); + return false; + } + const Abbrev& abbrev = abbrevs_->at(static_cast(abbrev_num)); const enum DwarfTag tag = abbrev.tag; if (!handler_->StartDIE(absolute_offset, tag)) { @@ -989,6 +1002,7 @@ void CompilationUnit::ProcessDIEs() { handler_->EndDIE(absolute_offset); } } + return true; } // Check for a valid ELF file and return the Address size. diff --git a/src/common/dwarf/dwarf2reader.h b/src/common/dwarf/dwarf2reader.h index c023211e7..bd2e4d2e9 100644 --- a/src/common/dwarf/dwarf2reader.h +++ b/src/common/dwarf/dwarf2reader.h @@ -664,7 +664,7 @@ class CompilationUnit { } // Processes all DIEs for this compilation unit - void ProcessDIEs(); + bool ProcessDIEs(); // Skips the die with attributes specified in ABBREV starting at // START, and return the new place to position the stream to. diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 846757d4e..5bb434319 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -498,7 +498,11 @@ bool LoadDwarf(const string& dwarf_filename, &byte_reader, &die_dispatcher); // Process the entire compilation unit; get the offset of the next. - offset += reader.Start(); + uint64_t result = reader.Start(); + if (result == 0) { + return false; + } + offset += result; // Start to process split dwarf file. if (reader.ShouldProcessSplitDwarf()) { StartProcessSplitDwarf(&reader, module, endianness, handle_inter_cu_refs, @@ -878,6 +882,7 @@ bool LoadSymbols(const string& obj_file, const char* names_end = names + section_names->sh_size; bool found_debug_info_section = false; bool found_usable_info = false; + bool usable_info_parsed = false; if ((options.symbol_data & SYMBOLS_AND_FILES) || (options.symbol_data & INLINES)) { @@ -893,8 +898,10 @@ bool LoadSymbols(const string& obj_file, found_debug_info_section = true; found_usable_info = true; info->LoadedSection(".stab"); - if (!LoadStabs(elf_header, stab_section, stabstr_section, - big_endian, module)) { + bool result = LoadStabs(elf_header, stab_section, stabstr_section, + big_endian, module); + usable_info_parsed = usable_info_parsed || result; + if (!result) { fprintf(stderr, "%s: \".stab\" section found, but failed to load" " STABS debugging information\n", obj_file.c_str()); } @@ -981,9 +988,11 @@ bool LoadSymbols(const string& obj_file, found_debug_info_section = true; found_usable_info = true; info->LoadedSection(".debug_info"); - if (!LoadDwarf(obj_file, elf_header, big_endian, + bool result = LoadDwarf(obj_file, elf_header, big_endian, options.handle_inter_cu_refs, - options.symbol_data & INLINES, module)) { + options.symbol_data & INLINES, module); + usable_info_parsed = usable_info_parsed || result; + if (!result){ fprintf(stderr, "%s: \".debug_info\" section found, but failed to load " "DWARF debugging information\n", obj_file.c_str()); } @@ -1088,7 +1097,7 @@ bool LoadSymbols(const string& obj_file, return false; } - return true; + return usable_info_parsed; } // Return the breakpad symbol file identifier for the architecture of From 69f9a4ef3ce4f5b40e3b202302a076cc2e96ec0b Mon Sep 17 00:00:00 2001 From: Tyrel Russell Date: Thu, 23 May 2024 14:08:59 -0400 Subject: [PATCH 25/37] Fix format string for uint64_t and size_t data. Change-Id: I9086396561ea7870cfd55ab62c416d2944f0d46b Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5564823 Reviewed-by: Mike Frysinger --- src/common/dwarf/dwarf2reader.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/common/dwarf/dwarf2reader.cc b/src/common/dwarf/dwarf2reader.cc index 8bd1d860b..3cae1a8e8 100644 --- a/src/common/dwarf/dwarf2reader.cc +++ b/src/common/dwarf/dwarf2reader.cc @@ -964,11 +964,10 @@ bool CompilationUnit::ProcessDIEs() { continue; } - // Abbrev > abbrev_.size() indicates a corruption in the dwarf file - // We attempt to recover + // Abbrev > abbrev_.size() indicates a corruption in the dwarf file. if (abbrev_num > abbrevs_->size()) { - fprintf(stderr, "An invalid abbrev was referenced %d / %d. Stopped " - "procesing following DIEs in this CU.", abbrev_num, + fprintf(stderr, "An invalid abbrev was referenced %" PRIu64 " / %zu. " + "Stopped procesing following DIEs in this CU.", abbrev_num, abbrevs_->size()); return false; } From 527331a471207d28d015e210c12fb8bb8b6732fd Mon Sep 17 00:00:00 2001 From: Martin Kurbanov Date: Fri, 31 May 2024 13:36:23 +0300 Subject: [PATCH 26/37] Wait for SIGSTOP during attach, reinjecting unrelated signals PTRACE_ATTACH sends SIGSTOP to this thread. However, on waitpid() we may receive some other pending signal sooner than this SIGSTOP. Therefore, we need to call waitpid() until we receive SIGSTOP. Signals other than SIGSTOP that are received need to be reinjected with PTRACE_CONT, or they will otherwise get lost. If we do not wait for the SIGSTOP signal in waitpid(), this signal will be delivered after PTRACE_DETACH, and the thread will enter the "T (stopped)" state. See ptrace(2) manpage, under the section "Attaching and detaching". Change-Id: Ifcc89cc34086988d496cd31f5dc63e9fceebcaf6 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5582149 Reviewed-by: Mike Frysinger --- .../minidump_writer/linux_ptrace_dumper.cc | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc index e5850063c..fb5b7e762 100644 --- a/src/client/linux/minidump_writer/linux_ptrace_dumper.cc +++ b/src/client/linux/minidump_writer/linux_ptrace_dumper.cc @@ -59,6 +59,7 @@ #include "client/linux/minidump_writer/directory_reader.h" #include "client/linux/minidump_writer/line_reader.h" +#include "common/linux/eintr_wrapper.h" #include "common/linux/linux_libc_support.h" #include "third_party/lss/linux_syscall_support.h" @@ -84,11 +85,29 @@ static bool SuspendThread(pid_t pid) { errno != 0) { return false; } - while (sys_waitpid(pid, NULL, __WALL) < 0) { - if (errno != EINTR) { + while (true) { + int status; + int r = HANDLE_EINTR(sys_waitpid(pid, &status, __WALL)); + if (r < 0) { sys_ptrace(PTRACE_DETACH, pid, NULL, NULL); return false; } + + if (!WIFSTOPPED(status)) + return false; + + // Any signal will stop the thread, make sure it is SIGSTOP. Otherwise, this + // signal will be delivered after PTRACE_DETACH, and the thread will enter + // the "T (stopped)" state. + if (WSTOPSIG(status) == SIGSTOP) + break; + + // Signals other than SIGSTOP that are received need to be reinjected, + // or they will otherwise get lost. + r = sys_ptrace(PTRACE_CONT, pid, NULL, + reinterpret_cast(WSTOPSIG(status))); + if (r < 0) + return false; } #if defined(__i386) || defined(__x86_64) // On x86, the stack pointer is NULL or -1, when executing trusted code in From c8318f06d3a48b0af37862b29fbeeeae8258dbed Mon Sep 17 00:00:00 2001 From: Joshua Peraza Date: Mon, 17 Jun 2024 16:54:19 -0700 Subject: [PATCH 27/37] ARM: use r11 as frame pointer during stack unwind Bug: b/340226382 Change-Id: Idb3d6e502a5a1e25179c3d5f2a19ac4cd79cd103 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5636127 Reviewed-by: Ivan Penkov --- src/processor/stackwalker.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/processor/stackwalker.cc b/src/processor/stackwalker.cc index 1ff6cf7cb..e7ef42d62 100644 --- a/src/processor/stackwalker.cc +++ b/src/processor/stackwalker.cc @@ -263,6 +263,8 @@ Stackwalker* Stackwalker::StackwalkerForCPU( int fp_register = -1; if (system_info->os_short == "ios") fp_register = MD_CONTEXT_ARM_REG_IOS_FP; + else + fp_register = MD_CONTEXT_ARM_REG_FP; cpu_stackwalker = new StackwalkerARM(system_info, context->GetContextARM(), fp_register, memory, modules, From e0a3a76758874bdc04e7a32e254403332f515a2d Mon Sep 17 00:00:00 2001 From: Ivan Penkov Date: Tue, 18 Jun 2024 16:35:26 -0700 Subject: [PATCH 28/37] Allows setting maximum number of threads to process. By default, there is no limit and all threads will be processed. If set to a positive integer (via set_max_thread_count), the minidump processor will stop once that limit is reached. The limit will be ignored if the crashing/requesting thread is above the specified limit. The original thread count is stored in the ProcessState and can be acessed via the original_thread_count() method. Change-Id: I6f0f173e5ad163aa7b2be76780b4a86e4932fc17 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5641257 Reviewed-by: Joshua Peraza --- .../processor/minidump_processor.h | 9 +++++ src/google_breakpad/processor/process_state.h | 6 +++ src/processor/minidump_processor.cc | 38 +++++++++++++------ src/processor/process_state.cc | 1 + 4 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/google_breakpad/processor/minidump_processor.h b/src/google_breakpad/processor/minidump_processor.h index 8475407e7..2a5b07499 100644 --- a/src/google_breakpad/processor/minidump_processor.h +++ b/src/google_breakpad/processor/minidump_processor.h @@ -138,6 +138,11 @@ class MinidumpProcessor { enable_objdump_for_exploitability_ = enabled; } + // Sets the maximum number of threads to process. + void set_max_thread_count(int max_thread_count) { + max_thread_count_ = max_thread_count; + } + private: StackFrameSymbolizer* frame_symbolizer_; // Indicate whether resolver_helper_ is owned by this instance. @@ -157,6 +162,10 @@ class MinidumpProcessor { // purposes of disassembly. This results in significantly more overhead than // the enable_objdump_ flag. bool enable_objdump_for_exploitability_; + + // The maximum number of threads to process. This can be exceeded if the + // requesting thread comes after the limit. Setting this to -1 means no limit. + int max_thread_count_; }; } // namespace google_breakpad diff --git a/src/google_breakpad/processor/process_state.h b/src/google_breakpad/processor/process_state.h index 3fe6a5c27..cc0e83a5d 100644 --- a/src/google_breakpad/processor/process_state.h +++ b/src/google_breakpad/processor/process_state.h @@ -105,6 +105,7 @@ class ProcessState { uint64_t crash_address() const { return crash_address_; } string assertion() const { return assertion_; } int requesting_thread() const { return requesting_thread_; } + int original_thread_count() const { return original_thread_count_; } const ExceptionRecord* exception_record() const { return &exception_record_; } const vector* threads() const { return &threads_; } const vector* thread_memory_regions() const { @@ -168,6 +169,11 @@ class ProcessState { // indicating that the dump thread is not available. int requesting_thread_; + // Original thread count. The Processor has limit on how many threads to + // process, so not all threads are processed. This tells you how many threads + // were originally in the minudump. + int original_thread_count_; + // Exception record details: code, flags, address, parameters. ExceptionRecord exception_record_; diff --git a/src/processor/minidump_processor.cc b/src/processor/minidump_processor.cc index c8d4d4c9a..44e69c890 100644 --- a/src/processor/minidump_processor.cc +++ b/src/processor/minidump_processor.cc @@ -35,6 +35,8 @@ #include #include +#include +#include #include #include #include @@ -64,7 +66,8 @@ MinidumpProcessor::MinidumpProcessor(SymbolSupplier* supplier, own_frame_symbolizer_(true), enable_exploitability_(false), enable_objdump_(false), - enable_objdump_for_exploitability_(false) { + enable_objdump_for_exploitability_(false), + max_thread_count_(-1) { } MinidumpProcessor::MinidumpProcessor(SymbolSupplier* supplier, @@ -74,7 +77,8 @@ MinidumpProcessor::MinidumpProcessor(SymbolSupplier* supplier, own_frame_symbolizer_(true), enable_exploitability_(enable_exploitability), enable_objdump_(false), - enable_objdump_for_exploitability_(false) { + enable_objdump_for_exploitability_(false), + max_thread_count_(-1) { } MinidumpProcessor::MinidumpProcessor(StackFrameSymbolizer* frame_symbolizer, @@ -83,7 +87,8 @@ MinidumpProcessor::MinidumpProcessor(StackFrameSymbolizer* frame_symbolizer, own_frame_symbolizer_(false), enable_exploitability_(enable_exploitability), enable_objdump_(false), - enable_objdump_for_exploitability_(false) { + enable_objdump_for_exploitability_(false), + max_thread_count_(-1) { assert(frame_symbolizer_); } @@ -208,29 +213,32 @@ ProcessResult MinidumpProcessor::Process( bool interrupted = false; bool found_requesting_thread = false; unsigned int thread_count = threads->thread_count(); + process_state->original_thread_count_ = thread_count; // Reset frame_symbolizer_ at the beginning of stackwalk for each minidump. frame_symbolizer_->Reset(); - MinidumpThreadNameList* thread_names = dump->GetThreadNameList(); std::map thread_id_to_name; if (thread_names) { const unsigned int thread_name_count = thread_names->thread_name_count(); for (unsigned int thread_name_index = 0; - thread_name_index < thread_name_count; - ++thread_name_index) { - MinidumpThreadName* thread_name = thread_names->GetThreadNameAtIndex(thread_name_index); + thread_name_index < thread_name_count; ++thread_name_index) { + MinidumpThreadName* thread_name = + thread_names->GetThreadNameAtIndex(thread_name_index); if (!thread_name) { - BPLOG(ERROR) << "Could not get thread name for thread at index " << thread_name_index; + BPLOG(ERROR) << "Could not get thread name for thread at index " + << thread_name_index; return PROCESS_ERROR_GETTING_THREAD_NAME; } uint32_t thread_id; if (!thread_name->GetThreadID(&thread_id)) { - BPLOG(ERROR) << "Could not get thread ID for thread at index " << thread_name_index; + BPLOG(ERROR) << "Could not get thread ID for thread at index " + << thread_name_index; return PROCESS_ERROR_GETTING_THREAD_NAME; } - thread_id_to_name.insert(std::make_pair(thread_id, thread_name->GetThreadName())); + thread_id_to_name.insert( + std::make_pair(thread_id, thread_name->GetThreadName())); } } @@ -270,6 +278,7 @@ ProcessResult MinidumpProcessor::Process( // dump of itself (when both its context and its stack are in flux), // processing that stack wouldn't provide much useful data. if (has_dump_thread && thread_id == dump_thread_id) { + process_state->original_thread_count_--; continue; } @@ -290,6 +299,13 @@ ProcessResult MinidumpProcessor::Process( // be the index of the current thread when it's pushed into the // vector. process_state->requesting_thread_ = process_state->threads_.size(); + if (max_thread_count_ >= 0) { + thread_count = + std::min(thread_count, + std::max(static_cast( + process_state->requesting_thread_ + 1), + static_cast(max_thread_count_))); + } found_requesting_thread = true; @@ -1341,7 +1357,7 @@ string MinidumpProcessor::GetCrashReason(Minidump* dump, uint64_t* address, // an attempt to read data, 1 if it was an attempt to write data, // and 8 if this was a data execution violation. // exception_information[2] contains the underlying NTSTATUS code, - // which is the explanation for why this error occured. + // which is the explanation for why this error occurred. // This information is useful in addition to the code address, which // will be present in the crash thread's instruction field anyway. if (raw_exception->exception_record.number_parameters >= 1) { diff --git a/src/processor/process_state.cc b/src/processor/process_state.cc index c5c38b6cc..54459c653 100644 --- a/src/processor/process_state.cc +++ b/src/processor/process_state.cc @@ -54,6 +54,7 @@ void ProcessState::Clear() { crash_address_ = 0; assertion_.clear(); requesting_thread_ = -1; + original_thread_count_ = 0; for (vector::const_iterator iterator = threads_.begin(); iterator != threads_.end(); ++iterator) { From 63c61b6f5a1d823ff944e1c58e1b341d27e9f0fe Mon Sep 17 00:00:00 2001 From: Jacob Hinton Date: Tue, 14 May 2024 12:37:21 -0700 Subject: [PATCH 29/37] handle write failure in minidump-2-core Change-Id: I0636f315d300a1738a9f12f28d693292a836dc56 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5539174 Reviewed-by: Mike Frysinger --- src/tools/linux/md2core/minidump-2-core.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tools/linux/md2core/minidump-2-core.cc b/src/tools/linux/md2core/minidump-2-core.cc index 00384c405..29560ed18 100644 --- a/src/tools/linux/md2core/minidump-2-core.cc +++ b/src/tools/linux/md2core/minidump-2-core.cc @@ -1562,7 +1562,8 @@ main(int argc, const char* argv[]) { if (!writea(options.out_fd, iter->c_str(), iter->length() + 1)) return 1; } - writea(options.out_fd, "\0\0\0\0", nt_file_align); + if (!writea(options.out_fd, "\0\0\0\0", nt_file_align)) + return 1; for (const auto& current_thread : crashinfo.threads) { if (current_thread.tid == crashinfo.exception.tid) { From 8470d392a2fdb73b8f838196ba3c09cf8c95d3ac Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Wed, 3 Jul 2024 19:37:19 +0200 Subject: [PATCH 30/37] Support large files in the Windows variant of HTTPUpload The STL uses fseek and ftell, which don't work for files with lengths greater than that which a long can accommodate. Avoid std::fstream, and instead use Win32 APIs directly to read files in the HTTP uploader. Additionally: - Halve the heap impact of reading files by reading directly into the request. - Hint to the Windows cache manager that the file will be read sequentially, thereby reducing the impact of the read on the system. Bug: chromium:349736158 Change-Id: I260836946069f6867c20a9ba8ea6043dd041c69c Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5675886 Reviewed-by: Ivan Penkov --- src/common/windows/http_upload.cc | 87 ++++++++++++++++--------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/src/common/windows/http_upload.cc b/src/common/windows/http_upload.cc index bd48a2339..ff34f074f 100644 --- a/src/common/windows/http_upload.cc +++ b/src/common/windows/http_upload.cc @@ -31,11 +31,12 @@ #endif #include +#include +#include // Disable exception handler warnings. #pragma warning(disable:4530) -#include #include #include "common/windows/string_utils-inl.h" @@ -47,8 +48,8 @@ namespace { using std::wstring; using std::map; using std::vector; - using std::ifstream; using std::ios; + using std::unique_ptr; const wchar_t kUserAgent[] = L"Breakpad/1.0 (Windows)"; @@ -110,38 +111,51 @@ namespace { return result; } - bool GetFileContents(const wstring& filename, vector* contents) { - bool rv = false; - // The "open" method on pre-MSVC8 ifstream implementations doesn't accept a - // wchar_t* filename, so use _wfopen directly in that case. For VC8 and - // later, _wfopen has been deprecated in favor of _wfopen_s, which does - // not exist in earlier versions, so let the ifstream open the file itself. - // GCC doesn't support wide file name and opening on FILE* requires ugly - // hacks, so fallback to multi byte file. -#ifdef _MSC_VER - ifstream file; - file.open(filename.c_str(), ios::binary); -#else // GCC - ifstream file(WideToMBCP(filename, CP_ACP).c_str(), ios::binary); -#endif // _MSC_VER >= 1400 - if (file.is_open()) { - file.seekg(0, ios::end); - std::streamoff length = file.tellg(); - // Check for loss of data when converting lenght from std::streamoff into - // std::vector::size_type - std::vector::size_type vector_size = - static_cast::size_type>(length); - if (static_cast(vector_size) == length) { - contents->resize(vector_size); - if (length != 0) { - file.seekg(0, ios::beg); - file.read(&((*contents)[0]), length); + // Invokes the Win32 CloseHandle function on `handle` if it is valid. + // Intended for use as a deleter for a std::unique_ptr. + void CloseWindowsHandle(void* handle) { + if (handle != INVALID_HANDLE_VALUE && handle != nullptr) { + ::CloseHandle(handle); + } + } + + // Appends the contents of the file at `filename` to `contents`. + bool AppendFileContents(const wstring& filename, string* contents) { + // Use Win32 APIs rather than the STL so that files larger than 2^31-1 bytes + // can be read. This also allows for use of a hint to the cache manager that + // the file will be read sequentially, which can improve performance. + using ScopedWindowsHandle = + unique_ptr; + ScopedWindowsHandle file( + ::CreateFileW(filename.c_str(), GENERIC_READ, + FILE_SHARE_DELETE | FILE_SHARE_READ, + /*lpSecurityAttributes=*/nullptr, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, + /*hTemplateFile=*/nullptr), &CloseWindowsHandle); + BY_HANDLE_FILE_INFORMATION info = {}; + if (file.get() != nullptr && file.get() != INVALID_HANDLE_VALUE && + ::GetFileInformationByHandle(file.get(), &info)) { + uint64_t file_size = info.nFileSizeHigh; + file_size <<= 32; + file_size |= info.nFileSizeLow; + string::size_type position = contents->size(); + contents->resize(position + file_size); + constexpr DWORD kChunkSize = 1024*1024; + while (file_size) { + const DWORD bytes_to_read = + (file_size >= kChunkSize + ? kChunkSize + : static_cast(file_size)); + DWORD bytes_read = 0; + if (!::ReadFile(file.get(), &((*contents)[position]), bytes_to_read, + &bytes_read, /*lpOverlapped=*/nullptr)) { + return false; } - rv = true; + position += bytes_read; + file_size -= bytes_read; } - file.close(); } - return rv; + return true; } bool CheckParameters(const map& parameters) { @@ -386,16 +400,7 @@ namespace { request_body->append("\r\n"); } - vector contents; - if (!GetFileContents(filename, &contents)) { - return false; - } - - if (!contents.empty()) { - request_body->append(&(contents[0]), contents.size()); - } - - return true; + return AppendFileContents(filename, request_body); } bool GenerateRequestBody(const map& parameters, From 9ad37adcc5218698e3d24710491833a5e89d1284 Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Fri, 5 Jul 2024 12:42:30 +0200 Subject: [PATCH 31/37] [Windows] Improve diagnostic logging in HTTPUpload Log the operation and Windows and/or WinInet error in case of failure. Also remove use of an intermediate buffer in ReadResponse. Bug: chromium:349736158 Change-Id: I51de506e1636e28a24a5c6dd579ebbfcd07c0345 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5679269 Reviewed-by: Primiano Tucci --- src/common/windows/http_upload.cc | 99 +++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 31 deletions(-) diff --git a/src/common/windows/http_upload.cc b/src/common/windows/http_upload.cc index ff34f074f..7dd935577 100644 --- a/src/common/windows/http_upload.cc +++ b/src/common/windows/http_upload.cc @@ -32,13 +32,12 @@ #include #include +#include #include // Disable exception handler warnings. #pragma warning(disable:4530) -#include - #include "common/windows/string_utils-inl.h" #include "common/windows/http_upload.h" @@ -47,7 +46,6 @@ namespace { using std::string; using std::wstring; using std::map; - using std::vector; using std::ios; using std::unique_ptr; @@ -111,6 +109,30 @@ namespace { return result; } + // Returns a string representation of a given Windows error code, or null + // on failure. + using ScopedLocalString = unique_ptr; + ScopedLocalString FormatError(DWORD error) { + wchar_t* message_buffer = nullptr; + DWORD message_length = + ::FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_FROM_HMODULE | FORMAT_MESSAGE_IGNORE_INSERTS, + /*lpSource=*/::GetModuleHandle(L"wininet.dll"), error, + /*dwLanguageId=*/0, reinterpret_cast(&message_buffer), + /*nSize=*/0, /*Arguments=*/nullptr); + return ScopedLocalString(message_length ? message_buffer : nullptr, + &LocalFree); + } + + // Emits a log message to stderr for the named operation and Windows error + // code. + void LogError(const char* operation, DWORD error) { + ScopedLocalString message = FormatError(error); + fwprintf(stderr, L"%S failed with error %u: %s\n", operation, error, + message ? message.get() : L""); + } + // Invokes the Win32 CloseHandle function on `handle` if it is valid. // Intended for use as a deleter for a std::unique_ptr. void CloseWindowsHandle(void* handle) { @@ -193,37 +215,46 @@ namespace { has_content_length_header = true; claimed_size = wcstol(content_length, NULL, 10); response_body.reserve(claimed_size); + } else { + DWORD error = ::GetLastError(); + if (error != ERROR_HTTP_HEADER_NOT_FOUND) { + LogError("HttpQueryInfo", error); + } } - DWORD bytes_available; DWORD total_read = 0; - BOOL return_code; - - while (((return_code = InternetQueryDataAvailable(request, &bytes_available, - 0, 0)) != 0) && bytes_available > 0) { - vector response_buffer(bytes_available); + while (true) { + DWORD bytes_available; + if (!InternetQueryDataAvailable(request, &bytes_available, 0, 0)) { + LogError("InternetQueryDataAvailable", ::GetLastError()); + return false; + } + if (bytes_available == 0) { + break; + } + // Grow the output to hold the available bytes. + response_body.resize(total_read + bytes_available); DWORD size_read; - - return_code = InternetReadFile(request, - &response_buffer[0], - bytes_available, &size_read); - - if (return_code && size_read > 0) { - total_read += size_read; - response_body.append(&response_buffer[0], size_read); + if (!InternetReadFile(request, &response_body[total_read], + bytes_available, &size_read)) { + LogError("InternetReadFile", ::GetLastError()); + return false; } - else { + if (size_read == 0) { break; } + total_read += size_read; } + // The body may have been over-sized above; shrink to the actual bytes read. + response_body.resize(total_read); - bool succeeded = return_code && (!has_content_length_header || - (total_read == claimed_size)); - if (succeeded && response) { + if (has_content_length_header && (total_read != claimed_size)) { + return false; // The response doesn't match the Content-Length header. + } + if (response) { *response = UTF8ToWide(response_body); } - - return succeeded; + return true; } bool SendRequestInner( @@ -251,8 +282,7 @@ namespace { components.dwUrlPathLength = sizeof(path) / sizeof(path[0]); if (!InternetCrackUrl(url.c_str(), static_cast(url.size()), 0, &components)) { - DWORD err = GetLastError(); - wprintf(L"%d\n", err); + LogError("InternetCrackUrl", ::GetLastError()); return false; } bool secure = false; @@ -269,6 +299,7 @@ namespace { NULL, // proxy bypass 0)); // flags if (!internet.get()) { + LogError("InternetOpen", ::GetLastError()); return false; } @@ -281,6 +312,7 @@ namespace { 0, // flags 0)); // context if (!connection.get()) { + LogError("InternetConnect", ::GetLastError()); return false; } @@ -295,14 +327,17 @@ namespace { http_open_flags, 0)); // context if (!request.get()) { + LogError("HttpOpenRequest", ::GetLastError()); return false; } if (!content_type_header.empty()) { - HttpAddRequestHeaders(request.get(), - content_type_header.c_str(), - static_cast(-1), - HTTP_ADDREQ_FLAG_ADD); + if (!HttpAddRequestHeaders(request.get(), + content_type_header.c_str(), + static_cast(-1), + HTTP_ADDREQ_FLAG_ADD)) { + LogError("HttpAddRequestHeaders", ::GetLastError()); + } } if (timeout_ms) { @@ -310,20 +345,21 @@ namespace { INTERNET_OPTION_SEND_TIMEOUT, timeout_ms, sizeof(*timeout_ms))) { - fwprintf(stderr, L"Could not unset send timeout, continuing...\n"); + LogError("InternetSetOption-send timeout", ::GetLastError()); } if (!InternetSetOption(request.get(), INTERNET_OPTION_RECEIVE_TIMEOUT, timeout_ms, sizeof(*timeout_ms))) { - fwprintf(stderr, L"Could not unset receive timeout, continuing...\n"); + LogError("InternetSetOption-receive timeout", ::GetLastError()); } } if (!HttpSendRequest(request.get(), NULL, 0, const_cast(request_body.data()), static_cast(request_body.size()))) { + LogError("HttpSendRequest", ::GetLastError()); return false; } @@ -333,6 +369,7 @@ namespace { if (!HttpQueryInfo(request.get(), HTTP_QUERY_STATUS_CODE, static_cast(&http_status), &http_status_size, 0)) { + LogError("HttpQueryInfo", ::GetLastError()); return false; } From 5b748cb3a691d0954d9c00b5a02ed866cbc97106 Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Mon, 8 Jul 2024 15:31:17 +0200 Subject: [PATCH 32/37] [symupload] Fix malformed Content-Type header SendSimplePostRequest expects a complete message header for Content-Type, not only the value. Change-Id: Ib0b039d1a717c10f74b7e07caa8e4b544dff6cd1 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5682928 Reviewed-by: Ivan Penkov --- src/common/windows/symbol_collector_client.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/common/windows/symbol_collector_client.cc b/src/common/windows/symbol_collector_client.cc index d91b702b0..9695100a0 100644 --- a/src/common/windows/symbol_collector_client.cc +++ b/src/common/windows/symbol_collector_client.cc @@ -106,7 +106,7 @@ namespace google_breakpad { if (!HTTPUpload::SendSimplePostRequest( url, body, - L"application/json", + L"Content-Type: application/json", timeout_ms, &response, &response_code)) { @@ -176,4 +176,4 @@ namespace google_breakpad { SymbolStatus::Missing; } -} // namespace google_breakpad \ No newline at end of file +} // namespace google_breakpad From 2c5ac8a88e58b294443df92f5be08915da951067 Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Mon, 8 Jul 2024 13:19:56 +0200 Subject: [PATCH 33/37] [symupload] Log details to stderr on failure On success, symupload emits details such as the debug identifier. Emit a very similar message to stderr on failure, as this information may also be useful when diagnosing upload failures. Bug: chromium:349736158 Change-Id: Ic3b5316283e4603dca815bd85a2fd51516fbcca8 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5682842 Reviewed-by: Ivan Penkov --- src/tools/windows/symupload/symupload.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tools/windows/symupload/symupload.cc b/src/tools/windows/symupload/symupload.cc index 46ace95ad..20f48a277 100644 --- a/src/tools/windows/symupload/symupload.cc +++ b/src/tools/windows/symupload/symupload.cc @@ -305,12 +305,12 @@ int wmain(int argc, wchar_t* argv[]) { _wunlink(symbol_file.c_str()); - if (success) { - wprintf(L"Uploaded breakpad symbols for windows-%s/%s/%s (%s %s)\n", - pdb_info.cpu.c_str(), pdb_info.debug_file.c_str(), - pdb_info.debug_identifier.c_str(), code_file.c_str(), - file_version.c_str()); - } + fwprintf(success ? stdout : stderr, + L"%S breakpad symbols for windows-%s/%s/%s (%s %s)\n", + success ? "Uploaded" : "Failed to upload", + pdb_info.cpu.c_str(), pdb_info.debug_file.c_str(), + pdb_info.debug_identifier.c_str(), code_file.c_str(), + file_version.c_str()); return success ? 0 : 1; } From 466cc92bc247862a2c9ad3b0051eab4dd081c355 Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Fri, 12 Jul 2024 11:52:20 +0200 Subject: [PATCH 34/37] [symupload] Upload compressed symbol files on Windows When supported at build-time (by defining HAVE_ZLIB and ensuring that zlib.h is in the include path), use zlib to deflate symbol files before uploading. In case of failure (or when not supported), fall-back to uploading the raw data as before. Bug: chromium:349736158 Change-Id: I9345b87e42a03c87a43247e418b5cc05cd19b2b5 Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5701147 Reviewed-by: Ivan Penkov --- src/common/windows/http_upload.cc | 69 ++++++++++++++++++++++++++++++- src/common/windows/http_upload.h | 3 +- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/common/windows/http_upload.cc b/src/common/windows/http_upload.cc index 7dd935577..cade22ec8 100644 --- a/src/common/windows/http_upload.cc +++ b/src/common/windows/http_upload.cc @@ -30,11 +30,18 @@ #include // Must come first #endif +#include + #include #include #include +#include #include +#if defined(HAVE_ZLIB) +#include +#endif + // Disable exception handler warnings. #pragma warning(disable:4530) @@ -49,6 +56,56 @@ namespace { using std::ios; using std::unique_ptr; +// Compresses the contents of `data` into `deflated` using the deflate +// algorithm, if supported. Returns true on success, or false if not supported +// or in case of any error. The contents of `deflated` are undefined in the +// latter case. +bool Deflate(const string& data, string& deflated) { +#if defined(HAVE_ZLIB) + z_stream stream{}; + + // Start with an output buffer sufficient for 75% compression to avoid + // reallocations. + deflated.resize(data.size() / 4); + stream.next_in = reinterpret_cast(const_cast(data.data())); + stream.avail_in = data.size(); + stream.next_out = reinterpret_cast(&deflated[0]); + stream.avail_out = deflated.size(); + stream.data_type = Z_TEXT; + + // Z_BEST_SPEED is chosen because, in practice, it offers excellent speed with + // comparable compression for the symbol data typically being uploaded. + // Z_BEST_COMPRESSION: 2151202094 bytes compressed 84.27% in 74.440s. + // Z_DEFAULT_COMPRESSION: 2151202094 bytes compressed 84.08% in 36.016s. + // Z_BEST_SPEED: 2151202094 bytes compressed 80.39% in 13.73s. + int result = deflateInit(&stream, Z_BEST_SPEED); + if (result != Z_OK) { + return false; + } + + while (true) { + result = deflate(&stream, /*flush=*/Z_FINISH); + if (result == Z_STREAM_END) { // All data processed. + deflated.resize(stream.total_out); + break; + } + if (result != Z_OK && result != Z_BUF_ERROR) { + fwprintf(stderr, L"Compression failed with zlib error %d\n", result); + break; // Error condition. + } + // Grow `deflated` by at least 1k to accept the rest of the data. + deflated.resize(deflated.size() + std::max(stream.avail_in, 1024U)); + stream.next_out = reinterpret_cast(&deflated[stream.total_out]); + stream.avail_out = deflated.size() - stream.total_out; + } + deflateEnd(&stream); + + return result == Z_STREAM_END; +#else + return false; +#endif // defined(HAVE_ZLIB) +} + const wchar_t kUserAgent[] = L"Breakpad/1.0 (Windows)"; // Helper class which closes an internet handle when it goes away @@ -490,10 +547,20 @@ namespace google_breakpad { return false; } + static const wchar_t kNoEncoding[] = L""; + static const wchar_t kDeflateEncoding[] = L"Content-Encoding: deflate\r\n"; + const wchar_t* encoding_header = &kNoEncoding[0]; + string compressed_body; + if (Deflate(request_body, compressed_body)) { + request_body.swap(compressed_body); + encoding_header = &kDeflateEncoding[0]; + } // else deflate unsupported or failed; send the raw data. + string().swap(compressed_body); // Free memory. + return SendRequestInner( url, L"PUT", - L"", + encoding_header, request_body, timeout_ms, response_body, diff --git a/src/common/windows/http_upload.h b/src/common/windows/http_upload.h index e117840e9..0fc55e411 100644 --- a/src/common/windows/http_upload.h +++ b/src/common/windows/http_upload.h @@ -51,7 +51,8 @@ using std::map; class HTTPUpload { public: // Sends a PUT request containing the data in |path| to the given - // URL. + // URL. The data is encoded via the deflate algorithm, if support for such + // is available at build-time. // Only HTTP(S) URLs are currently supported. Returns true on success. // If the request is successful and response_body is non-NULL, // the response body will be returned in response_body. From 7f3ccb77337bc9d35b252fcddc4f6a983f80dd6b Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Mon, 8 Jul 2024 11:04:39 +0200 Subject: [PATCH 35/37] [Windows] PDBSourceLineWriter improvements COM calls that return a BSTR via an out param pass ownership of the string's underlying memory to the caller. It is the caller's responsibility to free the memory via SysFreeString. Be sure to use CComBSTR in all such cases, so that this happens upon destruction of the CComBSTR. This CL fixes three BSTR memory leaks. The inline_origins_ member maps an inline origin's name to a unique numerical identifier. Simplify the container by mapping a name only to an id rather than a name to a struct holding the name and id. These origins are printed ordered by id. Rather than using a std::set to create a container sorted by id, leverage the fact that ids are ints that are assigned sequentially to create the reverse mapping in a std::vector. This greatly reduces heap usage, and reduces complexity to O(N). Combined, these changes reduce commit charge by 26% and runtime by 10% when processing chrome.dll for a 32-bit win-dcheck build. Change-Id: Ib8d6540c74622500989f1dc06f705d6846be303f Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5682242 Reviewed-by: Ivan Penkov --- src/common/windows/pdb_source_line_writer.cc | 59 +++++++++++--------- src/common/windows/pdb_source_line_writer.h | 17 ++---- 2 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/common/windows/pdb_source_line_writer.cc b/src/common/windows/pdb_source_line_writer.cc index dd80a6d25..86f01a798 100644 --- a/src/common/windows/pdb_source_line_writer.cc +++ b/src/common/windows/pdb_source_line_writer.cc @@ -105,7 +105,7 @@ void MaybeRecordSymbol(DWORD rva, // Take the 'least' symbol by lexicographical order of the decorated name. We // use the decorated rather than undecorated name because computing the latter // is expensive. - BSTR current_name, new_name; + CComBSTR current_name, new_name; loc->second.symbol->get_name(¤t_name); symbol->get_name(&new_name); if (wcscmp(new_name, current_name) < 0) { @@ -479,6 +479,28 @@ bool PDBSourceLineWriter::Open(const wstring& file, FileFormat format) { return true; } +int PDBSourceLineWriter::GetCallsiteInlineOriginId( + CComPtr& callsite) { + wstring name; + { + CComBSTR name_bstr; + callsite->get_name(&name_bstr); + if (name.assign(name_bstr, name_bstr.Length()).empty()) { + name = L""; + } + } + + const int next_id = inline_origins_.size(); + auto iter_inserted = inline_origins_.emplace(std::move(name), next_id); + if (!iter_inserted.second) { + // `name` is already present. Return its previously-assigned unique id. + return iter_inserted.first->second; + } + // `name` was just inserted. Assign it the next id and return this value. + iter_inserted.first->second = next_id; + return next_id; +} + bool PDBSourceLineWriter::GetLine(IDiaLineNumber* dia_line, Line* line) const { if (FAILED(dia_line->get_relativeVirtualAddress(&line->rva))) { fprintf(stderr, "failed to get line rva\n"); @@ -783,17 +805,16 @@ bool PDBSourceLineWriter::PrintFunctions() { } void PDBSourceLineWriter::PrintInlineOrigins() const { - struct OriginCompare { - bool operator()(const InlineOrigin lhs, const InlineOrigin rhs) const { - return lhs.id < rhs.id; - } - }; - set origins; - // Sort by origin id. - for (auto const& origin : inline_origins_) - origins.insert(origin.second); - for (auto o : origins) { - fprintf(output_, "INLINE_ORIGIN %d %ls\n", o.id, o.name.c_str()); + // Inline origins' unique identifiers are assigned sequentially starting from + // zero. Make a reverse-mapping from ids to names, then print the names in + // order of id. + vector names_by_id(inline_origins_.size()); + for (const auto& origin : inline_origins_) { + names_by_id[origin.second] = &origin.first; + } + int id = 0; + for (const wstring* name : names_by_id) { + fprintf(output_, "INLINE_ORIGIN %d %ls\n", id++, name->c_str()); } } @@ -838,19 +859,7 @@ bool PDBSourceLineWriter::GetInlines(IDiaSymbol* block, } dia_line.Release(); } - BSTR name; - callsite->get_name(&name); - if (SysStringLen(name) == 0) { - name = SysAllocString(L""); - } - auto iter = inline_origins_.find(name); - if (iter == inline_origins_.end()) { - InlineOrigin origin; - origin.id = inline_origins_.size(); - origin.name = name; - inline_origins_[name] = origin; - } - new_inline->SetOriginId(inline_origins_[name].id); + new_inline->SetOriginId(GetCallsiteInlineOriginId(callsite)); new_inline->SetCallSiteLine(call_site_line); new_inline->SetCallSiteFileId(file_id); // Go to next level. diff --git a/src/common/windows/pdb_source_line_writer.h b/src/common/windows/pdb_source_line_writer.h index 8c74e2ca3..c4d7c0613 100644 --- a/src/common/windows/pdb_source_line_writer.h +++ b/src/common/windows/pdb_source_line_writer.h @@ -103,15 +103,6 @@ class PDBSourceLineWriter { bool UsesGUID(bool *uses_guid); private: - // InlineOrigin represents INLINE_ORIGIN record in a symbol file. It's an - // inlined function. - struct InlineOrigin { - // The unique id for an InlineOrigin. - int id; - // The name of the inlined function. - wstring name; - }; - // Line represents LINE record in a symbol file. It represents a source code // line. struct Line { @@ -196,6 +187,10 @@ class PDBSourceLineWriter { map line_map_; }; + // Returns the unique id for the inline origin with the same name as the given + // callsite, creating a new id if needed. + int GetCallsiteInlineOriginId(CComPtr& callsite); + // Construct Line from IDiaLineNumber. The output Line is stored at line. // Return true on success. bool GetLine(IDiaLineNumber* dia_line, Line* line) const; @@ -337,8 +332,8 @@ class PDBSourceLineWriter { // This maps unique filenames to file IDs. unordered_map unique_files_; - // The INLINE_ORIGINS records. The key is the function name. - std::map inline_origins_; + // The INLINE_ORIGINS records; inline origin name -> unique id. + std::map inline_origins_; // This is used for calculating post-transform symbol addresses and lengths. ImageMap image_map_; From 0c9811dd5d6d9f544c7a18fdac79b35a5faabcd1 Mon Sep 17 00:00:00 2001 From: Sean Carpenter Date: Wed, 17 Jul 2024 10:44:43 -0700 Subject: [PATCH 36/37] Adds NDS32 Architecture Support to dump_syms NDS32 is a relatively uncommon architecture that is used on a significant chunk of our ChromeOS EC Microprocessors. NDS32 symbol table generation was already supported, but would fail due to "Unrecognized ELF machine architecture". This change fixes that. TEST=dump_syms -a some.nds32.elf BUG=b:328511413 Change-Id: I3e9ae900be483c957a824166d24e2d5db240b64a Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5718928 Reviewed-by: Forest Mittelberg Reviewed-by: Ivan Penkov --- src/common/linux/dump_symbols.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/linux/dump_symbols.cc b/src/common/linux/dump_symbols.cc index 5bb434319..55fa98469 100644 --- a/src/common/linux/dump_symbols.cc +++ b/src/common/linux/dump_symbols.cc @@ -1118,6 +1118,7 @@ const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) { case EM_SPARCV9: return "sparcv9"; case EM_X86_64: return "x86_64"; case EM_RISCV: return "riscv"; + case EM_NDS32: return "nds32"; default: return NULL; } } From 81819541a78c49e9109d2267462775e801f89ce6 Mon Sep 17 00:00:00 2001 From: Greg Thompson Date: Thu, 18 Jul 2024 09:23:54 +0200 Subject: [PATCH 37/37] [windows] Include-what-you-use fixes in http_upload.{cc,h} Bug: b/353869880 Change-Id: I7dd2d432032099c77410ce67a0fce6c7a71b473f Reviewed-on: https://chromium-review.googlesource.com/c/breakpad/breakpad/+/5720428 Reviewed-by: Ivan Penkov --- src/common/windows/http_upload.cc | 4 +++- src/common/windows/http_upload.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common/windows/http_upload.cc b/src/common/windows/http_upload.cc index cade22ec8..22250f502 100644 --- a/src/common/windows/http_upload.cc +++ b/src/common/windows/http_upload.cc @@ -31,6 +31,9 @@ #endif #include +#include +#include +#include #include #include @@ -53,7 +56,6 @@ namespace { using std::string; using std::wstring; using std::map; - using std::ios; using std::unique_ptr; // Compresses the contents of `data` into `deflated` using the deflate diff --git a/src/common/windows/http_upload.h b/src/common/windows/http_upload.h index 0fc55e411..4a2e9bf2f 100644 --- a/src/common/windows/http_upload.h +++ b/src/common/windows/http_upload.h @@ -41,6 +41,7 @@ #include #include +#include namespace google_breakpad {