Plan 9 from Bell Labs’s /usr/web/sources/contrib/stallion/root/arm/go/src/cmd/vendor/github.com/google/pprof/internal/binutils/binutils.go

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package binutils provides access to the GNU binutils.
package binutils

import (
	"debug/elf"
	"debug/macho"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"strings"
	"sync"

	"github.com/google/pprof/internal/elfexec"
	"github.com/google/pprof/internal/plugin"
)

// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
//
// The zero value is usable: the tool configuration is built on first use
// (see get and update).
type Binutils struct {
	mu  sync.Mutex // guards rep
	rep *binrep    // current tool configuration; nil until first use
}

// binrep is an immutable representation for Binutils.  It is atomically
// replaced on every mutation to provide thread-safe access: Binutils.update
// fills in a fresh copy and swaps the pointer instead of modifying a binrep
// that readers may already hold.
type binrep struct {
	// Commands to invoke. Each tool has a path and a flag recording whether
	// findExe located it; when the flag is false the path is only the bare
	// command name that was searched for.
	llvmSymbolizer      string
	llvmSymbolizerFound bool
	addr2line           string
	addr2lineFound      bool
	nm                  string
	nmFound             bool
	objdump             string
	objdumpFound        bool

	// if fast, perform symbolization using nm (symbol names only),
	// instead of file-line detail from the slower addr2line.
	fast bool
}

// get returns the binrep currently installed on bu. On first use, when no
// representation exists yet, it builds one with the default (empty) tool
// configuration and stores it before returning.
func (bu *Binutils) get() *binrep {
	bu.mu.Lock()
	defer bu.mu.Unlock()

	if bu.rep == nil {
		fresh := &binrep{}
		initTools(fresh, "")
		bu.rep = fresh
	}
	return bu.rep
}

// update applies fn to a private copy of bu's representation and then
// installs that copy as the current one. If bu has no representation yet,
// the copy starts from the default tool configuration instead. The binrep
// previously held by bu is never modified.
func (bu *Binutils) update(fn func(r *binrep)) {
	bu.mu.Lock()
	defer bu.mu.Unlock()

	next := new(binrep)
	if cur := bu.rep; cur != nil {
		*next = *cur
	} else {
		initTools(next, "")
	}
	fn(next)
	bu.rep = next
}

// String describes the current binutils state for debug logging: the path
// of every tool that was located (an empty string for tools that were not)
// and whether fast symbolization is enabled.
func (bu *Binutils) String() string {
	rep := bu.get()

	// shown yields the tool path only when the tool was located; a missing
	// tool is reported as the empty string.
	shown := func(path string, found bool) string {
		if !found {
			return ""
		}
		return path
	}

	return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
		shown(rep.llvmSymbolizer, rep.llvmSymbolizerFound),
		shown(rep.addr2line, rep.addr2lineFound),
		shown(rep.nm, rep.nmFound),
		shown(rep.objdump, rep.objdumpFound),
		rep.fast)
}

// SetFastSymbolization turns fast symbolization on or off. Fast
// symbolization relies on nm, which is much quicker than addr2line but
// yields symbol names only (no file/line information).
func (bu *Binutils) SetFastSymbolization(fast bool) {
	setFast := func(rep *binrep) {
		rep.fast = fast
	}
	bu.update(setFast)
}

// SetTools re-resolves the external tools from the contents of the tools
// option. config is a comma-separated list of entries; each entry has the
// form t:path, where path is searched only for the tool named t. An entry
// without a tool name is a path that is searched for all tools.
func (bu *Binutils) SetTools(config string) {
	reconfigure := func(rep *binrep) {
		initTools(rep, config)
	}
	bu.update(reconfigure)
}

// initTools resolves every external tool for b according to config, a
// comma-separated list of "tool:path" or plain "path" entries. For each
// tool the tool-specific paths are tried first, followed by the paths given
// without a tool name.
func initTools(b *binrep, config string) {
	// byTool maps a tool name to its configured search paths; the "" key
	// collects the entries given without a tool name (the default).
	byTool := map[string][]string{}
	for _, entry := range strings.Split(config, ",") {
		tool, dir := "", entry
		if i := strings.Index(entry, ":"); i >= 0 {
			tool, dir = entry[:i], entry[i+1:]
		}
		byTool[tool] = append(byTool[tool], dir)
	}

	fallback := byTool[""]
	// search lists the paths to try for one tool, most specific first.
	search := func(tool string) []string {
		return append(byTool[tool], fallback...)
	}

	b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", search("llvm-symbolizer"))

	b.addr2line, b.addr2lineFound = findExe("addr2line", search("addr2line"))
	if !b.addr2lineFound {
		// On MacOS, brew installs addr2line under gaddr2line name, so search for
		// that if the tool is not found by its default name.
		b.addr2line, b.addr2lineFound = findExe("gaddr2line", search("addr2line"))
	}

	b.nm, b.nmFound = findExe("nm", search("nm"))
	b.objdump, b.objdumpFound = findExe("objdump", search("objdump"))
}

// findExe searches paths, in order, for an executable named cmd. It returns
// the first location accepted by exec.LookPath together with true. When no
// path yields the executable it returns cmd itself together with false.
func findExe(cmd string, paths []string) (string, bool) {
	for i := range paths {
		candidate := filepath.Join(paths[i], cmd)
		resolved, err := exec.LookPath(candidate)
		if err != nil {
			continue
		}
		return resolved, true
	}
	return cmd, false
}

// Disasm runs the configured objdump over the address range [start, end) of
// file and returns the parsed assembly instructions. A failure of the
// objdump command is reported together with its argument list.
func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
	args := []string{
		"-d", "-C", "--no-show-raw-insn", "-l",
		fmt.Sprintf("--start-address=%#x", start),
		fmt.Sprintf("--stop-address=%#x", end),
		file,
	}
	objdump := exec.Command(bu.get().objdump, args...)

	listing, err := objdump.Output()
	if err != nil {
		return nil, fmt.Errorf("%v: %v", objdump.Args, err)
	}
	return disassemble(listing)
}

// Open satisfies the plugin.ObjTool interface. It inspects the first four
// bytes of name to decide whether the file is ELF, Mach-O or fat Mach-O and
// delegates to the matching opener; any other content is rejected as an
// unrecognized binary format.
func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	b := bu.get()

	// The file type is detected from its magic number, mainly to provide
	// better error messages but it should also help speed.
	if _, statErr := os.Stat(name); statErr != nil {
		// For testing, do not require file name to exist.
		if !strings.Contains(b.addr2line, "testdata/") {
			return nil, statErr
		}
		return &fileAddr2Line{file: file{b: b, name: name}}, nil
	}

	in, err := os.Open(name)
	if err != nil {
		return nil, fmt.Errorf("error opening %s: %v", name, err)
	}
	defer in.Close()

	var magic [4]byte
	if _, err := io.ReadFull(in, magic[:]); err != nil {
		return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
	}

	// Mach-O magic numbers can be big or little endian, so decode both ways.
	le := binary.LittleEndian.Uint32(magic[:])
	be := binary.BigEndian.Uint32(magic[:])
	thinMachO := func(m uint32) bool {
		return m == macho.Magic32 || m == macho.Magic64
	}

	switch {
	case string(magic[:]) == elf.ELFMAG:
		obj, err := b.openELF(name, start, limit, offset)
		if err != nil {
			return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
		}
		return obj, nil

	case thinMachO(le) || thinMachO(be):
		obj, err := b.openMachO(name, start, limit, offset)
		if err != nil {
			return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
		}
		return obj, nil

	case le == macho.MagicFat || be == macho.MagicFat:
		obj, err := b.openFatMachO(name, start, limit, offset)
		if err != nil {
			return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
		}
		return obj, nil

	default:
		return nil, fmt.Errorf("unrecognized binary format: %s", name)
	}
}

// openMachOCommon builds the ObjFile for an already parsed Mach-O file. The
// base is the mapping start address minus the address of the __TEXT
// segment; a missing __TEXT segment, or one located above start, is an
// error. The result is a fileNM when fast symbolization is requested or
// neither addr2line nor llvm-symbolizer was found, and a fileAddr2Line
// otherwise.
func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
	// The load address of the __TEXT segment is usually 0 for shared
	// libraries or 0x100000000 for executables. You can check this value by
	// running `objdump -private-headers <file>`.
	text := of.Segment("__TEXT")
	switch {
	case text == nil:
		return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
	case text.Addr > start:
		return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
			name, text.Addr, start)
	}

	common := file{b: b, name: name, base: start - text.Addr}

	withLines := !b.fast && (b.addr2lineFound || b.llvmSymbolizerFound)
	if withLines {
		return &fileAddr2Line{file: common}, nil
	}
	return &fileNM{file: common}, nil
}

// openFatMachO opens the fat (multi-architecture) Mach-O file at name and
// builds the ObjFile from the slice whose CPU type matches the host
// architecture (runtime.GOARCH). It fails when the file cannot be parsed,
// contains no architectures, the host architecture has no Mach-O CPU
// equivalent, or no slice for the host architecture is present.
func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	of, err := macho.OpenFat(name)
	if err != nil {
		return nil, fmt.Errorf("error parsing %s: %v", name, err)
	}
	defer of.Close()

	if len(of.Arches) == 0 {
		return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
	}

	var arch macho.Cpu
	// Use the host architecture.
	// TODO: This is not ideal because the host architecture may not be the one
	// that was profiled. E.g. an amd64 host can profile a 386 program.
	switch runtime.GOARCH {
	case "386":
		arch = macho.Cpu386
	case "amd64", "amd64p32":
		arch = macho.CpuAmd64
	case "arm", "armbe":
		arch = macho.CpuArm
	case "arm64", "arm64be":
		// 64-bit ARM slices carry CpuArm64 (CpuArm with the 64-bit ABI flag
		// set), so comparing them against CpuArm would never match.
		arch = macho.CpuArm64
	case "ppc":
		arch = macho.CpuPpc
	case "ppc64", "ppc64le":
		arch = macho.CpuPpc64
	default:
		return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
	}
	for i := range of.Arches {
		if of.Arches[i].Cpu == arch {
			return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
		}
	}
	return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
}

// openMachO parses the single-architecture Mach-O file at name and builds
// its ObjFile through openMachOCommon. The parsed file is closed again
// before openMachO returns.
func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	mf, openErr := macho.Open(name)
	if openErr != nil {
		return nil, fmt.Errorf("error parsing %s: %v", name, openErr)
	}
	defer mf.Close()

	return b.openMachOCommon(name, mf, start, limit, offset)
}

// openELF builds the ObjFile for the ELF binary at name, given the mapping's
// start and limit addresses and its file offset.
//
// It computes the load base via elfexec.GetBase, records the build ID when
// one can be read (a missing or unreadable build ID is not an error), and
// returns a fileNM when fast symbolization is requested or neither
// addr2line nor llvm-symbolizer was found, and a fileAddr2Line otherwise.
func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
	ef, err := elf.Open(name)
	if err != nil {
		return nil, fmt.Errorf("error parsing %s: %v", name, err)
	}
	defer ef.Close()

	var stextOffset *uint64
	var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
	if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
		// Reading all Symbols is expensive, and we only rarely need it so
		// we don't want to do it every time. But if _stext happens to be
		// page-aligned but isn't the same as Vaddr, we would symbolize
		// wrong. So if the addresses aren't page aligned, or if the name
		// contains "vmlinux", we read _stext. We can be wrong if: (1)
		// someone passes a kernel path that doesn't contain "vmlinux" AND
		// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
		symbols, err := ef.Symbols()
		if err != nil && err != elf.ErrNoSymbols {
			return nil, err
		}
		for _, s := range symbols {
			if s.Name == "_stext" {
				// The kernel may use _stext as the mapping start address.
				stextOffset = &s.Value
				break
			}
		}
	}

	base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
	if err != nil {
		return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
	}

	// The build ID is best effort: failing to open or parse the file here
	// simply leaves it empty.
	buildID := ""
	if f, err := os.Open(name); err == nil {
		if id, err := elfexec.GetBuildID(f); err == nil {
			buildID = fmt.Sprintf("%x", id)
		}
		// Release the descriptor; nothing read from f is needed past this
		// point, and a close error on a read-only file is not actionable.
		f.Close()
	}

	common := file{b: b, name: name, base: base, buildID: buildID}
	if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
		return &fileNM{file: common}, nil
	}
	return &fileAddr2Line{file: common}, nil
}

// file holds the state shared by the ObjFile implementations in this file.
// It is embedded by fileNM and fileAddr2Line, which Open returns as
// plugin.ObjFile and which supply their own SourceLine.
type file struct {
	b       *binrep
	name    string
	base    uint64
	buildID string
}

// Name returns the file name the object was opened with.
func (f *file) Name() string {
	return f.name
}

// Base returns the base address computed when the file was opened.
func (f *file) Base() uint64 {
	return f.base
}

// BuildID returns the build ID recorded when the file was opened, or the
// empty string if none was recorded.
func (f *file) BuildID() string {
	return f.buildID
}

// SourceLine is the default implementation: it reports no frames (an empty,
// non-nil slice) and no error for every address.
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
	return []plugin.Frame{}, nil
}

// Close is a no-op; file itself holds nothing that needs releasing.
func (f *file) Close() error {
	return nil
}

// Symbols runs the configured nm on the file and passes its output, along
// with the file name, r and addr, to findSymbols. A failure of the nm
// command is reported together with its argument list.
func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
	// "-n" asks nm for a list of symbols sorted by address.
	nmCmd := exec.Command(f.b.nm, "-n", f.name)

	listing, runErr := nmCmd.Output()
	if runErr != nil {
		return nil, fmt.Errorf("%v: %v", nmCmd.Args, runErr)
	}
	return findSymbols(listing, f.name, r, addr)
}

// fileNM is the ObjFile implementation that uses 'nm' to map addresses to
// symbols (without file/line number information). It is faster than
// fileAddr2Line.
//
// addr2linernm is nil until SourceLine creates it on first use.
type fileNM struct {
	file
	addr2linernm *addr2LinerNM
}

// SourceLine resolves addr through the nm-based liner. The liner is created
// on the first call and reused afterwards; if creating it fails, the error
// is returned and a later call tries again.
func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
	if f.addr2linernm != nil {
		return f.addr2linernm.addrInfo(addr)
	}

	liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
	if err != nil {
		return nil, err
	}
	f.addr2linernm = liner
	return liner.addrInfo(addr)
}

// fileAddr2Line is the ObjFile implementation that uses llvm-symbolizer, if
// that's available, or addr2line to map addresses to symbols (with
// file/line number information). It can be slow for large binaries with
// debug information.
//
// once ensures init runs a single time; init sets at most one of
// llvmSymbolizer and addr2liner, and both stay nil when neither tool could
// be started.
type fileAddr2Line struct {
	once sync.Once
	file
	addr2liner     *addr2Liner
	llvmSymbolizer *llvmSymbolizer
}

// SourceLine resolves addr with llvm-symbolizer when it was set up, and with
// addr2line otherwise. The backing tool is initialized on the first call;
// if neither could be set up an error is returned.
func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
	f.once.Do(f.init)

	switch {
	case f.llvmSymbolizer != nil:
		return f.llvmSymbolizer.addrInfo(addr)
	case f.addr2liner != nil:
		return f.addr2liner.addrInfo(addr)
	default:
		return nil, fmt.Errorf("could not find local addr2liner")
	}
}

// init picks the symbolization backend for f. llvm-symbolizer is preferred;
// addr2line is tried only when llvm-symbolizer cannot be set up. Errors are
// not reported here: if both fail, f is left without a backend.
func (f *fileAddr2Line) init() {
	symbolizer, llvmErr := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base)
	if llvmErr == nil {
		f.llvmSymbolizer = symbolizer
		return
	}

	liner, a2lErr := newAddr2Liner(f.b.addr2line, f.name, f.base)
	if a2lErr != nil {
		return
	}
	f.addr2liner = liner

	// When addr2line encounters some gcc compiled binaries, it
	// drops interesting parts of names in anonymous namespaces.
	// Fallback to NM for better function names.
	nmLiner, nmErr := newAddr2LinerNM(f.b.nm, f.name, f.base)
	if nmErr == nil {
		f.addr2liner.nm = nmLiner
	}
}

// Close calls close on the rw of whichever backend (llvm-symbolizer and/or
// addr2line) is currently set and then clears the corresponding field. It
// always returns nil.
func (f *fileAddr2Line) Close() error {
	if symbolizer := f.llvmSymbolizer; symbolizer != nil {
		symbolizer.rw.close()
		f.llvmSymbolizer = nil
	}
	if liner := f.addr2liner; liner != nil {
		liner.rw.close()
		f.addr2liner = nil
	}
	return nil
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to webmaster@9p.io.