Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement symbol downloading without symchk.exe #4

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/target/
.vscode/
**/*.rs.bk
manifest
symbols
Expand Down
7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,14 @@
name = "pdblister"
version = "0.1.0"
authors = ["Gogs <gogs@fake.local>"]
edition = "2018"

[dependencies]
rand = "0.3"
futures = "0.3.5"
reqwest = "0.10.8"
indicatif = "0.15.0"

[dependencies.tokio]
version = "1.1.1"
features = ["fs", "rt-multi-thread", "io-util", "macros"]
61 changes: 10 additions & 51 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,17 @@

extern crate rand;

use rand::{thread_rng, Rng};
// use rand::{thread_rng, Rng};

use std::io;
use std::env;
use std::time::Instant;
use std::thread;
use std::process::Command;
use std::fs::File;
use std::io::{Read, Write, Seek, SeekFrom};
use std::path::{Path, PathBuf};

mod sym;

const USAGE: &'static str =
"Usage:

Expand Down Expand Up @@ -272,7 +272,7 @@ fn contains(range: &std::ops::Range<u32>, item: u32) -> bool
}

fn parse_pe(filename: &Path) ->
Result<(File, MZHeader, PEHeader, u32, u32), Box<std::error::Error>>
Result<(File, MZHeader, PEHeader, u32, u32), Box<dyn std::error::Error>>
{
let mut fd = File::open(filename)?;

Expand Down Expand Up @@ -310,7 +310,7 @@ fn parse_pe(filename: &Path) ->
Ok((fd, mz_header, pe_header, image_size, num_tables))
}

fn get_file_path(filename: &Path) -> Result<String, Box<std::error::Error>>
fn get_file_path(filename: &Path) -> Result<String, Box<dyn std::error::Error>>
{
let (_, _, pe_header, image_size, _) = parse_pe(filename)?;

Expand All @@ -337,7 +337,7 @@ fn get_file_path(filename: &Path) -> Result<String, Box<std::error::Error>>
///
/// Returns a string which is the same representation you get from `symchk`
/// when outputting a manifest for the PDB "<filename>,<guid><age>,1"
fn get_pdb(filename: &Path) -> Result<String, Box<std::error::Error>>
fn get_pdb(filename: &Path) -> Result<String, Box<dyn std::error::Error>>
{
let (mut fd, mz_header, pe_header, _, num_tables) =
parse_pe(filename)?;
Expand Down Expand Up @@ -470,14 +470,6 @@ fn get_pdb(filename: &Path) -> Result<String, Box<std::error::Error>>
Err("Failed to find RSDS codeview directory".into())
}

/// Runs `symchk` with `/im <filename> /s <sympath>` and waits for it to exit.
///
/// `filename` is the manifest file handed to `symchk`; `sympath` is the
/// symbol path string forwarded unchanged. The process output and exit status
/// are discarded.
///
/// # Panics
///
/// Panics if the `symchk` process cannot be spawned.
fn download_worker(filename: PathBuf, sympath: String)
{
    // Hand the path over as an OS string instead of going through
    // `to_str().unwrap()`, which panicked on paths that are not valid UTF-8.
    let _ = Command::new("symchk")
        .arg("/im")
        .arg(&filename)
        .arg("/s")
        .arg(&sympath)
        .output()
        .expect("Failed to run command");
}

fn main()
{
let args: Vec<String> = env::args().collect();
Expand Down Expand Up @@ -515,8 +507,6 @@ fn main()
expect("Failed to write pdbs to manifest file");

} else if args.len() == 3 && args[1] == "download" {
const NUM_PIECES: usize = 64;

/* Read the entire manifest file into a string */
let mut buf = String::new();
let mut fd = match File::open("manifest") {
Expand All @@ -538,48 +528,17 @@ fn main()
return;
}

/* Calculate number of entries per temporary manifest to split into
* NUM_PIECES chunks.
*/
let chunk_size = (lines.len() + NUM_PIECES - 1) / NUM_PIECES;

print!("Original manifest has {} PDBs\n", lines.len());

lines.sort();
lines.dedup();

print!("Deduped manifest has {} PDBs\n", lines.len());

/* Shuffle filenames so files are not biased to the downloader based
* on name. This should lead to download threads having more even
* work.
*/
thread_rng().shuffle(&mut lines);

/* Create worker threads downloading each chunk using symchk */
let mut threads = Vec::new();
for (ii, lines) in lines.chunks(chunk_size).enumerate() {
let mut tmp_path = env::temp_dir();
tmp_path.push(format!("manifest_{:04}", ii));

{
/* Create chunked manifest file */
let mut fd = File::create(&tmp_path).
expect("Failed to create split manifest");
fd.write_all(lines.join("\n").as_bytes()).
expect("Failed to write split manifest");
}

/* Create worker */
let sympath = args[2].clone();
threads.push(thread::spawn(move || {
download_worker(tmp_path, sympath);
}));
}

/* Wait for all threads to complete. Discard return status */
for thr in threads {
let _ = thr.join();
match sym::download_manifest(args[2].clone(), lines)
{
Ok(_) => println!("Success!"),
Err(e) => println!("Failed: {}", e),
}
} else if args.len() == 3 && args[1] == "filestore" {
/* List all files in the directory specified by args[2] */
Expand Down
165 changes: 165 additions & 0 deletions src/sym.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#![allow(unknown_lints)]
#![warn(clippy::all)]

#![allow(clippy::needless_return)]

use std::str::FromStr;
use std::error::Error;

extern crate futures;
extern crate reqwest;
extern crate tokio;
extern crate indicatif;

use indicatif::{ProgressBar,ProgressStyle};

use futures::stream::StreamExt;

/// One symbol-server directive of the form `SRV*<local path>*<server URL>`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SymSrv {
    /// Remote symbol server base URL (third `*`-separated field).
    server: String,
    /// Local directory the symbols are stored in (second `*`-separated field).
    filepath: String,
}

impl FromStr for SymSrv {
    type Err = Box<dyn Error>;

    /// Parses a `SRV*<local path>*<server URL>` directive.
    ///
    /// # Errors
    ///
    /// Returns an error when the string does not start with the `SRV` field
    /// (the only form currently supported) or when it does not consist of
    /// exactly three `*`-separated fields. Unsupported input is reported as an
    /// error rather than a panic.
    fn from_str(srv: &str) -> Result<Self, Self::Err> {
        // Split the path out by asterisks. `split` always yields at least one
        // field, so the catch-all arm below also covers the empty string.
        let directives: Vec<&str> = srv.split('*').collect();

        match directives.as_slice() {
            // Well-formed directive: SRV, local path, server URL.
            ["SRV", filepath, server] => Ok(SymSrv {
                server: server.to_string(),
                filepath: filepath.to_string(),
            }),

            // Right keyword, wrong number of fields.
            ["SRV", ..] => Err(format!(
                "Malformed SRV directive \"{}\": expected SRV*<local path>*<server URL>",
                srv
            )
            .into()),

            // Anything else (plain directories, other keywords, empty input).
            _ => Err("Unsupported server string form".into()),
        }
    }
}

fn parse_servers(srvstr: String) -> Result<Vec<SymSrv>, Box<dyn Error>> {
let server_list: Vec<&str> = srvstr.split(';').collect();
if server_list.is_empty() {
return Err("Invalid server string!".into());
}

let symbol_servers = server_list.into_iter().map(|symstr| {
return symstr.parse::<SymSrv>();
}).collect();

return symbol_servers;
}

pub fn download_manifest(srvstr: String, files: Vec<String>) -> Result<(), Box<dyn Error>> {
// First, parse the server string to figure out where we're supposed to fetch symbols from,
// and where to.
let srvs = parse_servers(srvstr)?;
if srvs.len() != 1 {
return Err("Only one symbol server/path supported at this time.".into());
}

let srv = &srvs[0];

// Create the directory first, if it does not exist.
std::fs::create_dir_all(srv.filepath.clone())?;

// http://patshaughnessy.net/2020/1/20/downloading-100000-files-using-async-rust
// The following code is based off of the above blog post.
let client = reqwest::Client::new();

// Create a progress bar.
let pb = ProgressBar::new(files.len() as u64);

pb.set_style(
ProgressStyle::default_bar().
template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} ({eta}) {msg}")
.progress_chars("##-")
);

// Set up our asynchronous code block.
// This block will be lazily executed when something awaits on it, such as the tokio thread pool below.
let queries = futures::stream::iter(
// Map the files vector using a closure, such that it's converted from a Vec<String>
// into a Vec<Result<T, E>>
files.into_iter().map(|line| {
// Take explicit references to a few variables and move them into the async block.
let client = &client;
let srv = &srv;
let pb = pb.clone();

async move {
// Break out the filename into the separate components.
let el: Vec<&str> = line.split(',').collect();
if el.len() != 3 {
panic!("Invalid manifest line encountered: \"{}\"", line);
}

// Create the directory tree.
std::fs::create_dir_all(format!("{}/{}/{}", srv.filepath, el[0], el[1]))?;

let pdbpath = format!("{}/{}/{}", el[0], el[1], el[0]);

// Check to see if the file already exists. If so, skip it.
if std::path::Path::new(&format!("{}/{}", srv.filepath, pdbpath)).exists() {
pb.inc(1);
return Ok(());
}

// println!("{}/{}", el[0], el[1]);
pb.set_message(format!("{}/{}", el[1], el[0]).as_str());
pb.inc(1);

// Attempt to retrieve the file.
let req = client.get::<&str>(&format!("{}/{}", srv.server, pdbpath).to_string()).send().await?;
if req.status() != 200 {
return Err(format!("File {} - Code {}", pdbpath, req.status()).into());
}

// Create the output file.
let mut file = tokio::fs::File::create(format!("{}/{}", srv.filepath, pdbpath).to_string()).await?;
tokio::io::copy(&mut req.bytes().await?.as_ref(), &mut file).await?;

return Ok(());
}
})
).buffer_unordered(64).collect::<Vec<Result<(), Box<dyn Error>>>>();

// N.B: The buffer_unordered bit above allows us to feed in 64 requests at a time to tokio.
// That way we don't exhaust system resources in the networking stack or filesystem.

// Start up a tokio runtime and run through the requests.
let mut rt = tokio::runtime::Runtime::new()?;
let output = rt.block_on(queries);

pb.finish();

// Collect output results.
output.iter().for_each(|x| {
match x {
Err(res) => {
println!("{}", res);
}

Ok(_) => (),
}
});

return Ok(());
}