From d21ac03a0cd41a5f46caaa7fc59907407ffaf007 Mon Sep 17 00:00:00 2001 From: monster29000 <1067752807@qq.com> Date: Sat, 17 Aug 2024 08:05:10 +0800 Subject: [PATCH] It is recommended to use np.asarray instead of np.array to avoid unnecessary copies of the data --- .../dataset_evaluator/dataset_evaluator.py | 26 +++++++++---------- .../colossal_eval/models/huggingface.py | 2 +- .../tensor_shard/solver/solver.py | 12 ++++----- colossalai/device/calc_pipeline_strategy.py | 6 ++--- .../inference/dynamic_batching/req_queue.py | 4 +-- colossalai/zero/gemini/chunk/search_utils.py | 2 +- .../images/diffusion/ldm/data/imagenet.py | 12 ++++----- examples/images/diffusion/ldm/data/lsun.py | 4 +-- .../ldm/modules/diffusionmodules/util.py | 2 +- .../ldm/modules/image_degradation/bsrgan.py | 12 ++++----- .../modules/image_degradation/bsrgan_light.py | 12 ++++----- .../modules/image_degradation/utils_image.py | 2 +- examples/images/diffusion/ldm/util.py | 2 +- examples/images/diffusion/scripts/img2img.py | 2 +- examples/images/diffusion/scripts/inpaint.py | 4 +-- examples/images/diffusion/scripts/txt2img.py | 2 +- .../dreambooth/train_dreambooth_inpaint.py | 4 +-- .../tutorial/auto_parallel/bench_utils.py | 4 +-- .../data/datasets/blendable_dataset.py | 2 +- .../data/datasets/dataset_utils.py | 10 +++---- .../data/datasets/ict_dataset.py | 2 +- .../data/datasets/indexed_dataset.py | 12 ++++----- .../test_layers/test_cache_embedding.py | 4 +-- .../test_gemini/test_runtime_mem_tracer.py | 2 +- 24 files changed, 73 insertions(+), 73 deletions(-) diff --git a/applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/dataset_evaluator.py b/applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/dataset_evaluator.py index 37dbac3cf925..8b15d4a55293 100644 --- a/applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/dataset_evaluator.py +++ b/applications/ColossalEval/colossal_eval/evaluate/dataset_evaluator/dataset_evaluator.py @@ -60,7 +60,7 @@ def 
_calculate_label_metrics(self, metric: str, category: str): flag = False logits = [] for i, sample in enumerate(self.data[category]["data"]): - if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))): + if np.any(np.isnan(np.asarray(list(sample["logits_over_choices"].values())))): if not flag: print( f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}." @@ -81,10 +81,10 @@ def _calculate_label_metrics(self, metric: str, category: str): ) logits.append(references[i] if score == 1 else -1) else: - logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values())))) + logits.append(np.argmax(np.asarray(list(sample["logits_over_choices"].values())))) - references = np.array(references) - logits = np.array(logits) + references = np.asarray(references) + logits = np.asarray(logits) scores = np.sum(references == logits) / len(self.data[category]["data"]) * 100 self.evaluation_results[metric][category] = (scores, len(self.data[category]["data"])) @@ -107,7 +107,7 @@ def _calculate_combined_metrics(self, metric: str, category: str): flag = False logits = [] for i, sample in enumerate(self.data[category]["data"]): - if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))): + if np.any(np.isnan(np.asarray(list(sample["logits_over_choices"].values())))): if not flag: print( f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}." @@ -123,7 +123,7 @@ def _calculate_combined_metrics(self, metric: str, category: str): ) logits.append(references[i] if score == 1 else -1) else: - logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values())))) + logits.append(np.argmax(np.asarray(list(sample["logits_over_choices"].values())))) metric_method = eval("metric_helper." 
+ metric) @@ -194,21 +194,21 @@ def _calculate_loss_metrics(self, metric: str, category: str): if metric == "perplexity": weight = len(self.data[category]["data"]) / self.metric_total_length[metric] losses = [min(sample["loss"]) for sample in self.data[category]["data"]] - perplexity = np.mean(np.exp(np.array(losses))) + perplexity = np.mean(np.exp(np.asarray(losses))) self.evaluation_results["perplexity"][category] = (perplexity, len(self.data[category]["data"])) self.evaluation_results["perplexity"]["ALL"] += perplexity * weight elif metric == "ppl_score": weight = len(self.data[category]["data"]) / self.metric_total_length[metric] losses = [min(sample["loss"]) for sample in self.data[category]["data"]] - perplexity_score = np.mean(np.exp(-np.array(losses))) * 100 + perplexity_score = np.mean(np.exp(-np.asarray(losses))) * 100 self.evaluation_results["ppl_score"][category] = (perplexity_score, len(self.data[category]["data"])) self.evaluation_results["ppl_score"]["ALL"] += perplexity_score * weight elif metric == "ppl_score_over_choices" and self.data[category]["inference_kwargs"]["all_classes"] is not None: weight = len(self.data[category]["data"]) / self.metric_total_length[metric] loss_over_choices = [sample["loss_over_choices"] for sample in self.data[category]["data"]] - perplexity_score_over_choices = np.mean(np.exp(-np.array(loss_over_choices))) * 100 + perplexity_score_over_choices = np.mean(np.exp(-np.asarray(loss_over_choices))) * 100 self.evaluation_results["ppl_score_over_choices"][category] = ( perplexity_score_over_choices, @@ -218,14 +218,14 @@ def _calculate_loss_metrics(self, metric: str, category: str): elif metric == "per_byte_perplexity": weight = len(self.data[category]["data"]) / self.metric_total_length[metric] losses = [min(sample["loss_sum"]) for sample in self.data[category]["data"]] - perplexity = np.mean(np.exp(np.array(losses) / np.array(self.N_bytes[category]))) + perplexity = np.mean(np.exp(np.asarray(losses) / 
np.asarray(self.N_bytes[category]))) self.evaluation_results["per_byte_perplexity"][category] = perplexity self.evaluation_results["per_byte_perplexity"]["ALL"] += perplexity * weight elif metric == "per_byte_ppl_score": weight = len(self.data[category]["data"]) / self.metric_total_length[metric] losses = [min(sample["loss_sum"]) for sample in self.data[category]["data"]] - perplexity_score = np.mean(np.exp(-np.array(losses) / np.array(self.N_bytes[category]))) * 100 + perplexity_score = np.mean(np.exp(-np.asarray(losses) / np.asarray(self.N_bytes[category]))) * 100 self.evaluation_results["per_byte_ppl_score"][category] = perplexity_score self.evaluation_results["per_byte_ppl_score"]["ALL"] += perplexity_score * weight @@ -233,14 +233,14 @@ def _calculate_loss_metrics(self, metric: str, category: str): weight = len(self.data[category]["data"]) / self.metric_total_length[metric] losses = [min(sample["loss_sum"]) for sample in self.data[category]["data"]] token_nums = [sample["token_num"][np.argmin(sample["loss_sum"])] for sample in self.data[category]["data"]] - perplexity = np.sum(np.array(losses)) / np.sum(np.array(token_nums)) + perplexity = np.sum(np.asarray(losses)) / np.sum(np.asarray(token_nums)) self.evaluation_results["loss_over_all_tokens"][category] = perplexity self.evaluation_results["loss_over_all_tokens"]["ALL"] += perplexity * weight # The number of tokens can be used for normalizing. 
# See https://github.com/SkyworkAI/Skywork/issues/43#issuecomment-1811733834 - print(f"{self.model_name} {category} token num: {np.sum(np.array(token_nums))}") + print(f"{self.model_name} {category} token num: {np.sum(np.asarray(token_nums))}") def _evaluate(self): """Calculate and return evaluation results""" diff --git a/applications/ColossalEval/colossal_eval/models/huggingface.py b/applications/ColossalEval/colossal_eval/models/huggingface.py index e91743525f0e..ad1782764456 100644 --- a/applications/ColossalEval/colossal_eval/models/huggingface.py +++ b/applications/ColossalEval/colossal_eval/models/huggingface.py @@ -422,7 +422,7 @@ def inference(self, data_loader: DataLoader, inference_kwargs: Dict[str, Any], d batch[j]["loss_over_choices"] = loss_over_choices[j] if calculate_loss: - batch[j]["loss"] = (np.array(batch_losses[j]) / np.array(batch_target_token_nums[j])).tolist() + batch[j]["loss"] = (np.asarray(batch_losses[j]) / np.asarray(batch_target_token_nums[j])).tolist() # loss_sum is specially used for pertrain dataset for calculating per-byte-perplexity. # However, loss (which is per sample loss) suffices for most cases. 
diff --git a/colossalai/auto_parallel/tensor_shard/solver/solver.py b/colossalai/auto_parallel/tensor_shard/solver/solver.py index 088d1acb5177..3dec190c9a69 100644 --- a/colossalai/auto_parallel/tensor_shard/solver/solver.py +++ b/colossalai/auto_parallel/tensor_shard/solver/solver.py @@ -109,7 +109,7 @@ def _prepare_data_for_solver(self): strategies_len = [] for node in self.nodes: strategies_len.append(self.cost_graph.node_lens[node]) - strategies_len = np.array(strategies_len) + strategies_len = np.asarray(strategies_len) # prepare following_nodes following_nodes = self.cost_graph.following_dict @@ -137,8 +137,8 @@ def _prepare_data_for_solver(self): for i in range(strategies_len[src_node_index]): for j in range(strategies_len[dst_node_index]): resharding_costs.append(edge_cost[(i, j)]) - edge_pairs = np.array(edge_pairs) - resharding_costs = np.array(resharding_costs) + edge_pairs = np.asarray(edge_pairs) + resharding_costs = np.asarray(resharding_costs) # prepare liveness_set liveness_set = self.liveness_list @@ -184,9 +184,9 @@ def _prepare_data_for_solver(self): communication_costs.append(origin_communication_cost) memory_costs.append(memory_cost) - compute_costs = np.array(compute_costs) - communication_costs = np.array(communication_costs) - memory_costs = np.array(memory_costs) + compute_costs = np.asarray(compute_costs) + communication_costs = np.asarray(communication_costs) + memory_costs = np.asarray(memory_costs) # omit initial value for nodes s_init_np = None diff --git a/colossalai/device/calc_pipeline_strategy.py b/colossalai/device/calc_pipeline_strategy.py index 72d432701ada..ae060b5d4f74 100644 --- a/colossalai/device/calc_pipeline_strategy.py +++ b/colossalai/device/calc_pipeline_strategy.py @@ -49,7 +49,7 @@ def alpa_dp_impl( for k in range(num_layers - 1, -1, -1): for d in range(1, num_devices + 1): for m, submesh in enumerate(submesh_choices): - n_submesh_devices = np.prod(np.array(submesh)) + n_submesh_devices = 
np.prod(np.asarray(submesh)) if n_submesh_devices <= d: # TODO: [luzgh]: Why alpa needs max_n_succ_stages? Delete. # if s - 1 <= max_n_succ_stages[i, k - 1, m, n_config]: @@ -83,7 +83,7 @@ def alpa_dp_impl( res.append(((current_layer, next_start_layer), submesh_choice, autosharding_choice)) current_s -= 1 current_layer = next_start_layer - current_devices -= np.prod(np.array(submesh_choices[submesh_choice])) + current_devices -= np.prod(np.asarray(submesh_choices[submesh_choice])) assert current_s == 0 and current_layer == num_layers and current_devices == 0 return total_cost, res @@ -98,7 +98,7 @@ def alpa_dp( Arguments: submesh_choices: List[(int,int)] num_autosharding_configs: Max number of t_intra(start_layer, end_layer, LogicalMesh) - compute_cost: np.array(num_layers,num_layers,num_submesh_choices,num_autosharding_configs) + compute_cost: np.ndarray(num_layers,num_layers,num_submesh_choices,num_autosharding_configs) """ assert np.shape(compute_cost) == ( num_layers, diff --git a/colossalai/legacy/inference/dynamic_batching/req_queue.py b/colossalai/legacy/inference/dynamic_batching/req_queue.py index 0de43bd1a21f..88aeeb3b01da 100644 --- a/colossalai/legacy/inference/dynamic_batching/req_queue.py +++ b/colossalai/legacy/inference/dynamic_batching/req_queue.py @@ -34,9 +34,9 @@ def _can_add_new_req(self, req): self.cache_len_list.append((req.input_len + 1, req.max_output_len - 1)) # hard to analysis self.cache_len_list.sort(key=lambda x: -x[1]) - left_out_len_array = np.array([e[1] for e in self.cache_len_list]) + left_out_len_array = np.asarray([e[1] for e in self.cache_len_list]) # assert left_out_len_array.min() >= 0 - has_run_len_array = np.array([e[0] for e in self.cache_len_list]) + has_run_len_array = np.asarray([e[0] for e in self.cache_len_list]) cum_run_len_array = np.cumsum(has_run_len_array) size_array = np.arange(1, len(self.cache_len_list) + 1, 1) diff --git a/colossalai/zero/gemini/chunk/search_utils.py 
b/colossalai/zero/gemini/chunk/search_utils.py index 24d8537bad90..ea87082afe6d 100644 --- a/colossalai/zero/gemini/chunk/search_utils.py +++ b/colossalai/zero/gemini/chunk/search_utils.py @@ -27,7 +27,7 @@ def _filter_exlarge_params(model: nn.Module, size_dict: Dict[int, List[int]]) -> if len(agg_size_list) == 0: return - params_size_arr = np.array(agg_size_list) + params_size_arr = np.asarray(agg_size_list) std = np.std(params_size_arr) mean = np.mean(params_size_arr) diff --git a/examples/images/diffusion/ldm/data/imagenet.py b/examples/images/diffusion/ldm/data/imagenet.py index 8483e16ab23a..8cc4ce82bce3 100644 --- a/examples/images/diffusion/ldm/data/imagenet.py +++ b/examples/images/diffusion/ldm/data/imagenet.py @@ -118,10 +118,10 @@ def _load(self): self.human_labels = [human_dict[s] for s in self.synsets] labels = { - "relpath": np.array(self.relpaths), - "synsets": np.array(self.synsets), - "class_label": np.array(self.class_labels), - "human_label": np.array(self.human_labels), + "relpath": np.asarray(self.relpaths), + "synsets": np.asarray(self.synsets), + "class_label": np.asarray(self.class_labels), + "human_label": np.asarray(self.human_labels), } if self.process_images: @@ -346,7 +346,7 @@ def __getitem__(self, i): if not image.mode == "RGB": image = image.convert("RGB") - image = np.array(image).astype(np.uint8) + image = np.asarray(image).astype(np.uint8) min_side_len = min(image.shape[:2]) crop_side_len = min_side_len * np.random.uniform(self.min_crop_f, self.max_crop_f, size=None) @@ -364,7 +364,7 @@ def __getitem__(self, i): if self.pil_interpolation: image_pil = PIL.Image.fromarray(image) LR_image = self.degradation_process(image_pil) - LR_image = np.array(LR_image).astype(np.uint8) + LR_image = np.asarray(LR_image).astype(np.uint8) else: LR_image = self.degradation_process(image=image)["image"] diff --git a/examples/images/diffusion/ldm/data/lsun.py b/examples/images/diffusion/ldm/data/lsun.py index e5c374aa2d51..d5a37397eec6 100644 --- 
a/examples/images/diffusion/ldm/data/lsun.py +++ b/examples/images/diffusion/ldm/data/lsun.py @@ -55,7 +55,7 @@ def __getitem__(self, i): # default to score-sde preprocessing - img = np.array(image).astype(np.uint8) # convert image to numpy array + img = np.asarray(image).astype(np.uint8) # convert image to numpy array crop = min(img.shape[0], img.shape[1]) # crop the image to a square shape ( h, @@ -73,7 +73,7 @@ def __getitem__(self, i): image = image.resize((self.size, self.size), resample=self.interpolation) image = self.flip(image) # flip the image horizontally with the given probability - image = np.array(image).astype(np.uint8) + image = np.asarray(image).astype(np.uint8) example["image"] = (image / 127.5 - 1.0).astype(np.float32) # normalize the image values and convert to float32 return example # return the example dictionary containing the image and its file paths diff --git a/examples/images/diffusion/ldm/modules/diffusionmodules/util.py b/examples/images/diffusion/ldm/modules/diffusionmodules/util.py index aed1b061323a..09c622fee84d 100644 --- a/examples/images/diffusion/ldm/modules/diffusionmodules/util.py +++ b/examples/images/diffusion/ldm/modules/diffusionmodules/util.py @@ -86,7 +86,7 @@ def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999): t1 = i / num_diffusion_timesteps t2 = (i + 1) / num_diffusion_timesteps betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta)) - return np.array(betas) + return np.asarray(betas) def extract_into_tensor(a, t, x_shape): diff --git a/examples/images/diffusion/ldm/modules/image_degradation/bsrgan.py b/examples/images/diffusion/ldm/modules/image_degradation/bsrgan.py index 879b2aa099b6..75add939c15a 100644 --- a/examples/images/diffusion/ldm/modules/image_degradation/bsrgan.py +++ b/examples/images/diffusion/ldm/modules/image_degradation/bsrgan.py @@ -73,9 +73,9 @@ def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6): k : kernel """ - v = np.dot(np.array([[np.cos(theta), 
-np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.array([1.0, 0.0])) - V = np.array([[v[0], v[1]], [v[1], -v[0]]]) - D = np.array([[l1, 0], [0, l2]]) + v = np.dot(np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.asarray([1.0, 0.0])) + V = np.asarray([[v[0], v[1]], [v[1], -v[0]]]) + D = np.asarray([[l1, 0], [0, l2]]) Sigma = np.dot(np.dot(V, D), np.linalg.inv(V)) k = gm_blur_kernel(mean=[0, 0], cov=Sigma, size=ksize) @@ -141,7 +141,7 @@ def blur(x, k): return x -def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10.0, noise_level=0): +def gen_kernel(k_size=np.asarray([15, 15]), scale_factor=np.asarray([4, 4]), min_var=0.6, max_var=10.0, noise_level=0): """ " # modified version of https://github.com/assafshocher/BlindSR_dataset_generator # Kai Zhang @@ -156,7 +156,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var # Set COV matrix using Lambdas and Theta LAMBDA = np.diag([lambda_1, lambda_2]) - Q = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) + Q = np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) SIGMA = Q @ LAMBDA @ Q.T INV_SIGMA = np.linalg.inv(SIGMA)[None, None, :, :] @@ -201,7 +201,7 @@ def fspecial_laplacian(alpha): h1 = alpha / (alpha + 1) h2 = (1 - alpha) / (alpha + 1) h = [[h1, h2, h1], [h2, -4 / (alpha + 1), h2], [h1, h2, h1]] - h = np.array(h) + h = np.asarray(h) return h diff --git a/examples/images/diffusion/ldm/modules/image_degradation/bsrgan_light.py b/examples/images/diffusion/ldm/modules/image_degradation/bsrgan_light.py index cf3f83f0c011..590912fc0a6c 100644 --- a/examples/images/diffusion/ldm/modules/image_degradation/bsrgan_light.py +++ b/examples/images/diffusion/ldm/modules/image_degradation/bsrgan_light.py @@ -73,9 +73,9 @@ def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6): k : kernel """ - v = np.dot(np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), 
np.cos(theta)]]), np.array([1.0, 0.0])) - V = np.array([[v[0], v[1]], [v[1], -v[0]]]) - D = np.array([[l1, 0], [0, l2]]) + v = np.dot(np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.asarray([1.0, 0.0])) + V = np.asarray([[v[0], v[1]], [v[1], -v[0]]]) + D = np.asarray([[l1, 0], [0, l2]]) Sigma = np.dot(np.dot(V, D), np.linalg.inv(V)) k = gm_blur_kernel(mean=[0, 0], cov=Sigma, size=ksize) @@ -141,7 +141,7 @@ def blur(x, k): return x -def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10.0, noise_level=0): +def gen_kernel(k_size=np.asarray([15, 15]), scale_factor=np.asarray([4, 4]), min_var=0.6, max_var=10.0, noise_level=0): """ " # modified version of https://github.com/assafshocher/BlindSR_dataset_generator # Kai Zhang @@ -156,7 +156,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var # Set COV matrix using Lambdas and Theta LAMBDA = np.diag([lambda_1, lambda_2]) - Q = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) + Q = np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) SIGMA = Q @ LAMBDA @ Q.T INV_SIGMA = np.linalg.inv(SIGMA)[None, None, :, :] @@ -201,7 +201,7 @@ def fspecial_laplacian(alpha): h1 = alpha / (alpha + 1) h2 = (1 - alpha) / (alpha + 1) h = [[h1, h2, h1], [h2, -4 / (alpha + 1), h2], [h1, h2, h1]] - h = np.array(h) + h = np.asarray(h) return h diff --git a/examples/images/diffusion/ldm/modules/image_degradation/utils_image.py b/examples/images/diffusion/ldm/modules/image_degradation/utils_image.py index 71fae1084b61..3ac27719a828 100644 --- a/examples/images/diffusion/ldm/modules/image_degradation/utils_image.py +++ b/examples/images/diffusion/ldm/modules/image_degradation/utils_image.py @@ -658,7 +658,7 @@ def calculate_ssim(img1, img2, border=0): ssims = [] for i in range(3): ssims.append(ssim(img1[:, :, i], img2[:, :, i])) - return np.array(ssims).mean() + return 
np.asarray(ssims).mean() elif img1.shape[2] == 1: return ssim(np.squeeze(img1), np.squeeze(img2)) else: diff --git a/examples/images/diffusion/ldm/util.py b/examples/images/diffusion/ldm/util.py index 9b52b199aa2c..2e5512fad8c6 100644 --- a/examples/images/diffusion/ldm/util.py +++ b/examples/images/diffusion/ldm/util.py @@ -24,7 +24,7 @@ def log_txt_as_img(wh, xc, size=10): except UnicodeEncodeError: print("Cant encode string for logging. Skipping.") - txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 + txt = np.asarray(txt).transpose(2, 0, 1) / 127.5 - 1.0 txts.append(txt) txts = np.stack(txts) txts = torch.tensor(txts) diff --git a/examples/images/diffusion/scripts/img2img.py b/examples/images/diffusion/scripts/img2img.py index 4c386113dcc3..539189b8ee36 100644 --- a/examples/images/diffusion/scripts/img2img.py +++ b/examples/images/diffusion/scripts/img2img.py @@ -57,7 +57,7 @@ def load_img(path): print(f"loaded input image of size ({w}, {h}) from {path}") w, h = map(lambda x: x - x % 64, (w, h)) # resize to integer multiple of 64 image = image.resize((w, h), resample=PIL.Image.LANCZOS) - image = np.array(image).astype(np.float32) / 255.0 + image = np.asarray(image).astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) return 2.0 * image - 1.0 diff --git a/examples/images/diffusion/scripts/inpaint.py b/examples/images/diffusion/scripts/inpaint.py index afffcf1685e6..7f6255f74ab7 100644 --- a/examples/images/diffusion/scripts/inpaint.py +++ b/examples/images/diffusion/scripts/inpaint.py @@ -12,12 +12,12 @@ def make_batch(image, mask, device): - image = np.array(Image.open(image).convert("RGB")) + image = np.asarray(Image.open(image).convert("RGB")) image = image.astype(np.float32) / 255.0 image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image) - mask = np.array(Image.open(mask).convert("L")) + mask = np.asarray(Image.open(mask).convert("L")) mask = mask.astype(np.float32) / 255.0 mask = 
mask[None, None] mask[mask < 0.5] = 0 diff --git a/examples/images/diffusion/scripts/txt2img.py b/examples/images/diffusion/scripts/txt2img.py index feb17b9f77ae..d9eecc7ab0a6 100644 --- a/examples/images/diffusion/scripts/txt2img.py +++ b/examples/images/diffusion/scripts/txt2img.py @@ -183,7 +183,7 @@ def parse_args(): def put_watermark(img, wm_encoder=None): if wm_encoder is not None: - img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) + img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) img = wm_encoder.encode(img, "dwtDct") img = Image.fromarray(img[:, :, ::-1]) return img diff --git a/examples/images/dreambooth/train_dreambooth_inpaint.py b/examples/images/dreambooth/train_dreambooth_inpaint.py index 32f1b4959879..8275dcdb122c 100644 --- a/examples/images/dreambooth/train_dreambooth_inpaint.py +++ b/examples/images/dreambooth/train_dreambooth_inpaint.py @@ -33,11 +33,11 @@ def prepare_mask_and_masked_image(image, mask): - image = np.array(image.convert("RGB")) + image = np.asarray(image.convert("RGB")) image = image[None].transpose(0, 3, 1, 2) image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0 - mask = np.array(mask.convert("L")) + mask = np.asarray(mask.convert("L")) mask = mask.astype(np.float32) / 255.0 mask = mask[None, None] mask[mask < 0.5] = 0 diff --git a/examples/tutorial/auto_parallel/bench_utils.py b/examples/tutorial/auto_parallel/bench_utils.py index 96cfd49c6787..ce422cd61e33 100644 --- a/examples/tutorial/auto_parallel/bench_utils.py +++ b/examples/tutorial/auto_parallel/bench_utils.py @@ -63,7 +63,7 @@ def bench_rotor( sample_points: int = 20, free_memory: int = torch.cuda.mem_get_info()[0], start_factor: int = 4, -) -> Tuple[np.array, list, list]: +) -> Tuple[np.ndarray, list, list]: """Auto Checkpoint Rotor Algorithm benchmarking Benchmarks the Auto Checkpoint Rotor Algorithm for a given graph module and data. 
Args: @@ -76,7 +76,7 @@ def bench_rotor( start_factor (int, optional): Start memory budget factor for benchmark, the start memory budget will be free_memory / start_factor. Defaults to 4. Returns: - Tuple[np.array, list, list]: return budgets vector (MB), peak memory vector (MB), step time vector (MS). + Tuple[np.ndarray, list, list]: return budgets vector (MB), peak memory vector (MB), step time vector (MS). """ peak_hist, step_hist = [], [] raw_graph = deepcopy(gm.graph) diff --git a/examples/tutorial/sequence_parallel/data/datasets/blendable_dataset.py b/examples/tutorial/sequence_parallel/data/datasets/blendable_dataset.py index 1fa9c85fce0a..b76bc28ff75e 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/blendable_dataset.py +++ b/examples/tutorial/sequence_parallel/data/datasets/blendable_dataset.py @@ -32,7 +32,7 @@ def __init__(self, datasets, weights): self.size += len(dataset) # Normalize weights. - weights = np.array(weights, dtype=np.float64) + weights = np.asarray(weights, dtype=np.float64) sum_weights = np.sum(weights) assert sum_weights > 0.0 weights /= sum_weights diff --git a/examples/tutorial/sequence_parallel/data/datasets/dataset_utils.py b/examples/tutorial/sequence_parallel/data/datasets/dataset_utils.py index 3e197ff96c0c..38493eb5cbce 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/dataset_utils.py +++ b/examples/tutorial/sequence_parallel/data/datasets/dataset_utils.py @@ -371,11 +371,11 @@ def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions, masked_labels # Tokens and token types. filler = [pad_id] * padding_length - tokens_np = np.array(tokens + filler, dtype=np.int64) - tokentypes_np = np.array(tokentypes + filler, dtype=np.int64) + tokens_np = np.asarray(tokens + filler, dtype=np.int64) + tokentypes_np = np.asarray(tokentypes + filler, dtype=np.int64) # Padding mask. 
- padding_mask_np = np.array([1] * num_tokens + [0] * padding_length, dtype=np.int64) + padding_mask_np = np.asarray([1] * num_tokens + [0] * padding_length, dtype=np.int64) # Lables and loss mask. labels = [-1] * max_seq_length @@ -384,8 +384,8 @@ def pad_and_convert_to_numpy(tokens, tokentypes, masked_positions, masked_labels assert masked_positions[i] < num_tokens labels[masked_positions[i]] = masked_labels[i] loss_mask[masked_positions[i]] = 1 - labels_np = np.array(labels, dtype=np.int64) - loss_mask_np = np.array(loss_mask, dtype=np.int64) + labels_np = np.asarray(labels, dtype=np.int64) + loss_mask_np = np.asarray(loss_mask, dtype=np.int64) return tokens_np, tokentypes_np, labels_np, padding_mask_np, loss_mask_np diff --git a/examples/tutorial/sequence_parallel/data/datasets/ict_dataset.py b/examples/tutorial/sequence_parallel/data/datasets/ict_dataset.py index 220099f9ba32..914b0b18bcaf 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/ict_dataset.py +++ b/examples/tutorial/sequence_parallel/data/datasets/ict_dataset.py @@ -174,4 +174,4 @@ def concat_and_pad_tokens(self, tokens, title=None): pad_mask = [1] * len(tokens) + [0] * num_pad tokens += [self.pad_id] * num_pad - return np.array(tokens), np.array(pad_mask) + return np.asarray(tokens), np.asarray(pad_mask) diff --git a/examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py b/examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py index 961a1650bd74..9a9e6e5ccf9b 100644 --- a/examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py +++ b/examples/tutorial/sequence_parallel/data/datasets/indexed_dataset.py @@ -84,7 +84,7 @@ def read_longs(f, n): def write_longs(f, a): - f.write(np.array(a, dtype=np.int64)) + f.write(np.asarray(a, dtype=np.int64)) dtypes = {1: np.uint8, 2: np.int8, 3: np.int16, 4: np.int32, 5: np.int64, 6: float, 7: np.double, 8: np.uint16} @@ -260,7 +260,7 @@ def __init__(self, out_file, dtype=np.int32): self.doc_idx = [0] def 
add_item(self, tensor): - bytes = self.out_file.write(np.array(tensor.numpy(), dtype=self.dtype)) + bytes = self.out_file.write(np.asarray(tensor.numpy(), dtype=self.dtype)) self.data_offsets.append(self.data_offsets[-1] + bytes / self.element_size) for s in tensor.size(): self.sizes.append(s) @@ -344,15 +344,15 @@ def write(self, sizes, doc_idx): self._file.write(struct.pack("