It is recommended to use np.asarray instead of np.array to avoid unnecessary copies of the data #6015

Open · wants to merge 1 commit into base: main
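For context on the motivation, here is a minimal sketch (standalone, not taken from this repository) of the difference between the two constructors: np.array copies its input by default, while np.asarray returns the argument unchanged whenever it is already an ndarray with a compatible dtype, so the copy is skipped.

import numpy as np

x = np.ones((1024, 1024), dtype=np.float64)

a = np.array(x)    # copy=True by default, so a new buffer is allocated
b = np.asarray(x)  # dtype already matches, so the original array is returned

print(a is x)                  # False -> extra memory and copy time
print(b is x)                  # True  -> no copy made
print(np.shares_memory(b, x))  # True

# If a conversion is actually needed (different dtype), asarray copies as well.
c = np.asarray(x, dtype=np.float32)
print(c is x)                  # False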
@@ -60,7 +60,7 @@ def _calculate_label_metrics(self, metric: str, category: str):
flag = False
logits = []
for i, sample in enumerate(self.data[category]["data"]):
- if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))):
+ if np.any(np.isnan(np.asarray(list(sample["logits_over_choices"].values())))):
if not flag:
print(
f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
@@ -81,10 +81,10 @@ def _calculate_label_metrics(self, metric: str, category: str):
)
logits.append(references[i] if score == 1 else -1)
else:
- logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values()))))
+ logits.append(np.argmax(np.asarray(list(sample["logits_over_choices"].values()))))

- references = np.array(references)
- logits = np.array(logits)
+ references = np.asarray(references)
+ logits = np.asarray(logits)
scores = np.sum(references == logits) / len(self.data[category]["data"]) * 100

self.evaluation_results[metric][category] = (scores, len(self.data[category]["data"]))
@@ -107,7 +107,7 @@ def _calculate_combined_metrics(self, metric: str, category: str):
flag = False
logits = []
for i, sample in enumerate(self.data[category]["data"]):
- if np.any(np.isnan(np.array(list(sample["logits_over_choices"].values())))):
+ if np.any(np.isnan(np.asarray(list(sample["logits_over_choices"].values())))):
if not flag:
print(
f"NaN in the logits, switch to exact match for category {category} in dataset {self.dataset_name} in model {self.model_name}."
@@ -123,7 +123,7 @@ def _calculate_combined_metrics(self, metric: str, category: str):
)
logits.append(references[i] if score == 1 else -1)
else:
- logits.append(np.argmax(np.array(list(sample["logits_over_choices"].values()))))
+ logits.append(np.argmax(np.asarray(list(sample["logits_over_choices"].values()))))

metric_method = eval("metric_helper." + metric)

@@ -194,21 +194,21 @@ def _calculate_loss_metrics(self, metric: str, category: str):
if metric == "perplexity":
weight = len(self.data[category]["data"]) / self.metric_total_length[metric]
losses = [min(sample["loss"]) for sample in self.data[category]["data"]]
- perplexity = np.mean(np.exp(np.array(losses)))
+ perplexity = np.mean(np.exp(np.asarray(losses)))

self.evaluation_results["perplexity"][category] = (perplexity, len(self.data[category]["data"]))
self.evaluation_results["perplexity"]["ALL"] += perplexity * weight
elif metric == "ppl_score":
weight = len(self.data[category]["data"]) / self.metric_total_length[metric]
losses = [min(sample["loss"]) for sample in self.data[category]["data"]]
- perplexity_score = np.mean(np.exp(-np.array(losses))) * 100
+ perplexity_score = np.mean(np.exp(-np.asarray(losses))) * 100

self.evaluation_results["ppl_score"][category] = (perplexity_score, len(self.data[category]["data"]))
self.evaluation_results["ppl_score"]["ALL"] += perplexity_score * weight
elif metric == "ppl_score_over_choices" and self.data[category]["inference_kwargs"]["all_classes"] is not None:
weight = len(self.data[category]["data"]) / self.metric_total_length[metric]
loss_over_choices = [sample["loss_over_choices"] for sample in self.data[category]["data"]]
- perplexity_score_over_choices = np.mean(np.exp(-np.array(loss_over_choices))) * 100
+ perplexity_score_over_choices = np.mean(np.exp(-np.asarray(loss_over_choices))) * 100

self.evaluation_results["ppl_score_over_choices"][category] = (
perplexity_score_over_choices,
@@ -218,29 +218,29 @@ def _calculate_loss_metrics(self, metric: str, category: str):
elif metric == "per_byte_perplexity":
weight = len(self.data[category]["data"]) / self.metric_total_length[metric]
losses = [min(sample["loss_sum"]) for sample in self.data[category]["data"]]
- perplexity = np.mean(np.exp(np.array(losses) / np.array(self.N_bytes[category])))
+ perplexity = np.mean(np.exp(np.asarray(losses) / np.asarray(self.N_bytes[category])))

self.evaluation_results["per_byte_perplexity"][category] = perplexity
self.evaluation_results["per_byte_perplexity"]["ALL"] += perplexity * weight
elif metric == "per_byte_ppl_score":
weight = len(self.data[category]["data"]) / self.metric_total_length[metric]
losses = [min(sample["loss_sum"]) for sample in self.data[category]["data"]]
- perplexity_score = np.mean(np.exp(-np.array(losses) / np.array(self.N_bytes[category]))) * 100
+ perplexity_score = np.mean(np.exp(-np.asarray(losses) / np.asarray(self.N_bytes[category]))) * 100

self.evaluation_results["per_byte_ppl_score"][category] = perplexity_score
self.evaluation_results["per_byte_ppl_score"]["ALL"] += perplexity_score * weight
elif metric == "loss_over_all_tokens":
weight = len(self.data[category]["data"]) / self.metric_total_length[metric]
losses = [min(sample["loss_sum"]) for sample in self.data[category]["data"]]
token_nums = [sample["token_num"][np.argmin(sample["loss_sum"])] for sample in self.data[category]["data"]]
- perplexity = np.sum(np.array(losses)) / np.sum(np.array(token_nums))
+ perplexity = np.sum(np.asarray(losses)) / np.sum(np.asarray(token_nums))

self.evaluation_results["loss_over_all_tokens"][category] = perplexity
self.evaluation_results["loss_over_all_tokens"]["ALL"] += perplexity * weight

# The number of tokens can be used for normalizing.
# See https://github.com/SkyworkAI/Skywork/issues/43#issuecomment-1811733834
print(f"{self.model_name} {category} token num: {np.sum(np.array(token_nums))}")
print(f"{self.model_name} {category} token num: {np.sum(np.asarray(token_nums))}")

def _evaluate(self):
"""Calculate and return evaluation results"""
@@ -422,7 +422,7 @@ def inference(self, data_loader: DataLoader, inference_kwargs: Dict[str, Any], d
batch[j]["loss_over_choices"] = loss_over_choices[j]

if calculate_loss:
batch[j]["loss"] = (np.array(batch_losses[j]) / np.array(batch_target_token_nums[j])).tolist()
batch[j]["loss"] = (np.asarray(batch_losses[j]) / np.asarray(batch_target_token_nums[j])).tolist()

# loss_sum is specially used for pretrain dataset for calculating per-byte-perplexity.
# However, loss (which is per sample loss) suffices for most cases.
12 changes: 6 additions & 6 deletions colossalai/auto_parallel/tensor_shard/solver/solver.py
@@ -109,7 +109,7 @@ def _prepare_data_for_solver(self):
strategies_len = []
for node in self.nodes:
strategies_len.append(self.cost_graph.node_lens[node])
- strategies_len = np.array(strategies_len)
+ strategies_len = np.asarray(strategies_len)

# prepare following_nodes
following_nodes = self.cost_graph.following_dict
@@ -137,8 +137,8 @@ def _prepare_data_for_solver(self):
for i in range(strategies_len[src_node_index]):
for j in range(strategies_len[dst_node_index]):
resharding_costs.append(edge_cost[(i, j)])
- edge_pairs = np.array(edge_pairs)
- resharding_costs = np.array(resharding_costs)
+ edge_pairs = np.asarray(edge_pairs)
+ resharding_costs = np.asarray(resharding_costs)

# prepare liveness_set
liveness_set = self.liveness_list
@@ -184,9 +184,9 @@ def _prepare_data_for_solver(self):
communication_costs.append(origin_communication_cost)
memory_costs.append(memory_cost)

- compute_costs = np.array(compute_costs)
- communication_costs = np.array(communication_costs)
- memory_costs = np.array(memory_costs)
+ compute_costs = np.asarray(compute_costs)
+ communication_costs = np.asarray(communication_costs)
+ memory_costs = np.asarray(memory_costs)

# omit initial value for nodes
s_init_np = None
6 changes: 3 additions & 3 deletions colossalai/device/calc_pipeline_strategy.py
@@ -49,7 +49,7 @@ def alpa_dp_impl(
for k in range(num_layers - 1, -1, -1):
for d in range(1, num_devices + 1):
for m, submesh in enumerate(submesh_choices):
- n_submesh_devices = np.prod(np.array(submesh))
+ n_submesh_devices = np.prod(np.asarray(submesh))
if n_submesh_devices <= d:
# TODO: [luzgh]: Why alpa needs max_n_succ_stages? Delete.
# if s - 1 <= max_n_succ_stages[i, k - 1, m, n_config]:
@@ -83,7 +83,7 @@ def alpa_dp_impl(
res.append(((current_layer, next_start_layer), submesh_choice, autosharding_choice))
current_s -= 1
current_layer = next_start_layer
- current_devices -= np.prod(np.array(submesh_choices[submesh_choice]))
+ current_devices -= np.prod(np.asarray(submesh_choices[submesh_choice]))
assert current_s == 0 and current_layer == num_layers and current_devices == 0

return total_cost, res
@@ -98,7 +98,7 @@ def alpa_dp(
Arguments:
submesh_choices: List[(int,int)]
num_autosharding_configs: Max number of t_intra(start_layer, end_layer, LogicalMesh)
- compute_cost: np.array(num_layers,num_layers,num_submesh_choices,num_autosharding_configs)
+ compute_cost: np.asarray(num_layers,num_layers,num_submesh_choices,num_autosharding_configs)
"""
assert np.shape(compute_cost) == (
num_layers,
4 changes: 2 additions & 2 deletions colossalai/legacy/inference/dynamic_batching/req_queue.py
@@ -34,9 +34,9 @@ def _can_add_new_req(self, req):
self.cache_len_list.append((req.input_len + 1, req.max_output_len - 1)) # hard to analyze
self.cache_len_list.sort(key=lambda x: -x[1])

- left_out_len_array = np.array([e[1] for e in self.cache_len_list])
+ left_out_len_array = np.asarray([e[1] for e in self.cache_len_list])
# assert left_out_len_array.min() >= 0
- has_run_len_array = np.array([e[0] for e in self.cache_len_list])
+ has_run_len_array = np.asarray([e[0] for e in self.cache_len_list])
cum_run_len_array = np.cumsum(has_run_len_array)
size_array = np.arange(1, len(self.cache_len_list) + 1, 1)

2 changes: 1 addition & 1 deletion colossalai/zero/gemini/chunk/search_utils.py
@@ -27,7 +27,7 @@ def _filter_exlarge_params(model: nn.Module, size_dict: Dict[int, List[int]]) ->
if len(agg_size_list) == 0:
return

- params_size_arr = np.array(agg_size_list)
+ params_size_arr = np.asarray(agg_size_list)

std = np.std(params_size_arr)
mean = np.mean(params_size_arr)
12 changes: 6 additions & 6 deletions examples/images/diffusion/ldm/data/imagenet.py
@@ -118,10 +118,10 @@ def _load(self):
self.human_labels = [human_dict[s] for s in self.synsets]

labels = {
"relpath": np.array(self.relpaths),
"synsets": np.array(self.synsets),
"class_label": np.array(self.class_labels),
"human_label": np.array(self.human_labels),
"relpath": np.asarray(self.relpaths),
"synsets": np.asarray(self.synsets),
"class_label": np.asarray(self.class_labels),
"human_label": np.asarray(self.human_labels),
}

if self.process_images:
@@ -346,7 +346,7 @@ def __getitem__(self, i):
if not image.mode == "RGB":
image = image.convert("RGB")

- image = np.array(image).astype(np.uint8)
+ image = np.asarray(image).astype(np.uint8)

min_side_len = min(image.shape[:2])
crop_side_len = min_side_len * np.random.uniform(self.min_crop_f, self.max_crop_f, size=None)
@@ -364,7 +364,7 @@ def __getitem__(self, i):
if self.pil_interpolation:
image_pil = PIL.Image.fromarray(image)
LR_image = self.degradation_process(image_pil)
- LR_image = np.array(LR_image).astype(np.uint8)
+ LR_image = np.asarray(LR_image).astype(np.uint8)

else:
LR_image = self.degradation_process(image=image)["image"]
4 changes: 2 additions & 2 deletions examples/images/diffusion/ldm/data/lsun.py
@@ -55,7 +55,7 @@ def __getitem__(self, i):

# default to score-sde preprocessing

- img = np.array(image).astype(np.uint8) # convert image to numpy array
+ img = np.asarray(image).astype(np.uint8) # convert image to numpy array
crop = min(img.shape[0], img.shape[1]) # crop the image to a square shape
(
h,
@@ -73,7 +73,7 @@ def __getitem__(self, i):
image = image.resize((self.size, self.size), resample=self.interpolation)

image = self.flip(image) # flip the image horizontally with the given probability
- image = np.array(image).astype(np.uint8)
+ image = np.asarray(image).astype(np.uint8)
example["image"] = (image / 127.5 - 1.0).astype(np.float32) # normalize the image values and convert to float32
return example # return the example dictionary containing the image and its file paths

@@ -86,7 +86,7 @@ def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
t1 = i / num_diffusion_timesteps
t2 = (i + 1) / num_diffusion_timesteps
betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
- return np.array(betas)
+ return np.asarray(betas)


def extract_into_tensor(a, t, x_shape):
@@ -73,9 +73,9 @@ def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
k : kernel
"""

- v = np.dot(np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.array([1.0, 0.0]))
- V = np.array([[v[0], v[1]], [v[1], -v[0]]])
- D = np.array([[l1, 0], [0, l2]])
+ v = np.dot(np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.asarray([1.0, 0.0]))
+ V = np.asarray([[v[0], v[1]], [v[1], -v[0]]])
+ D = np.asarray([[l1, 0], [0, l2]])
Sigma = np.dot(np.dot(V, D), np.linalg.inv(V))
k = gm_blur_kernel(mean=[0, 0], cov=Sigma, size=ksize)

@@ -141,7 +141,7 @@ def blur(x, k):
return x


- def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10.0, noise_level=0):
+ def gen_kernel(k_size=np.asarray([15, 15]), scale_factor=np.asarray([4, 4]), min_var=0.6, max_var=10.0, noise_level=0):
""" "
# modified version of https://github.com/assafshocher/BlindSR_dataset_generator
# Kai Zhang
@@ -156,7 +156,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var

# Set COV matrix using Lambdas and Theta
LAMBDA = np.diag([lambda_1, lambda_2])
- Q = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
+ Q = np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
SIGMA = Q @ LAMBDA @ Q.T
INV_SIGMA = np.linalg.inv(SIGMA)[None, None, :, :]

@@ -201,7 +201,7 @@ def fspecial_laplacian(alpha):
h1 = alpha / (alpha + 1)
h2 = (1 - alpha) / (alpha + 1)
h = [[h1, h2, h1], [h2, -4 / (alpha + 1), h2], [h1, h2, h1]]
- h = np.array(h)
+ h = np.asarray(h)
return h


@@ -73,9 +73,9 @@ def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6):
k : kernel
"""

- v = np.dot(np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.array([1.0, 0.0]))
- V = np.array([[v[0], v[1]], [v[1], -v[0]]])
- D = np.array([[l1, 0], [0, l2]])
+ v = np.dot(np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]), np.asarray([1.0, 0.0]))
+ V = np.asarray([[v[0], v[1]], [v[1], -v[0]]])
+ D = np.asarray([[l1, 0], [0, l2]])
Sigma = np.dot(np.dot(V, D), np.linalg.inv(V))
k = gm_blur_kernel(mean=[0, 0], cov=Sigma, size=ksize)

@@ -141,7 +141,7 @@ def blur(x, k):
return x


- def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var=0.6, max_var=10.0, noise_level=0):
+ def gen_kernel(k_size=np.asarray([15, 15]), scale_factor=np.asarray([4, 4]), min_var=0.6, max_var=10.0, noise_level=0):
""" "
# modified version of https://github.com/assafshocher/BlindSR_dataset_generator
# Kai Zhang
@@ -156,7 +156,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var

# Set COV matrix using Lambdas and Theta
LAMBDA = np.diag([lambda_1, lambda_2])
- Q = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
+ Q = np.asarray([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
SIGMA = Q @ LAMBDA @ Q.T
INV_SIGMA = np.linalg.inv(SIGMA)[None, None, :, :]

@@ -201,7 +201,7 @@ def fspecial_laplacian(alpha):
h1 = alpha / (alpha + 1)
h2 = (1 - alpha) / (alpha + 1)
h = [[h1, h2, h1], [h2, -4 / (alpha + 1), h2], [h1, h2, h1]]
- h = np.array(h)
+ h = np.asarray(h)
return h


@@ -658,7 +658,7 @@ def calculate_ssim(img1, img2, border=0):
ssims = []
for i in range(3):
ssims.append(ssim(img1[:, :, i], img2[:, :, i]))
- return np.array(ssims).mean()
+ return np.asarray(ssims).mean()
elif img1.shape[2] == 1:
return ssim(np.squeeze(img1), np.squeeze(img2))
else:
2 changes: 1 addition & 1 deletion examples/images/diffusion/ldm/util.py
@@ -24,7 +24,7 @@ def log_txt_as_img(wh, xc, size=10):
except UnicodeEncodeError:
print("Cant encode string for logging. Skipping.")

- txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0
+ txt = np.asarray(txt).transpose(2, 0, 1) / 127.5 - 1.0
txts.append(txt)
txts = np.stack(txts)
txts = torch.tensor(txts)
2 changes: 1 addition & 1 deletion examples/images/diffusion/scripts/img2img.py
@@ -57,7 +57,7 @@ def load_img(path):
print(f"loaded input image of size ({w}, {h}) from {path}")
w, h = map(lambda x: x - x % 64, (w, h)) # resize to integer multiple of 64
image = image.resize((w, h), resample=PIL.Image.LANCZOS)
- image = np.array(image).astype(np.float32) / 255.0
+ image = np.asarray(image).astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image)
return 2.0 * image - 1.0
4 changes: 2 additions & 2 deletions examples/images/diffusion/scripts/inpaint.py
@@ -12,12 +12,12 @@


def make_batch(image, mask, device):
- image = np.array(Image.open(image).convert("RGB"))
+ image = np.asarray(Image.open(image).convert("RGB"))
image = image.astype(np.float32) / 255.0
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image)

- mask = np.array(Image.open(mask).convert("L"))
+ mask = np.asarray(Image.open(mask).convert("L"))
mask = mask.astype(np.float32) / 255.0
mask = mask[None, None]
mask[mask < 0.5] = 0
2 changes: 1 addition & 1 deletion examples/images/diffusion/scripts/txt2img.py
@@ -183,7 +183,7 @@ def parse_args():

def put_watermark(img, wm_encoder=None):
if wm_encoder is not None:
- img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+ img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR)
img = wm_encoder.encode(img, "dwtDct")
img = Image.fromarray(img[:, :, ::-1])
return img
4 changes: 2 additions & 2 deletions examples/images/dreambooth/train_dreambooth_inpaint.py
@@ -33,11 +33,11 @@


def prepare_mask_and_masked_image(image, mask):
- image = np.array(image.convert("RGB"))
+ image = np.asarray(image.convert("RGB"))
image = image[None].transpose(0, 3, 1, 2)
image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0

- mask = np.array(mask.convert("L"))
+ mask = np.asarray(mask.convert("L"))
mask = mask.astype(np.float32) / 255.0
mask = mask[None, None]
mask[mask < 0.5] = 0
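One caveat worth keeping in mind when reading the diff above: most of the touched call sites pass Python lists or list comprehensions, and for those inputs np.asarray must still allocate a fresh array, so the cost and behavior are identical to np.array there. The saving only materializes when the argument is already an ndarray with a compatible dtype. A small sketch of both situations (illustrative only, not code from this PR):

import numpy as np

losses = [0.3, 0.7, 1.2]             # a plain Python list, as in many call sites above
print(np.asarray(losses) is losses)  # False -- a new ndarray is created either way

arr = np.asarray(losses)             # now an ndarray
print(np.asarray(arr) is arr)        # True  -- reused, no copy
print(np.array(arr) is arr)          # False -- np.array copies again by default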