Skip to content
This repository has been archived by the owner on Mar 23, 2023. It is now read-only.

[hotfix] Fit to the refactored dataloader API, and remove tensor shape from the config files. #149

Merged
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,3 @@

# pipeline config
NUM_MICRO_BATCHES = parallel['pipeline']
TENSOR_SHAPE = (BATCH_SIZE // NUM_MICRO_BATCHES, SEQ_LENGTH, HIDDEN_SIZE)
5 changes: 2 additions & 3 deletions image/vision_transformer/hybrid_parallel/train_with_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ def train_imagenet():

# create dataloaders
root = os.environ['DATA']
train_dataloader, test_dataloader = build_dali_imagenet(root, rand_augment=False)

train_dataloader, test_dataloader = build_dali_imagenet(root, train_batch_size=gpc.config.BATCH_SIZE, \
test_batch_size=gpc.config.BATCH_SIZE)
# create loss function
criterion = CrossEntropyLoss(label_smoothing=0.1)

Expand All @@ -92,7 +92,6 @@ def train_imagenet():
test_dataloader=test_dataloader)

logger.info("Engine is built", ranks=[0])

# create schedule
schedule = None
tensor_shape = getattr(gpc.config, 'TENSOR_SHAPE', None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def train_imagenet():

# create dataloader
root = os.environ['DATA']
train_dataloader, test_dataloader = build_dali_imagenet(root, rand_augment=False)
train_dataloader, test_dataloader = build_dali_imagenet(root, train_batch_size=gpc.config.BATCH_SIZE, \
test_batch_size=gpc.config.BATCH_SIZE)

# create loss function
criterion = CrossEntropyLoss(label_smoothing=0.1)
Expand Down
13 changes: 3 additions & 10 deletions language/gpt/gpt2_configs/gpt2_pp1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from colossalai.amp import AMP_TYPE
import torch


BATCH_SIZE = 8
NUM_EPOCHS = 60
SEQ_LEN = 1024
Expand All @@ -14,17 +13,11 @@
HIDDEN_SIZE = 768
PIPELINE = 2
TENSOR_PARALLEL = 2
MODE = '1d'
TENSOR_SHAPE = (BATCH_SIZE // NUM_MICRO_BATCHES, SEQ_LEN, HIDDEN_SIZE)
MODE = '1d'

fp16 = dict(
mode=AMP_TYPE.NAIVE
)
fp16 = dict(mode=AMP_TYPE.NAIVE)

parallel = dict(
pipeline=PIPELINE,
tensor=dict(mode=MODE, size=TENSOR_PARALLEL)
)
parallel = dict(pipeline=PIPELINE, tensor=dict(mode=MODE, size=TENSOR_PARALLEL))

optimizer = dict(
type=Adam,
Expand Down
3 changes: 1 addition & 2 deletions language/gpt/train_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ def mask_function(attention_mask=None):
test_interval=1,
hooks=hook_list,
display_progress=True,
return_output_label=False,
max_steps=5)
return_output_label=False)


if __name__ == '__main__':
Expand Down