-
Notifications
You must be signed in to change notification settings - Fork 20
/
create_caption.py
143 lines (116 loc) · 6.2 KB
/
create_caption.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
@File : create_celeba_caption_v4.py
@Time : 2023/03/24 20:46:59
@Author : Weihao Xia (xiawh3@outlook.com)
@Version : 4.0
@Desc : This script can generate image captions given attribute annotations.
Please note that the text part of the released multi-modal-celeba-hq dataset was not generated by this script.
This version of the script uses a pcfg-like idea and offers improved readability and extensibility compared to the original.
You can alternatively use cntk.pcfg package for sentence generation.
'''
import os
import pandas as pd
from random import randint, choice, shuffle, sample
# Caption mode switch: when left at False, get_caption draws a random subset
# of the positive attributes for each caption; any other value makes it list
# every positive attribute.
ALL_ATTRIBUTE = False  # True
# Ten captions per image in random-subset mode, otherwise a single caption.
NUM_CAPTION = 1 if ALL_ATTRIBUTE is not False else 10
# Attribute names grouped by the verb family used to describe them:
# "is ..." traits, "has ..." features and "wears ..." accessories.
ATTRIBUTES = {
    'IsAttributes': [
        'Attractive', 'Bald', 'Chubby', 'Young', 'Smiling',
    ],
    'HasAttributes': [
        'Eyeglasses', 'Arched_Eyebrows', 'Bags_Under_Eyes', 'Bangs', 'Big_Lips',
        'Big_Nose', 'Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Bushy_Eyebrows',
        'Double_Chin', 'Goatee', 'Gray_Hair', 'Straight_Hair', 'Sideburns',
        'Rosy_Cheeks', 'Receding_Hairline', 'Pointy_Nose', 'Pale_Skin', 'Oval_Face',
        'Narrow_Eyes', 'Mustache', 'Mouth_Slightly_Open', 'High_Cheekbones', 'Wavy_Hair',
    ],
    'WearAttributes': [
        'Wearing_Necktie', 'Wearing_Necklace', 'Wearing_Lipstick', 'Wearing_Hat',
        'Wearing_Earrings', 'Heavy_Makeup',
    ],
}

# Candidate sentence subjects, keyed by the gender derived from the 'Male' flag.
# Other phrasings noted by the author: "in the picture", "in the image",
# "the entire face of".
gender = {
    'female': ['She', 'This woman', 'The woman', 'The person', 'This person'],
    'male': ['He', 'This man', 'The man', 'The person', 'This person'],
}

# Verb alternatives for each attribute family. The surrounding spaces are part
# of the strings because they are concatenated directly after the subject.
IsVerb = [' is ', ' looks ', ' appears to be ']
WearVerb = [' wears ', ' is wearing ']
HaveVerb = [' has ', ' is with ']
def get_subject(img_attribute):
    '''
    Return the list of candidate sentence subjects for one image.

    img_attribute maps attribute names to string flags. A 'Male' value of
    '-1' selects the female subjects; any other value selects the male ones.
    '''
    key = 'female' if img_attribute['Male'] == '-1' else 'male'
    return gender[key]
def get_feature(img_attribute):
    '''
    Collect, per category of ATTRIBUTES, the attribute names whose flag is '1'.

    The result keeps the category keys and the attribute order of ATTRIBUTES.
    Example output (image 29999.jpg):
    {'IsAttributes': ['Attractive', 'Smiling'],
     'HasAttributes': ['Bangs', 'Brown_Hair', 'Rosy_Cheeks', 'Mouth_Slightly_Open', 'High_Cheekbones', 'Wavy_Hair'],
     'WearAttributes': ['Wearing_Lipstick', 'Wearing_Earrings', 'Heavy_Makeup']}
    '''
    return {
        category: [name for name in names if img_attribute[name] == '1']
        for category, names in ATTRIBUTES.items()
    }
def _attribute_phrase(names, use_all):
    '''
    Build the lowercase, comma-separated phrase for one attribute category.

    names: attribute names that are positive for the image (may be empty).
    use_all: when true, keep every name in its given order; otherwise draw a
        random non-empty subset with random.sample.
    Returns '' for an empty category so the caller can drop the clause.
    '''
    if not names:
        return ''
    if not use_all:
        names = sample(names, randint(1, len(names)))
    # Strip the 'Wearing_' prefix and turn underscores into spaces so the
    # names read as plain words (e.g. 'Wearing_Hat' -> 'hat').
    return ', '.join(names).replace('Wearing_', '').replace('_', ' ').lower()


def _sentence(subject, clauses):
    '''
    Join the non-empty verb clauses with "and" behind a randomly chosen subject.

    Each clause already starts with a space (it begins with a verb such as
    ' has '). Returns '' when every clause is empty.
    '''
    kept = [clause for clause in clauses if clause]
    if not kept:
        return ''
    return f"{choice(subject)}{' and'.join(kept)}."


def get_caption(img_attribute, num_caption):
    '''
    Generate num_caption captions for one image.

    img_attribute maps attribute names to string flags ('1' means present).
    With ALL_ATTRIBUTE set, every positive attribute is listed; otherwise each
    caption uses a random non-empty subset per category and one of three
    randomly chosen sentence layouts.

    Categories without any positive attribute are omitted from the caption
    instead of producing fragments such as "He wears .". If no attribute is
    positive at all, the caption is 'This is a person.'.

    Returns a list of num_caption strings.
    '''
    subject = get_subject(img_attribute)
    feature = get_feature(img_attribute)
    captions = []
    for _ in range(num_caption):
        is_text = _attribute_phrase(feature['IsAttributes'], ALL_ATTRIBUTE)
        has_text = _attribute_phrase(feature['HasAttributes'], ALL_ATTRIBUTE)
        wear_text = _attribute_phrase(feature['WearAttributes'], ALL_ATTRIBUTE)

        # Verb clauses; an empty string marks a category to be skipped.
        has_clause = f'{choice(HaveVerb)}{has_text}' if has_text else ''
        wear_clause = f'{choice(WearVerb)}{wear_text}' if wear_text else ''

        if ALL_ATTRIBUTE:
            sentences = [
                f'The person in the picture is {is_text}.' if is_text else '',
                _sentence(subject, [has_clause]),
                _sentence(subject, [wear_clause]),
            ]
        else:
            is_clause = f'{choice(IsVerb)}{is_text}' if is_text else ''
            # The three layouts differ in how the clauses are grouped.
            layout = choice(['1', '2', '3'])
            if layout == '1':
                sentences = [
                    _sentence(subject, [is_clause]),
                    _sentence(subject, [has_clause, wear_clause]),
                ]
            elif layout == '2':
                sentences = [
                    _sentence(subject, [is_clause, has_clause]),
                    _sentence(subject, [wear_clause]),
                ]
            else:
                sentences = [
                    f'This is a {is_text} person.' if is_text else '',
                    _sentence(subject, [has_clause]),
                    _sentence(subject, [wear_clause]),
                ]

        caption = ' '.join(sentence for sentence in sentences if sentence)
        # Fallback for images with no positive attribute in any category.
        captions.append(caption or 'This is a person.')
    return captions
if __name__ == "__main__":
    anno_path = 'celeba-hq-attribute.txt'
    save_path = 'celeba_caption'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_path, exist_ok=True)

    # Annotation layout as parsed below: line 1 holds the image count, line 2
    # the attribute names, and each following line is
    # "<image_id> <value> <value> ...".
    with open(anno_path, 'r') as f:
        lines = f.readlines()
    num_images = int(lines[0])
    attributes = lines[1].split()

    # Store the attributes for each image in a dictionary keyed by image id.
    image_attributes = {}
    for line in lines[2:2 + num_images]:
        fields = line.split()
        if not fields:
            # Skip blank rows instead of failing on the unpacking below.
            continue
        image_id, *attr_values = fields
        image_attributes[image_id] = dict(zip(attributes, attr_values))

    if len(image_attributes) != num_images:
        print('warning: header declares {} images but {} were parsed'.format(
            num_images, len(image_attributes)))

    # Iterate over the ids actually present rather than assuming they are
    # named 0.jpg .. N-1.jpg; the caption file reuses the id without extension.
    for image_id, img_attribute in image_attributes.items():
        captions = get_caption(img_attribute, NUM_CAPTION)
        stem = os.path.splitext(image_id)[0]
        with open('{}/{}.txt'.format(save_path, stem), "w") as f:
            f.write("\n".join(captions))
    print('all done!')