Skip to content

Commit

Permalink
Added Mood & Enhanced Speech Classifications
Browse files Browse the repository at this point in the history
  • Loading branch information
hoangsonww committed Mar 30, 2024
1 parent 3bd1ca5 commit fb53090
Show file tree
Hide file tree
Showing 12 changed files with 189 additions and 33 deletions.
Empty file added .gitignore
Empty file.
5 changes: 5 additions & 0 deletions Animals-Classification/animal_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from PIL import Image, ImageDraw, ImageFont
import numpy as np


def load_model():
    """Return a MobileNetV2 classifier pre-trained on ImageNet weights."""
    model = MobileNetV2(weights='imagenet')
    return model


def classify_image(model, image):
resized_image = image.resize((224, 224))
image_array = img_to_array(resized_image)
Expand All @@ -19,6 +21,7 @@ def classify_image(model, image):

return decoded_predictions


def annotate_image(image, predictions):
draw = ImageDraw.Draw(image)
font = ImageFont.load_default()
Expand All @@ -31,6 +34,7 @@ def annotate_image(image, predictions):

return image


def process_input(source, model):
if source == 'webcam':
cap = cv2.VideoCapture(0)
Expand Down Expand Up @@ -58,6 +62,7 @@ def process_input(source, model):
cap.release()
cv2.destroyAllWindows()


if __name__ == "__main__":
print("You may see some errors due to font issues. It is totally OK and can be ignored.")

Expand Down
9 changes: 7 additions & 2 deletions Flowers-Classification/flower_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from PIL import Image, ImageDraw, ImageFont
import numpy as np


def load_model():
    """Return a MobileNetV2 classifier pre-trained on ImageNet weights."""
    model = MobileNetV2(weights='imagenet')
    return model


def classify_image(model, image):
image_resized = image.resize((224, 224))
image_array = img_to_array(image_resized)
Expand All @@ -19,10 +21,11 @@ def classify_image(model, image):

return decoded_predictions


def annotate_image(image, predictions):
draw = ImageDraw.Draw(image)
try:
font = ImageFont.truetype("arial.ttf", 20) # Adjust font size as needed
font = ImageFont.truetype("arial.ttf", 20)
except IOError:
print("Arial font not found, using default font.")
font = ImageFont.load_default()
Expand All @@ -32,7 +35,7 @@ def annotate_image(image, predictions):
for i, (id, label, prob) in enumerate(predictions):
text = f"{label} ({prob * 100:.2f}%)"
draw.text((10, text_y), text, fill="red", font=font)
# Workaround for text size

if hasattr(font, 'getsize'):
text_size = font.getsize(text)
else:
Expand All @@ -41,6 +44,7 @@ def annotate_image(image, predictions):

return image


def process_input(source, model):
if source == 'webcam':
cap = cv2.VideoCapture(0)
Expand Down Expand Up @@ -70,6 +74,7 @@ def process_input(source, model):
cap.release()
cv2.destroyAllWindows()


if __name__ == "__main__":
print("You may see some errors due to font issues. It is totally OK and can be ignored.")

Expand Down
8 changes: 5 additions & 3 deletions Human-Face-Classification/faces-classification.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cv2
import numpy as np


def load_models():
face_model_path = 'res10_300x300_ssd_iter_140000.caffemodel'
face_proto_path = 'deploy.prototxt.txt'
Expand All @@ -15,6 +16,7 @@ def load_models():

return face_net, age_net, gender_net


def predict_age_and_gender(face, age_net, gender_net):
blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), (78.4263377603, 87.7689143744, 114.895847746), swapRB=False)
gender_net.setInput(blob)
Expand All @@ -27,9 +29,10 @@ def predict_age_and_gender(face, age_net, gender_net):

return age, gender


def annotate_video(video_source, face_net, age_net, gender_net, use_webcam=False):
if use_webcam:
cap = cv2.VideoCapture(0) # 0 is usually the default camera
cap = cv2.VideoCapture(0)
else:
cap = cv2.VideoCapture(video_source)

Expand Down Expand Up @@ -64,7 +67,7 @@ def annotate_video(video_source, face_net, age_net, gender_net, use_webcam=False
cv2.imshow("Frame", frame)

key = cv2.waitKey(1)
if key & 0xFF in [ord('q'), 27]: # 27 is the ESC key
if key & 0xFF in [ord('q'), 27]:
break
elif key & 0xFF == ord(' '):
paused = not paused
Expand Down Expand Up @@ -122,4 +125,3 @@ def main():

if __name__ == "__main__":
main()

21 changes: 21 additions & 0 deletions Mood-Classification/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Son Nguyen Hoang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Binary file added Mood-Classification/angry.mp4
Binary file not shown.
Binary file added Mood-Classification/mood-classi.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 68 additions & 0 deletions Mood-Classification/mood_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import cv2
from deepface import DeepFace


def analyze_and_show(image, show_results=True):
    """Run DeepFace emotion analysis on *image*, draw the results, and show the frame.

    Draws a blue box around each detected face region, overlays the per-emotion
    probabilities in green text, and displays the annotated frame in the
    'Mood Detector' window. When *show_results* is true, the dominant emotion
    is also printed to the console. Any analysis failure is reported rather
    than raised, so a single bad frame does not abort a video/webcam loop.
    """
    try:
        results = DeepFace.analyze(image, actions=['emotion'], enforce_detection=False)

        # Vertical cursor for the stacked emotion labels; shared across faces.
        label_y = 20

        for face in results:
            region = face.get('region')
            if region is not None:
                left, top = region['x'], region['y']
                right, bottom = left + region['w'], top + region['h']
                cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)

            if 'emotion' not in face:
                continue

            if show_results:
                print(f"Dominant emotion: {face['dominant_emotion']}")
                print("See more detailed stats on the popup window.")

            for name, probability in face['emotion'].items():
                cv2.putText(image, f"{name}: {probability:.2f}%", (10, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                label_y += 20

        cv2.imshow('Mood Detector', image)

    except Exception as e:
        # Best-effort demo: report and carry on with the next frame.
        print(f"Error in emotion detection: {e}")


def process_input(source):
    """Stream frames from a video file or the webcam through the mood analyzer.

    *source* is either the literal string 'webcam' (device 0) or a video file
    path. The loop ends when frames run out, the user presses Q/ESC, or the
    'Mood Detector' window is closed; the capture is always released.
    """
    capture = cv2.VideoCapture(0 if source == 'webcam' else source)

    while True:
        grabbed, frame = capture.read()
        if not grabbed:
            break

        analyze_and_show(frame, True)

        # Q or ESC quits.
        if cv2.waitKey(1) & 0xFF in [27, ord('q')]:
            break

        # Stop if the user closed the display window.
        if cv2.getWindowProperty('Mood Detector', cv2.WND_PROP_VISIBLE) < 1:
            break

    capture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    choice = input("Enter 'image', 'video', or 'webcam': ").lower()

    if choice == 'image':
        image_path = input("Enter the image path: ")
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # (no exception), so check before handing the frame to DeepFace.
            print(f"Could not read image: {image_path}")
        else:
            analyze_and_show(image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
    elif choice in ['video', 'webcam']:
        source = 'webcam' if choice == 'webcam' else input("Enter the video path: ")
        process_input(source)
    else:
        # Previously an unrecognized choice exited silently.
        print(f"Unrecognized choice: {choice!r}. Expected 'image', 'video', or 'webcam'.")
Binary file added Mood-Classification/surprised.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
94 changes: 68 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# AI Classifiers
# AI Multitask Classifiers: From Objects to Emotions

Created by [Son Nguyen](https://github.com/hoangsonww) in 2024, this repository contains Python scripts for vehicle classification and object classification using pre-trained deep learning models. The vehicle classification logic uses the YOLOv3 model for vehicle detection and classification, while the object classification logic uses a pre-trained model for object classification. These scripts can be used to classify vehicles in videos and objects in images, respectively.
Created by [Son Nguyen](https://github.com/hoangsonww) in 2024, this repository contains Python scripts for various AI-powered classifiers. These classifiers can be used for object detection, face detection, character recognition, and more. The classifiers are built using popular deep learning frameworks such as `OpenCV`, `TensorFlow`, and `PyTorch`.

This repository contains six sub-directories: one for vehicle classification logic, one for human face classification logic, one for flower classification logic, one for object classification logic, one for character classification logic, and one for animal classification logic, namely `Vehicle-Classification`, `Human-Face-Classification`, `Flowers-Classification`, `Object-Classification`, `Character-Recognition`, and `Animal-Classification`. Refer to the information below for details on each classifier.
This repository contains 8 subdirectories: one for vehicle classification, one for human face classification, one for mood classification, one for flower classification, one for object classification, one for character classification, one for animal classification, and one for speech recognition, namely `Vehicle-Classification`, `Human-Face-Classification`, `Mood-Classification`, `Flowers-Classification`, `Object-Classification`, `Character-Recognition`, `Animal-Classification`, and `Speech-Recognition`. Refer to the information below for details on each classifier.

What's even more interesting is that all these classifiers can use your webcam for live testing, video files, or image files!

Expand All @@ -12,32 +12,37 @@ What's even more interesting is that all these classifiers can use your webcam f
- [Files Included](#files-included)
- [Getting Started](#getting-started)
- [Output](#output)
- [License](#license)
- [Human Face Classification](#face-classification)
- [Files Included](#files-included-1)
- [Getting Started](#getting-started-1)
- [Output](#output-1)
- [Flower Classification](#flower-classification)
- [Mood Classification](#mood-classification)
- [Files Included](#files-included-2)
- [Getting Started](#getting-started-2)
- [Output](#output-2)
- [Object Classification](#object-classification)
- [Flower Classification](#flower-classification)
- [Files Included](#files-included-3)
- [Getting Started](#getting-started-3)
- [Output](#output-3)
- [Character Classification (OCR)](#character-classification)
- [Object Classification](#object-classification)
- [Files Included](#files-included-4)
- [Getting Started](#getting-started-4)
- [Output](#output-4)
- [Animal Classification](#animal-classification)
- [Character Classification (OCR)](#character-classification)
- [Files Included](#files-included-5)
- [Getting Started](#getting-started-5)
- [Output](#output-5)
- [Speech Recognition](#speech-recognition)
- [Animal Classification](#animal-classification)
- [Files Included](#files-included-6)
- [Getting Started](#getting-started-6)
- [Output](#output-6)
- [Speech Recognition](#speech-recognition)
- [Files Included](#files-included-7)
- [Getting Started](#getting-started-7)
- [Output](#output-7)
- [Contact Information](#contact-information)
- [Future Work](#future-work)
- [License](#license)

---

Expand Down Expand Up @@ -86,10 +91,6 @@ Example output:
<img src="Vehicle-Classification/vehicle-classi.png" alt="Vehicle Classification Output" width="100%">
</p>

### License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

## Face Classification
Expand Down Expand Up @@ -140,11 +141,48 @@ Example output:
<img src="Human-Face-Classification/face-classi.png" alt="Face Classification Output" width="100%">
</p>
### License
---
## Mood Classification
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
### Files Included
---
- `mood_classifier.py`: Python script for mood classification.
- `angry.mp4`: Sample video for mood classification (angry).
- `surprised.jpg`: Sample image for mood classification (surprised).
### Getting Started
1. **Clone the Repository**
```bash
git clone
cd AI-Classification/Mood-Classification
```
2. **Install Dependencies**
Install the required Python dependencies.
```bash
pip install -r requirements.txt
```
3. **Run Mood Classification**
```bash
python mood_classifier.py
```
You will then be asked to choose your input type (image, video, or webcam). Enter `image` to classify the mood in the sample image provided (`surprised.jpg`), or enter `video` to classify the mood in a video file. You can also use your webcam for live testing.
The script will then display the detected mood in the image, video, or webcam stream and in the console.
All our classifiers will only stop when you press `Q`, `ESC`, or otherwise close the window.
### Output
The output will display the detected mood in the image, video, or webcam stream, as well as in the console.
Example output:
<p align="center">
<img src="Mood-Classification/mood-classi.png" alt="Mood Classification Output" width="100%">
</p>
## Character Classification
Expand Down Expand Up @@ -184,7 +222,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
image
```

The script will then process the image, detect characters, and display the class labels along with the confidence scores.
The script will then display the detected characters in the image, video, or webcam stream.

All our classifiers will only stop when you press `Q`, `ESC`, or otherwise close the window.

Expand All @@ -198,10 +236,6 @@ Example output:
<img src="Character-Recognition/character-classi.png" alt="Character Classification Output" width="350">
</p>

### License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

## Flower Classification
Expand Down Expand Up @@ -330,10 +364,6 @@ Example output:
<img src="Animals-Classification/animal-classi.png" alt="Animal Classification Output" width="350">
</p>

### License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

## Speech Recognition
Expand Down Expand Up @@ -377,7 +407,19 @@ Example output:
## Contact Information

For any questions or issues, please contact:
- Name: Son Nguyen
- Name: [Son Nguyen](https://github.com/hoangsonww)
- Email: [info@movie-verse.com](mailto:info@movie-verse.com)

## Future Work

- Add more classifiers for various tasks such as emotion recognition, sentiment analysis, and more.
- Refine existing classifiers and improve their accuracy and performance.
- Add more sample images and videos for testing the classifiers.

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

This repository is a work in progress and under active development. If you have any suggestions or improvements, feel free to contribute to this repository. Thank you for visiting!
Loading

0 comments on commit fb53090

Please sign in to comment.