Skip to content

Commit

Permalink
Added Mood & Enhanced Speech Classifications
Browse files Browse the repository at this point in the history
  • Loading branch information
hoangsonww committed Mar 30, 2024
1 parent 3bd1ca5 commit fb53090
Show file tree
Hide file tree
Showing 12 changed files with 189 additions and 33 deletions.
Empty file added .gitignore
Empty file.
5 changes: 5 additions & 0 deletions Animals-Classification/animal_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from PIL import Image, ImageDraw, ImageFont
import numpy as np


def load_model():
    """Return a MobileNetV2 classifier pre-trained on ImageNet weights."""
    model = MobileNetV2(weights='imagenet')
    return model


def classify_image(model, image):
resized_image = image.resize((224, 224))
image_array = img_to_array(resized_image)
Expand All @@ -19,6 +21,7 @@ def classify_image(model, image):

return decoded_predictions


def annotate_image(image, predictions):
draw = ImageDraw.Draw(image)
font = ImageFont.load_default()
Expand All @@ -31,6 +34,7 @@ def annotate_image(image, predictions):

return image


def process_input(source, model):
if source == 'webcam':
cap = cv2.VideoCapture(0)
Expand Down Expand Up @@ -58,6 +62,7 @@ def process_input(source, model):
cap.release()
cv2.destroyAllWindows()


if __name__ == "__main__":
print("You may see some errors due to font issues. It is totally OK and can be ignored.")

Expand Down
9 changes: 7 additions & 2 deletions Flowers-Classification/flower_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from PIL import Image, ImageDraw, ImageFont
import numpy as np


def load_model():
    """Return a MobileNetV2 classifier pre-trained on ImageNet weights."""
    model = MobileNetV2(weights='imagenet')
    return model


def classify_image(model, image):
image_resized = image.resize((224, 224))
image_array = img_to_array(image_resized)
Expand All @@ -19,10 +21,11 @@ def classify_image(model, image):

return decoded_predictions


def annotate_image(image, predictions):
draw = ImageDraw.Draw(image)
try:
font = ImageFont.truetype("arial.ttf", 20) # Adjust font size as needed
font = ImageFont.truetype("arial.ttf", 20)
except IOError:
print("Arial font not found, using default font.")
font = ImageFont.load_default()
Expand All @@ -32,7 +35,7 @@ def annotate_image(image, predictions):
for i, (id, label, prob) in enumerate(predictions):
text = f"{label} ({prob * 100:.2f}%)"
draw.text((10, text_y), text, fill="red", font=font)
# Workaround for text size

if hasattr(font, 'getsize'):
text_size = font.getsize(text)
else:
Expand All @@ -41,6 +44,7 @@ def annotate_image(image, predictions):

return image


def process_input(source, model):
if source == 'webcam':
cap = cv2.VideoCapture(0)
Expand Down Expand Up @@ -70,6 +74,7 @@ def process_input(source, model):
cap.release()
cv2.destroyAllWindows()


if __name__ == "__main__":
print("You may see some errors due to font issues. It is totally OK and can be ignored.")

Expand Down
8 changes: 5 additions & 3 deletions Human-Face-Classification/faces-classification.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cv2
import numpy as np


def load_models():
face_model_path = 'res10_300x300_ssd_iter_140000.caffemodel'
face_proto_path = 'deploy.prototxt.txt'
Expand All @@ -15,6 +16,7 @@ def load_models():

return face_net, age_net, gender_net


def predict_age_and_gender(face, age_net, gender_net):
blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), (78.4263377603, 87.7689143744, 114.895847746), swapRB=False)
gender_net.setInput(blob)
Expand All @@ -27,9 +29,10 @@ def predict_age_and_gender(face, age_net, gender_net):

return age, gender


def annotate_video(video_source, face_net, age_net, gender_net, use_webcam=False):
if use_webcam:
cap = cv2.VideoCapture(0) # 0 is usually the default camera
cap = cv2.VideoCapture(0)
else:
cap = cv2.VideoCapture(video_source)

Expand Down Expand Up @@ -64,7 +67,7 @@ def annotate_video(video_source, face_net, age_net, gender_net, use_webcam=False
cv2.imshow("Frame", frame)

key = cv2.waitKey(1)
if key & 0xFF in [ord('q'), 27]: # 27 is the ESC key
if key & 0xFF in [ord('q'), 27]:
break
elif key & 0xFF == ord(' '):
paused = not paused
Expand Down Expand Up @@ -122,4 +125,3 @@ def main():

if __name__ == "__main__":
main()

21 changes: 21 additions & 0 deletions Mood-Classification/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Son Nguyen Hoang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Binary file added Mood-Classification/angry.mp4
Binary file not shown.
Binary file added Mood-Classification/mood-classi.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
68 changes: 68 additions & 0 deletions Mood-Classification/mood_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import cv2
from deepface import DeepFace


def analyze_and_show(image, show_results=True):
    """Run DeepFace emotion analysis on *image*, draw the results, and show the frame.

    Draws a blue box around each detected face region, overlays the per-emotion
    probabilities in green text, and displays the annotated frame in the
    'Mood Detector' window. When *show_results* is true, the dominant emotion
    is also printed to the console. Any analysis failure is reported rather
    than raised, so a single bad frame does not abort a video/webcam loop.
    """
    try:
        results = DeepFace.analyze(image, actions=['emotion'], enforce_detection=False)

        # Vertical cursor for the stacked emotion labels; shared across faces.
        label_y = 20

        for face in results:
            region = face.get('region')
            if region is not None:
                left, top = region['x'], region['y']
                right, bottom = left + region['w'], top + region['h']
                cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)

            if 'emotion' not in face:
                continue

            if show_results:
                print(f"Dominant emotion: {face['dominant_emotion']}")
                print("See more detailed stats on the popup window.")

            for name, probability in face['emotion'].items():
                cv2.putText(image, f"{name}: {probability:.2f}%", (10, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                label_y += 20

        cv2.imshow('Mood Detector', image)

    except Exception as e:
        # Best-effort demo: report and carry on with the next frame.
        print(f"Error in emotion detection: {e}")


def process_input(source):
    """Stream frames from a video file or the webcam through the mood analyzer.

    *source* is either the literal string 'webcam' (device 0) or a video file
    path. The loop ends when frames run out, the user presses Q/ESC, or the
    'Mood Detector' window is closed; the capture is always released.
    """
    capture = cv2.VideoCapture(0 if source == 'webcam' else source)

    while True:
        grabbed, frame = capture.read()
        if not grabbed:
            break

        analyze_and_show(frame, True)

        # Q or ESC quits.
        if cv2.waitKey(1) & 0xFF in [27, ord('q')]:
            break

        # Stop if the user closed the display window.
        if cv2.getWindowProperty('Mood Detector', cv2.WND_PROP_VISIBLE) < 1:
            break

    capture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    choice = input("Enter 'image', 'video', or 'webcam': ").lower()

    if choice == 'image':
        image_path = input("Enter the image path: ")
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # (no exception), so check before handing the frame to DeepFace.
            print(f"Could not read image: {image_path}")
        else:
            analyze_and_show(image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
    elif choice in ['video', 'webcam']:
        source = 'webcam' if choice == 'webcam' else input("Enter the video path: ")
        process_input(source)
    else:
        # Previously an unrecognized choice exited silently.
        print(f"Unrecognized choice: {choice!r}. Expected 'image', 'video', or 'webcam'.")
Binary file added Mood-Classification/surprised.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
94 changes: 68 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# AI Classifiers
# AI Multitask Classifiers: From Objects to Emotions

Created by [Son Nguyen](https://github.com/hoangsonww) in 2024, this repository contains Python scripts for vehicle classification and object classification using pre-trained deep learning models. The vehicle classification logic uses the YOLOv3 model for vehicle detection and classification, while the object classification logic uses a pre-trained model for object classification. These scripts can be used to classify vehicles in videos and objects in images, respectively.
Created by [Son Nguyen](https://github.com/hoangsonww) in 2024, this repository contains Python scripts for various AI-powered classifiers. These classifiers can be used for object detection, face detection, character recognition, and more. The classifiers are built using popular deep learning frameworks such as `OpenCV`, `TensorFlow`, and `PyTorch`.

This repository contains six sub-directories: one for vehicle classification logic, one for human face classification logic, one for flower classification logic, one for object classification logic, one for character classification logic, and one for animal classification logic, namely `Vehicle-Classification`, `Human-Face-Classification`, `Flowers-Classification`, `Object-Classification`, `Character-Recognition`, and `Animal-Classification`. Refer to the information below for details on each classifier.
This repository contains 8 subdirectories: one for vehicle classification, one for human face classification, one for mood classification, one for flower classification, one for object classification, one for character classification, one for animal classification, and one for speech recognition, namely `Vehicle-Classification`, `Human-Face-Classification`, `Mood-Classification`, `Flowers-Classification`, `Object-Classification`, `Character-Recognition`, `Animal-Classification`, and `Speech-Recognition`. Refer to the information below for details on each classifier.

What's even more interesting is that all these classifiers can use your webcam for live testing, video files, or image files!

Expand All @@ -12,32 +12,37 @@ What's even more interesting is that all these classifiers can use your webcam f
- [Files Included](#files-included)
- [Getting Started](#getting-started)
- [Output](#output)
- [License](#license)
- [Human Face Classification](#face-classification)
- [Files Included](#files-included-1)
- [Getting Started](#getting-started-1)
- [Output](#output-1)
- [Flower Classification](#flower-classification)
- [Mood Classification](#mood-classification)
- [Files Included](#files-included-2)
- [Getting Started](#getting-started-2)
- [Output](#output-2)
- [Object Classification](#object-classification)
- [Flower Classification](#flower-classification)
- [Files Included](#files-included-3)
- [Getting Started](#getting-started-3)
- [Output](#output-3)
- [Character Classification (OCR)](#character-classification)
- [Object Classification](#object-classification)
- [Files Included](#files-included-4)
- [Getting Started](#getting-started-4)
- [Output](#output-4)
- [Animal Classification](#animal-classification)
- [Character Classification (OCR)](#character-classification)
- [Files Included](#files-included-5)
- [Getting Started](#getting-started-5)
- [Output](#output-5)
- [Speech Recognition](#speech-recognition)
- [Animal Classification](#animal-classification)
- [Files Included](#files-included-6)
- [Getting Started](#getting-started-6)
- [Output](#output-6)
- [Speech Recognition](#speech-recognition)
- [Files Included](#files-included-7)
- [Getting Started](#getting-started-7)
- [Output](#output-7)
- [Contact Information](#contact-information)
- [Future Work](#future-work)
- [License](#license)

---

Expand Down Expand Up @@ -86,10 +91,6 @@ Example output:
<img src="Vehicle-Classification/vehicle-classi.png" alt="Vehicle Classification Output" width="100%">
</p>

### License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

## Face Classification
Expand Down Expand Up @@ -140,11 +141,48 @@ Example output:
<img src="Human-Face-Classification/face-classi.png" alt="Face Classification Output" width="100%">
</p>
### License
---
## Mood Classification
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
### Files Included
---
- `mood_classifier.py`: Python script for mood classification.
- `angry.mp4`: Sample video for mood classification (angry).
- `surprised.jpg`: Sample image for mood classification (surprised).
### Getting Started
1. **Clone the Repository**
```bash
git clone
cd AI-Classification/Mood-Classification
```
2. **Install Dependencies**
Install the required Python dependencies.
```bash
pip install -r requirements.txt
```
3. **Run Mood Classification**
```bash
python mood_classifier.py
```
You will then be asked to choose your input type (image, video, or webcam). Enter `image` to classify the mood in the sample image provided (`surprised.jpg`), or enter `video` to classify the mood in a video file. You can also use your webcam for live testing.
The script will then display the detected mood in the image, video, or webcam stream and in the console.
All our classifiers will only stop when you press `Q`, `ESC`, or otherwise close the window.
### Output
The output will display the detected mood in the image, video, or webcam stream, as well as in the console.
Example output:
<p align="center">
<img src="Mood-Classification/mood-classi.png" alt="Mood Classification Output" width="100%">
</p>
## Character Classification
Expand Down Expand Up @@ -184,7 +222,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
image
```

The script will then process the image, detect characters, and display the class labels along with the confidence scores.
The script will then display the detected characters in the image, video, or webcam stream.

All our classifiers will only stop when you press `Q`, `ESC`, or otherwise close the window.

Expand All @@ -198,10 +236,6 @@ Example output:
<img src="Character-Recognition/character-classi.png" alt="Character Classification Output" width="350">
</p>

### License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

## Flower Classification
Expand Down Expand Up @@ -330,10 +364,6 @@ Example output:
<img src="Animals-Classification/animal-classi.png" alt="Animal Classification Output" width="350">
</p>

### License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

## Speech Recognition
Expand Down Expand Up @@ -377,7 +407,19 @@ Example output:
## Contact Information

For any questions or issues, please contact:
- Name: Son Nguyen
- Name: [Son Nguyen](https://github.com/hoangsonww)
- Email: [info@movie-verse.com](mailto:info@movie-verse.com)

## Future Work

- Add more classifiers for various tasks such as emotion recognition, sentiment analysis, and more.
- Refine existing classifiers and improve their accuracy and performance.
- Add more sample images and videos for testing the classifiers.

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

---

This repository is a work in progress and under active development. If you have any suggestions or improvements, feel free to contribute to this repository. Thank you for visiting!
Loading

0 comments on commit fb53090

Please sign in to comment.