From c1803a5651dbacdcdc8bdabac4adf16f8941e256 Mon Sep 17 00:00:00 2001
From: surafeldev <surafel.nigusie.dev@gmail.com>
Date: Sat, 14 Dec 2024 15:02:19 +0300
Subject: [PATCH] create an image processor file

---
 relevance-scoring/.gitignore                  | 135 ++++++++++++++++++
 relevance-scoring/metadata/image_processor.py |  53 +++++++
 2 files changed, 188 insertions(+)
 create mode 100644 relevance-scoring/.gitignore
 create mode 100644 relevance-scoring/metadata/image_processor.py

diff --git a/relevance-scoring/.gitignore b/relevance-scoring/.gitignore
new file mode 100644
index 00000000..8318586b
--- /dev/null
+++ b/relevance-scoring/.gitignore
@@ -0,0 +1,135 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case you do not want to do that, uncomment the following line:
+# Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyderworkspace
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# VS Code
+.vscode/
\ No newline at end of file
diff --git a/relevance-scoring/metadata/image_processor.py b/relevance-scoring/metadata/image_processor.py
new file mode 100644
index 00000000..f48e97a5
--- /dev/null
+++ b/relevance-scoring/metadata/image_processor.py
@@ -0,0 +1,53 @@
+import requests
+from PIL import Image
+from io import BytesIO
+from transformers import pipeline
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+
+def generate_image_description(image_url, caption_generator, timeout=10):
+    """Download the image at ``image_url`` and caption it.
+
+    Args:
+        image_url: URL of the image to fetch.
+        caption_generator: Callable that takes a PIL image and returns a
+            list whose first item is a dict with a ``generated_text`` key.
+        timeout: Seconds to wait for the HTTP response (default 10).
+
+    Returns:
+        ``{"url": image_url, "description": caption}``; ``description`` is
+        ``None`` when any step fails (the failure is logged, not raised).
+    """
+    description = None
+    try:
+        logging.info(f"Fetching image from URL: {image_url}")
+        # Without a timeout a stalled server would block this call forever.
+        response = requests.get(image_url, timeout=timeout)
+        if response.status_code != 200:
+            raise Exception(f"Failed to retrieve image. Status code: {response.status_code}")
+
+        # The header can be missing (None) and media types are case-insensitive.
+        content_type = response.headers.get('Content-Type')
+        logging.info(f"Content-Type: {content_type}")
+        if not content_type or 'image' not in content_type.lower():
+            raise Exception(f"URL does not point to an image. Content-Type: {content_type}")
+
+        # Raw payload bytes are diagnostic noise, so keep them at DEBUG level.
+        logging.debug(f"Response content (first 100 bytes): {response.content[:100]}")
+        image = Image.open(BytesIO(response.content))
+        description = caption_generator(image)[0]['generated_text']
+    except Exception as e:
+        logging.error(f"Error processing image {image_url}: {e}")
+    return {"url": image_url, "description": description}
+
+if __name__ == "__main__":
+    # Demo run: caption one sample image and print the resulting metadata.
+    sample_url = "https://th.bing.com/th?id=ORMS.99706f16f78dd7e84c31c95eef897656&pid=Wdp&w=268&h=140&qlt=90&c=1&rs=1&dpr=1.5&p=0"
+
+    # The model is built inside this guard so that importing the module does not load it.
+    captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+
+    # Caption the sample image and show the url/description result dict.
+    print(generate_image_description(sample_url, captioner))
\ No newline at end of file