diff --git a/Instagram/Instagram_Explore_API.ipynb b/Instagram/Instagram_Explore_API.ipynb new file mode 100644 index 0000000000..5ef4a4cf2f --- /dev/null +++ b/Instagram/Instagram_Explore_API.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "dbfe669a", + "metadata": { + "execution": { + "iopub.execute_input": "2021-02-23T14:22:16.610471Z", + "iopub.status.busy": "2021-02-23T14:22:16.610129Z", + "iopub.status.idle": "2021-02-23T14:22:16.627784Z", + "shell.execute_reply": "2021-02-23T14:22:16.626866Z", + "shell.execute_reply.started": "2021-02-23T14:22:16.610384Z" + }, + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Instagram.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "5bbfcea2", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Instagram - Explore API\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "394838ed", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #instagram #snippet #dataframe #content" + ] + }, + { + "cell_type": "markdown", + "id": "370242e8", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "fcdf88ea-b290-4dc9-8605-08c8724551fd", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-07-03 (Created: 2024-07-02)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook retrieves data from an instagram profile through apify." 
import os

# Personal Apify token. It is read from the APIFY_TOKEN environment variable so
# that a real credential never has to be committed together with the notebook;
# only replace the placeholder for local, throw-away runs.
apify_token = os.environ.get("APIFY_TOKEN") or "YOUR_APIFY_TOKEN"
# Link to the Instagram profile to scrape.
instagram_profile_url = "https://www.instagram.com/naaslife/"


def _extract_username(instagram_profile_url):
    """Return the account handle found in a profile URL, or "" if there is none."""
    from urllib.parse import urlparse

    candidate = str(instagram_profile_url).strip()
    if "/" not in candidate:
        # A bare handle ("naaslife" or "@naaslife") was passed instead of a URL.
        return candidate.lstrip("@")
    # urlparse only fills in `path` reliably when a scheme is present.
    parsed = urlparse(candidate if "://" in candidate else f"https://{candidate}")
    segments = [segment for segment in parsed.path.split("/") if segment]
    # The first path segment is the handle, with or without a trailing slash
    # or query string on the URL.
    return segments[0].lstrip("@") if segments else ""


def get_instagram_data(apify_token, instagram_profile_url, timeout=300):
    """Run the Apify Instagram Profile Scraper for one profile and return its record.

    Args:
        apify_token: Apify API token, sent as a Bearer token.
        instagram_profile_url: Profile URL (trailing slash optional) or bare handle.
        timeout: Seconds to wait for the HTTP response. The endpoint is
            synchronous, i.e. it only answers once the actor run has produced
            its dataset, hence the generous default.

    Returns:
        The first dataset item (a dict) on success; otherwise a human-readable
        string: "No profile data found." or a message starting with "Error:".
        Callers distinguish the two cases with ``isinstance(result, dict)``.
    """
    username = _extract_username(instagram_profile_url)
    if not username:
        return f"Error: could not extract a username from {instagram_profile_url!r}"

    # Synchronous "run actor and return dataset items" endpoint.
    api_url = "https://api.apify.com/v2/acts/apify~instagram-profile-scraper/run-sync-get-dataset-items"

    payload = {
        "usernames": [username],  # the actor expects a list of handles
        "proxyConfig": {
            "useApifyProxy": True
        }
    }

    headers = {
        "Authorization": f"Bearer {apify_token}",
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)
    except requests.RequestException as error:
        # Connection problems and timeouts are reported the same way as HTTP
        # errors so the caller never has to catch anything.
        return f"Error: request to Apify failed - {error}"

    # Accept every 2xx status: a successful run is not guaranteed to be
    # reported as exactly 200 (a resource-creating POST may answer 201).
    if not 200 <= response.status_code < 300:
        return f"Error: {response.status_code} - {response.text}"

    try:
        data = response.json()
    except ValueError:
        # Body was not valid JSON.
        return f"Error: {response.status_code} - {response.text}"

    # The dataset is expected to be a non-empty list of profile records.
    if isinstance(data, list) and data and isinstance(data[0], dict):
        return data[0]
    return "No profile data found."


def process_instagram_data(data):
    """Split one scraped profile record into six DataFrames.

    Args:
        data: Dict returned by ``get_instagram_data``.

    Returns:
        Tuple ``(df_posts, df_profiles, df_places, df_hashtags, df_photos,
        df_comments)``:
        - df_posts: one row per post.
        - df_profiles: a single row describing the account.
        - df_places: one row per post that has a truthy ``location``.
        - df_hashtags: one row per hashtag occurrence, column ``hashtag``.
        - df_photos: one row per post, one column per image position.
        - df_comments: one row per comment across all posts.
    """
    # NOTE(review): the actor's dataset items appear to be flat profile records
    # with the posts under `latestPosts`; `posts` / `user` are still honoured
    # first. Confirm the key names against the actor's output schema.
    posts = data.get('posts') or data.get('latestPosts') or []
    posts = [post for post in posts if isinstance(post, dict)]

    profile = data.get('user')
    if profile is None:
        # No nested `user` object: use the record's own scalar fields.
        profile = {
            key: value
            for key, value in data.items()
            if not isinstance(value, (list, dict))
        }

    # `or []` guards against keys that are present but null.
    places = [post['location'] for post in posts if post.get('location')]
    hashtags = [tag for post in posts for tag in (post.get('hashtags') or [])]
    photos = [post.get('images') or [] for post in posts]
    comments = [comment for post in posts for comment in (post.get('comments') or [])]

    df_posts = pd.DataFrame(posts)
    df_profiles = pd.DataFrame([profile])
    df_places = pd.DataFrame(places)
    df_hashtags = pd.DataFrame(hashtags, columns=['hashtag'])
    df_photos = pd.DataFrame(photos)
    df_comments = pd.DataFrame(comments)

    return df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments
# Fetch the profile record for the configured account.
profile_data = get_instagram_data(apify_token, instagram_profile_url)

if not isinstance(profile_data, dict):
    # Anything that is not a dict is shown to the user as-is.
    print(profile_data)
else:
    (
        df_posts,
        df_profiles,
        df_places,
        df_hashtags,
        df_photos,
        df_comments,
    ) = process_instagram_data(profile_data)

    # Heading / frame pairs, listed in display order.
    sections = [
        ("Posts DataFrame:", df_posts),
        ("\nProfiles DataFrame:", df_profiles),
        ("\nPlaces DataFrame:", df_places),
        ("\nHashtags DataFrame:", df_hashtags),
        ("\nPhotos DataFrame:", df_photos),
        ("\nComments DataFrame:", df_comments),
    ]
    for heading, frame in sections:
        print(heading)
        print(frame)
{ + "notebook_id": "8c1d59ba9fc141ddf76ab615ec70620884b5be94f4cde842bd75126ac862db52", + "notebook_path": "Instagram/Instagram_Get_stats_from_posts.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}