Skip to content

Commit

Permalink
feat: instagram details
Browse files Browse the repository at this point in the history
  • Loading branch information
varshakumarr committed Jul 4, 2024
1 parent 1466878 commit b8e7c15
Showing 1 changed file with 340 additions and 0 deletions.
340 changes: 340 additions & 0 deletions Instagram/Instagram_Explore_API.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "dbfe669a",
"metadata": {
"execution": {
"iopub.execute_input": "2021-02-23T14:22:16.610471Z",
"iopub.status.busy": "2021-02-23T14:22:16.610129Z",
"iopub.status.idle": "2021-02-23T14:22:16.627784Z",
"shell.execute_reply": "2021-02-23T14:22:16.626866Z",
"shell.execute_reply.started": "2021-02-23T14:22:16.610384Z"
},
"papermill": {},
"tags": []
},
"source": [
"<img width=\"8%\" alt=\"Instagram.png\" src=\"https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Instagram.png\" style=\"border-radius: 15%\">"
]
},
{
"cell_type": "markdown",
"id": "5bbfcea2",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"# Instagram - Explore API\n",
"<a href=\"https://bit.ly/3JyWIk6\">Give Feedback</a> | <a href=\"https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=Instagram+-+Explore+API:+Error+short+description\">Bug report</a>"
]
},
{
"cell_type": "markdown",
"id": "394838ed",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"**Tags:** #instagram #snippet #dataframe #content"
]
},
{
"cell_type": "markdown",
"id": "370242e8",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)"
]
},
{
"cell_type": "markdown",
"id": "fcdf88ea-b290-4dc9-8605-08c8724551fd",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"**Last update:** 2024-07-03 (Created: 2024-07-02)"
]
},
{
"cell_type": "markdown",
"id": "naas-description",
"metadata": {
"papermill": {},
"tags": [
"description"
]
},
"source": [
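"**Description:** This notebook retrieves the data of an Instagram profile through Apify's Instagram Profile Scraper and organizes it into pandas DataFrames (posts, profiles, places, hashtags, photos and comments)."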
]
},
{
"cell_type": "markdown",
"id": "de96f02c",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"## Input"
]
},
{
"cell_type": "markdown",
"id": "93bc8174",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"### Import libraries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8cfa596-61fc-4135-a913-915cc5aab9e9",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import time\n",
"from urllib.parse import urlparse\n",
"\n",
"import pandas as pd\n",
"import requests"
]
},
{
"cell_type": "markdown",
"id": "0957e2bc",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"### Setup variables"
]
},
{
"cell_type": "markdown",
"id": "a142e6f7-02d9-4fee-912a-2eb410a82b03",
"metadata": {},
"source": [
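"- `apify_token`: personal Apify API token used to authenticate the request (keep it out of shared or committed notebooks)\n",
"- `instagram_profile_url`: URL of the Instagram profile to scrape, e.g. `https://www.instagram.com/naaslife/`"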
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f9f3fbb-f787-45d4-a7f0-e13662c0b736",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Read the token from the APIFY_TOKEN environment variable so the secret is\n",
"# never saved in the notebook; the placeholder only marks where it is missing.\n",
"apify_token = os.environ.get(\"APIFY_TOKEN\", \"YOUR_APIFY_TOKEN\")\n",
"instagram_profile_url = \"https://www.instagram.com/naaslife/\""
]
},
{
"cell_type": "markdown",
"id": "9829569f",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"## Model"
]
},
{
"cell_type": "markdown",
"id": "40e3d074-6982-4381-bbb9-d5ee8c442c4a",
"metadata": {},
"source": [
"### Scrape instagram data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94d07b97-00e6-4204-a40e-51f3515a9138",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"def get_instagram_data(apify_token, instagram_profile_url, timeout=310):\n",
"    \"\"\"Fetch one Instagram profile through Apify's Instagram Profile Scraper actor.\n",
"\n",
"    Args:\n",
"        apify_token: Apify API token, sent as a Bearer token.\n",
"        instagram_profile_url: Profile URL (trailing slash and query string\n",
"            optional) or a bare username.\n",
"        timeout: Seconds to wait for the HTTP response. NOTE(review): the\n",
"            default sits just above the 300 s cap Apify documents for\n",
"            synchronous runs -- confirm against the current API reference.\n",
"\n",
"    Returns:\n",
"        The first dataset item (a dict for a scraped profile) on success,\n",
"        otherwise a str describing the problem: no username in the input,\n",
"        a failed request, a non-2xx status, an unreadable body or an empty\n",
"        dataset. Callers tell the two apart with isinstance().\n",
"    \"\"\"\n",
"    # The first non-empty path segment is the username. Unlike split('/')[-2]\n",
"    # this also works without a trailing slash and ignores ?query / #fragment.\n",
"    profile_path = urlparse(instagram_profile_url.strip()).path\n",
"    path_segments = [segment for segment in profile_path.split('/') if segment]\n",
"    if not path_segments:\n",
"        return f\"Error: could not extract a username from '{instagram_profile_url}'\"\n",
"    username = path_segments[0].lstrip('@')\n",
"\n",
"    # Apify API URL for the Instagram Profile Scraper (synchronous run)\n",
"    api_url = \"https://api.apify.com/v2/acts/apify~instagram-profile-scraper/run-sync-get-dataset-items\"\n",
"\n",
"    payload = {\n",
"        \"usernames\": [username],  # Pass the username as a list\n",
"        \"proxyConfig\": {\n",
"            \"useApifyProxy\": True\n",
"        }\n",
"    }\n",
"    headers = {\n",
"        \"Authorization\": f\"Bearer {apify_token}\",\n",
"        \"Content-Type\": \"application/json\"\n",
"    }\n",
"\n",
"    try:\n",
"        response = requests.post(api_url, json=payload, headers=headers, timeout=timeout)\n",
"    except requests.RequestException as error:\n",
"        # Keep the \"return a message instead of raising\" contract for network failures\n",
"        return f\"Error: request to Apify failed - {error}\"\n",
"\n",
"    # Accept any 2xx status rather than exactly 200. NOTE(review): Apify's\n",
"    # reference appears to list 201 as the success status of this POST\n",
"    # endpoint, which an `== 200` check would report as an error -- confirm.\n",
"    if not response.ok:\n",
"        return f\"Error: {response.status_code} - {response.text}\"\n",
"\n",
"    try:\n",
"        data = response.json()\n",
"    except ValueError:\n",
"        return f\"Error: {response.status_code} - response body is not valid JSON\"\n",
"\n",
"    # The endpoint answers with the dataset items as a list; keep the first one\n",
"    if isinstance(data, list) and data:\n",
"        return data[0]\n",
"    return \"No profile data found.\"\n",
"\n",
"def _first_present(record, *keys):\n",
"    \"\"\"Return the first truthy value stored under any of `keys`, else None.\"\"\"\n",
"    for key in keys:\n",
"        value = record.get(key)\n",
"        if value:\n",
"            return value\n",
"    return None\n",
"\n",
"\n",
"def process_instagram_data(data):\n",
"    \"\"\"Split one scraped profile item into six pandas DataFrames.\n",
"\n",
"    Args:\n",
"        data: Dict describing one profile. Posts are read from `posts`, then\n",
"            `latestPosts`; the profile row from `user`, then from the item's\n",
"            own scalar fields. NOTE(review): the fallback keys (`latestPosts`,\n",
"            `locationName`, `locationId`, `latestComments`) follow the actor's\n",
"            published sample output -- confirm against its current schema.\n",
"\n",
"    Returns:\n",
"        tuple: (df_posts, df_profiles, df_places, df_hashtags, df_photos,\n",
"        df_comments). A frame has no rows when its data is missing.\n",
"    \"\"\"\n",
"    posts = _first_present(data, 'posts', 'latestPosts') or []\n",
"\n",
"    # Without a nested `user` object, use the item's scalar fields as the profile row\n",
"    profile = data.get('user') or {\n",
"        key: value for key, value in data.items()\n",
"        if not isinstance(value, (list, dict))\n",
"    }\n",
"\n",
"    places, hashtags, photos, comments = [], [], [], []\n",
"    for post in posts:\n",
"        location = post.get('location')\n",
"        if not location and post.get('locationName'):\n",
"            location = {'name': post['locationName'], 'id': post.get('locationId')}\n",
"        if location:\n",
"            places.append(location)\n",
"\n",
"        # `or []` covers keys that are present but null: post.get(key, [])\n",
"        # would return None there and the iteration would raise TypeError.\n",
"        hashtags.extend(post.get('hashtags') or [])\n",
"        photos.append(post.get('images') or [])\n",
"        comments.extend(_first_present(post, 'comments', 'latestComments') or [])\n",
"\n",
"    # Create DataFrames\n",
"    df_posts = pd.DataFrame(posts)\n",
"    df_profiles = pd.DataFrame([profile])\n",
"    df_places = pd.DataFrame(places)\n",
"    df_hashtags = pd.DataFrame(hashtags, columns=['hashtag'])\n",
"    df_photos = pd.DataFrame(photos)\n",
"    df_comments = pd.DataFrame(comments)\n",
"\n",
"    return df_posts, df_profiles, df_places, df_hashtags, df_photos, df_comments"
]
},
{
"cell_type": "markdown",
"id": "14696ed5",
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-02T23:32:10.789097Z",
"iopub.status.busy": "2021-07-02T23:32:10.788829Z",
"iopub.status.idle": "2021-07-02T23:32:10.796900Z",
"shell.execute_reply": "2021-07-02T23:32:10.796358Z",
"shell.execute_reply.started": "2021-07-02T23:32:10.789033Z"
},
"papermill": {},
"tags": []
},
"source": [
"## Output"
]
},
{
"cell_type": "markdown",
"id": "72557a2c",
"metadata": {
"papermill": {},
"tags": []
},
"source": [
"### Display result"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d3039cc-b820-4b02-8e01-24fd97527009",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"profile_data = get_instagram_data(apify_token, instagram_profile_url)\n",
"\n",
"if isinstance(profile_data, dict):\n",
"    (df_posts, df_profiles, df_places,\n",
"     df_hashtags, df_photos, df_comments) = process_instagram_data(profile_data)\n",
"\n",
"    # One loop instead of six copy-pasted print pairs. display() is provided by\n",
"    # the IPython kernel and renders each frame as a rich table instead of text.\n",
"    frames_by_label = {\n",
"        \"Posts\": df_posts,\n",
"        \"Profiles\": df_profiles,\n",
"        \"Places\": df_places,\n",
"        \"Hashtags\": df_hashtags,\n",
"        \"Photos\": df_photos,\n",
"        \"Comments\": df_comments,\n",
"    }\n",
"    for label, frame in frames_by_label.items():\n",
"        print(f\"\\n{label} DataFrame:\")\n",
"        display(frame)\n",
"else:\n",
"    # Anything other than a dict is the message returned by get_instagram_data\n",
"    print(profile_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e76164f0-64be-4e45-98c8-d5be914b449d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"naas": {
"notebook_id": "8c1d59ba9fc141ddf76ab615ec70620884b5be94f4cde842bd75126ac862db52",
"notebook_path": "Instagram/Instagram_Get_stats_from_posts.ipynb"
},
"papermill": {
"default_parameters": {},
"environment_variables": {},
"parameters": {},
"version": "2.3.3"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit b8e7c15

Please sign in to comment.