From e55efda8ed6eb696a8c13c0293ae537c4bca6cb0 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Fri, 28 Jun 2024 01:28:10 +0200 Subject: [PATCH 1/3] feat: github organization and profile --- GitHub/GitHub_Get_an_organization.ipynb | 463 ++++++++++++++++++++++ GitHub/GitHub_Get_profile_from_user.ipynb | 310 ++++++++++++++- 2 files changed, 758 insertions(+), 15 deletions(-) create mode 100644 GitHub/GitHub_Get_an_organization.ipynb diff --git a/GitHub/GitHub_Get_an_organization.ipynb b/GitHub/GitHub_Get_an_organization.ipynb new file mode 100644 index 0000000000..46cb6cca7a --- /dev/null +++ b/GitHub/GitHub_Get_an_organization.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "84cadd0c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"GitHub.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "0d43ed38", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# GitHub - Get an organization\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "d9313642", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #github #organization #automation #snippet" + ] + }, + { + "cell_type": "markdown", + "id": "8faf487c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "3dba1c73-548d-4008-82ad-fdb2cb376771", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-06-25 (Created: 2024-06-18)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to retrieve organization data from a repository." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7412988b", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "b483a140", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "353ef79c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:15:25.095593Z", + "iopub.status.busy": "2024-06-25T13:15:25.095157Z", + "iopub.status.idle": "2024-06-25T13:15:27.766596Z", + "shell.execute_reply": "2024-06-25T13:15:27.765726Z", + "shell.execute_reply.started": "2024-06-25T13:15:25.095525Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import re\n", + "import pandas as pd\n", + "import naas" + ] + }, + { + "cell_type": "markdown", + "id": "68b48858", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables" + ] + }, + { + "cell_type": "markdown", + "id": "b69f38ae-a69c-4360-8cf9-099cda04f098", + "metadata": {}, + "source": [ + "- `github_token`: personal token\n", + "- `repo_url`: link to the chosen github repo\n", + "- `company_name`: domain name of the company (e.g. naas.ai)\n", + "- `api_key`: Hunter.io API key used to retrieve the company's LinkedIn details\n", + "- `output_csv`: path of the output CSV file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "01647a55", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:15:27.768674Z", + "iopub.status.busy": "2024-06-25T13:15:27.768234Z", + "iopub.status.idle": "2024-06-25T13:15:30.852511Z", + "shell.execute_reply": "2024-06-25T13:15:30.851843Z", + "shell.execute_reply.started": "2024-06-25T13:15:27.768640Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "github_token = naas.secret.get(\"GITHUB_TOKEN\")\n", + "repo_url = \"https://github.com/jupyter-naas/awesome-notebooks\"\n", + "company_name = \"naas.ai\"\n", + "api_key = 
\"899fa50c3a6c56e8b19e37608140f327bf9bf807\"\n", + "output_csv = f\"{repo_url.split('github.com/')[1].replace('/', '_')}_organization.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "93347abb", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model\n" + ] + }, + { + "cell_type": "markdown", + "id": "7537ee02-afad-4d87-8883-afae67365f46", + "metadata": {}, + "source": [ + "### Get github details" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bb42b49d-b53e-4255-8a59-c824cd49aeac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:15:30.853950Z", + "iopub.status.busy": "2024-06-25T13:15:30.853707Z", + "iopub.status.idle": "2024-06-25T13:15:31.263746Z", + "shell.execute_reply": "2024-06-25T13:15:31.262986Z", + "shell.execute_reply.started": "2024-06-25T13:15:30.853917Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_repo_details(repo_url, token):\n", + " # Extract the repo path from the URL\n", + " repo_path = repo_url.replace(\"https://github.com/\", \"\")\n", + " \n", + " # GitHub API endpoint for repository details\n", + " api_url = f\"https://api.github.com/repos/{repo_path}\"\n", + " \n", + " # Headers with the authorization token\n", + " headers = {\n", + " \"Authorization\": f\"token {token}\"\n", + " }\n", + " \n", + " # Send GET request to the GitHub API\n", + " response = requests.get(api_url, headers=headers)\n", + " \n", + " # Check if the request was successful\n", + " if response.status_code == 200:\n", + " repo_details = response.json()\n", + " return repo_details\n", + " else:\n", + " return f\"Error: {response.status_code} - {response.json()['message']}\"\n", + "repo_details = get_repo_details(repo_url, github_token)\n", + " \n", + "def get_company_details(domain, api_key):\n", + " # Hunter.io API endpoint for company information\n", + " url = f\"https://api.hunter.io/v2/domain-search?domain={domain}&api_key={api_key}\"\n", + " \n", + " # Send GET request to 
the Hunter.io API\n", + " response = requests.get(url)\n", + " \n", + " # Check if the request was successful\n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " if 'data' in data:\n", + " return data['data']\n", + " else:\n", + " return \"Company details not found\"\n", + " else:\n", + " return f\"Error: {response.status_code} - {response.json().get('errors', 'Unknown error')}\"\n", + " \n", + "linkedin_details = get_company_details(company_name, api_key)" + ] + }, + { + "cell_type": "markdown", + "id": "b819d06a", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output\n" + ] + }, + { + "cell_type": "markdown", + "id": "8b6ecf57-fec4-4e72-a7b8-4985bca76c05", + "metadata": {}, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5182d09a-e6d7-4d9d-9f38-eb3a9ce87cf1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:15:31.267186Z", + "iopub.status.busy": "2024-06-25T13:15:31.266948Z", + "iopub.status.idle": "2024-06-25T13:15:31.293783Z", + "shell.execute_reply": "2024-06-25T13:15:31.292934Z", + "shell.execute_reply.started": "2024-06-25T13:15:31.267161Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
URLNAMEIDOWNER_LOGINOWNER_IDDESCRIPTIONCREATED_ATTOPICSNUMBER_OF_OPEN_ISSUESLINKEDIN_URLLINKEDIN_DESCRIPTIONINDUSTRYTECHNOLOGIESCITYHEADCOUNTCOMPANY_TYPE
0https://api.github.com/repos/jupyter-naas/awes...awesome-notebooks299951448jupyter-naas71603764A powerful data & AI notebook templates catalo...2020-09-30T14:44:55Z[awesome, awesome-list, hacktoberfest, hacktob...135https://linkedin.com/company/naas-aiNaas.ai is a data science platform that allows...Software Development[express, google-analytics, google-tag-manager...Paris1-10privately held
\n", + "
" + ], + "text/plain": [ + " URL NAME \\\n", + "0 https://api.github.com/repos/jupyter-naas/awes... awesome-notebooks \n", + "\n", + " ID OWNER_LOGIN OWNER_ID \\\n", + "0 299951448 jupyter-naas 71603764 \n", + "\n", + " DESCRIPTION CREATED_AT \\\n", + "0 A powerful data & AI notebook templates catalo... 2020-09-30T14:44:55Z \n", + "\n", + " TOPICS NUMBER_OF_OPEN_ISSUES \\\n", + "0 [awesome, awesome-list, hacktoberfest, hacktob... 135 \n", + "\n", + " LINKEDIN_URL \\\n", + "0 https://linkedin.com/company/naas-ai \n", + "\n", + " LINKEDIN_DESCRIPTION INDUSTRY \\\n", + "0 Naas.ai is a data science platform that allows... Software Development \n", + "\n", + " TECHNOLOGIES CITY HEADCOUNT \\\n", + "0 [express, google-analytics, google-tag-manager... Paris 1-10 \n", + "\n", + " COMPANY_TYPE \n", + "0 privately held " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = []\n", + "\n", + "data.append({\n", + " \"URL\": repo_details[\"url\"],\n", + " \"NAME\": repo_details[\"name\"],\n", + " \"ID\": repo_details[\"id\"],\n", + " \"OWNER_LOGIN\": repo_details[\"owner\"][\"login\"],\n", + " \"OWNER_ID\": repo_details[\"owner\"][\"id\"],\n", + " \"DESCRIPTION\": repo_details[\"description\"],\n", + " \"CREATED_AT\": repo_details[\"created_at\"],\n", + " \"TOPICS\": repo_details[\"topics\"],\n", + " \"NUMBER_OF_OPEN_ISSUES\": repo_details[\"open_issues\"],\n", + " \"LINKEDIN_URL\": linkedin_details[\"linkedin\"],\n", + " \"LINKEDIN_DESCRIPTION\": linkedin_details[\"description\"],\n", + " \"INDUSTRY\": linkedin_details[\"industry\"],\n", + " \"TECHNOLOGIES\": linkedin_details[\"technologies\"],\n", + " \"CITY\": linkedin_details[\"city\"],\n", + " \"HEADCOUNT\": linkedin_details[\"headcount\"],\n", + " \"COMPANY_TYPE\": linkedin_details[\"company_type\"]\n", + "})\n", + "\n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "53724dc4-3314-45dc-9401-6899f485a70e", + "metadata": 
{}, + "source": [ + "### Save dataframe to csv" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5699155d-b0e3-40a5-b4bc-bbbecb4473a3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:15:31.295331Z", + "iopub.status.busy": "2024-06-25T13:15:31.294998Z", + "iopub.status.idle": "2024-06-25T13:15:31.397629Z", + "shell.execute_reply": "2024-06-25T13:15:31.396954Z", + "shell.execute_reply.started": "2024-06-25T13:15:31.295296Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "df.to_csv(output_csv, index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e04049a-dcbc-405f-811d-c0b6668ebc15", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "b8a92a0e4b6e40db304564f999566443fb35e93df716ab4be5021aabba8230ee", + "notebook_path": "GitHub/GitHub_Add_new_issues_as_page_in_Notion_database.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/GitHub/GitHub_Get_profile_from_user.ipynb b/GitHub/GitHub_Get_profile_from_user.ipynb index 562d7d99c4..9215f13a02 100644 --- a/GitHub/GitHub_Get_profile_from_user.ipynb +++ b/GitHub/GitHub_Get_profile_from_user.ipynb @@ -42,7 +42,7 @@ "tags": [] }, "source": [ - "**Author:** [Sanjeet Attili](https://www.linkedin.com/in/sanjeet-attili-760bab190/)" + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" ] }, { @@ -53,7 +53,7 @@ "tags": [] }, "source": [ - "**Last update:** 2023-04-12 (Created: 
2022-03-18)" + "**Last update:** 2024-06-25 (Created: 2022-03-18)" ] }, { @@ -88,7 +88,7 @@ "tags": [] }, "source": [ - "### Imports" + "### Import libraries" ] }, { @@ -96,12 +96,22 @@ "execution_count": 1, "id": "120e7d13-d71b-4faa-8d71-416c2fa0c2c6", "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:13:18.880510Z", + "iopub.status.busy": "2024-06-25T13:13:18.880055Z", + "iopub.status.idle": "2024-06-25T13:13:25.406452Z", + "shell.execute_reply": "2024-06-25T13:13:25.405750Z", + "shell.execute_reply.started": "2024-06-25T13:13:18.880427Z" + }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "from naas_drivers import github" + "from naas_drivers import github\n", + "import naas\n", + "import requests\n", + "import pandas as pd" ] }, { @@ -112,22 +122,39 @@ "tags": [] }, "source": [ - "### Variables" + "### Setup variables" + ] + }, + { + "cell_type": "markdown", + "id": "6c1d574c-c48c-489a-9e32-baaea89f76bd", + "metadata": {}, + "source": [ + "- `github_username`: GitHub username (login) of the profile to retrieve\n", + "- `GITHUB_TOKEN`: token to access github information\n", + "- `output_csv`: path of the output CSV file" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "d34cda87-0ba2-4992-bb11-506cd6a4e52b", "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:14:13.396619Z", + "iopub.status.busy": "2024-06-25T13:14:13.396365Z", + "iopub.status.idle": "2024-06-25T13:14:14.564488Z", + "shell.execute_reply": "2024-06-25T13:14:14.563808Z", + "shell.execute_reply.started": "2024-06-25T13:14:13.396591Z" + }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "USER_URL = \"https://github.com/FlorentLvr\"\n", - "\n", - "GITHUB_TOKEN = \"ghp_Stz3qlkR3b00nKUW8rxJoxxxxxxxxxxxx\"" + "github_username = \"FlorentLvr\"\n", + "GITHUB_TOKEN = naas.secret.get(\"GITHUB_TOKEN\")\n", + "output_csv = f\"{github_username}_profile.csv\"" ] }, { @@ -154,15 +181,119 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": 
"08bf3c6d-fe4d-41c9-980c-96259ef20598", "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:14:15.765905Z", + "iopub.status.busy": "2024-06-25T13:14:15.765671Z", + "iopub.status.idle": "2024-06-25T13:14:16.350195Z", + "shell.execute_reply": "2024-06-25T13:14:16.349649Z", + "shell.execute_reply.started": "2024-06-25T13:14:15.765880Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Email not found in GitHub profile. Checking commit history...\n", + "Found emails: {'48032461+FlorentLvr@users.noreply.github.com', 'florent@naas.ai', 'florent.frvservices@gmail.com'}\n" + ] + }, + { + "data": { + "text/plain": [ + "{'login': 'FlorentLvr',\n", + " 'id': 48032461,\n", + " 'node_id': 'MDQ6VXNlcjQ4MDMyNDYx',\n", + " 'avatar_url': 'https://avatars.githubusercontent.com/u/48032461?v=4',\n", + " 'gravatar_id': '',\n", + " 'url': 'https://api.github.com/users/FlorentLvr',\n", + " 'html_url': 'https://github.com/FlorentLvr',\n", + " 'followers_url': 'https://api.github.com/users/FlorentLvr/followers',\n", + " 'following_url': 'https://api.github.com/users/FlorentLvr/following{/other_user}',\n", + " 'gists_url': 'https://api.github.com/users/FlorentLvr/gists{/gist_id}',\n", + " 'starred_url': 'https://api.github.com/users/FlorentLvr/starred{/owner}{/repo}',\n", + " 'subscriptions_url': 'https://api.github.com/users/FlorentLvr/subscriptions',\n", + " 'organizations_url': 'https://api.github.com/users/FlorentLvr/orgs',\n", + " 'repos_url': 'https://api.github.com/users/FlorentLvr/repos',\n", + " 'events_url': 'https://api.github.com/users/FlorentLvr/events{/privacy}',\n", + " 'received_events_url': 'https://api.github.com/users/FlorentLvr/received_events',\n", + " 'type': 'User',\n", + " 'site_admin': False,\n", + " 'name': 'Florent',\n", + " 'company': '@jupyter-naas',\n", + " 'blog': '',\n", + " 'location': None,\n", + " 'email': None,\n", + " 'hireable': None,\n", + " 'bio': 
None,\n", + " 'twitter_username': None,\n", + " 'public_repos': 1,\n", + " 'public_gists': 0,\n", + " 'followers': 27,\n", + " 'following': 5,\n", + " 'created_at': '2019-02-26T20:34:19Z',\n", + " 'updated_at': '2024-01-18T14:50:35Z'}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df_user = github.connect(GITHUB_TOKEN).users.get_profile(USER_URL)" + "def get_github_user_info(username, token=None):\n", + " url = f\"https://api.github.com/users/{username}\"\n", + " headers = {\"Authorization\": f\"token {token}\"} if token else {}\n", + " response = requests.get(url, headers=headers)\n", + " if response.status_code == 200:\n", + " return response.json()\n", + " else:\n", + " return None\n", + "\n", + "def get_commit_emails(username, token=None):\n", + " url = f\"https://api.github.com/users/{username}/events/public\"\n", + " headers = {\"Authorization\": f\"token {token}\"} if token else {}\n", + " response = requests.get(url, headers=headers)\n", + " emails = set()\n", + " \n", + " if response.status_code == 200:\n", + " events = response.json()\n", + " for event in events:\n", + " if event['type'] == 'PushEvent':\n", + " for commit in event['payload']['commits']:\n", + " emails.add(commit['author']['email'])\n", + " return emails\n", + " else:\n", + " return f\"Error: {response.status_code} - {response.json().get('message', 'Unknown error')}\"\n", + "\n", + "\n", + "# Fetch basic GitHub user information\n", + "github_user_info = get_github_user_info(github_username, GITHUB_TOKEN)\n", + "\n", + "if github_user_info:\n", + " email = github_user_info.get('email')\n", + " if email:\n", + " print(f\"Email: {email}\")\n", + " else:\n", + " print(\"Email not found in GitHub profile. 
Checking commit history...\")\n", + " # If no email is found in the profile, check commit history\n", + " emails = get_commit_emails(github_username, GITHUB_TOKEN)\n", + " if isinstance(emails, set):\n", + " if emails:\n", + " print(f\"Found emails: {emails}\")\n", + " else:\n", + " print(\"No emails found in commit history.\")\n", + " else:\n", + " print(emails)\n", + "else:\n", + " print(\"GitHub user not found.\")\n", + " \n", + "github_user_info" ] }, { @@ -189,16 +320,165 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "7f433f48-09db-4e0b-b55d-ce07909899a7", "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:14:17.764555Z", + "iopub.status.busy": "2024-06-25T13:14:17.764226Z", + "iopub.status.idle": "2024-06-25T13:14:17.791103Z", + "shell.execute_reply": "2024-06-25T13:14:17.790384Z", + "shell.execute_reply.started": "2024-06-25T13:14:17.764518Z" + }, "papermill": {}, "tags": [] }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
LOGINIDNODE_IDTYPENAMECOMPANYLOCATIONEMAILS_IF_FOUNDHIREABLEBIOTWITTER_USERNAMEPUBLIC_REPOSPUBLIC_GISTSFOLLOWERSFOLLOWINGCREATED_ATUPDATED_AT
0FlorentLvr48032461MDQ6VXNlcjQ4MDMyNDYxUserFlorent@jupyter-naasNone{48032461+FlorentLvr@users.noreply.github.com,...NoneNoneNone102752019-02-26T20:34:19Z2024-01-18T14:50:35Z
\n", + "
" + ], + "text/plain": [ + " LOGIN ID NODE_ID TYPE NAME COMPANY \\\n", + "0 FlorentLvr 48032461 MDQ6VXNlcjQ4MDMyNDYx User Florent @jupyter-naas \n", + "\n", + " LOCATION EMAILS_IF_FOUND HIREABLE BIO \\\n", + "0 None {48032461+FlorentLvr@users.noreply.github.com,... None None \n", + "\n", + " TWITTER_USERNAME PUBLIC_REPOS PUBLIC_GISTS FOLLOWERS FOLLOWING \\\n", + "0 None 1 0 27 5 \n", + "\n", + " CREATED_AT UPDATED_AT \n", + "0 2019-02-26T20:34:19Z 2024-01-18T14:50:35Z " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = []\n", + "\n", + "data.append({\n", + " \"LOGIN\": github_user_info[\"login\"],\n", + " \"ID\": github_user_info[\"id\"],\n", + " \"NODE_ID\": github_user_info[\"node_id\"],\n", + " \"TYPE\": github_user_info[\"type\"],\n", + " \"NAME\": github_user_info[\"name\"],\n", + " \"COMPANY\": github_user_info[\"company\"],\n", + " \"LOCATION\": github_user_info[\"location\"],\n", + " \"EMAILS_IF_FOUND\": emails,\n", + " \"HIREABLE\": github_user_info[\"hireable\"],\n", + " \"BIO\": github_user_info[\"bio\"],\n", + " \"TWITTER_USERNAME\": github_user_info[\"twitter_username\"],\n", + " \"PUBLIC_REPOS\": github_user_info[\"public_repos\"],\n", + " \"PUBLIC_GISTS\": github_user_info[\"public_gists\"],\n", + " \"FOLLOWERS\": github_user_info[\"followers\"],\n", + " \"FOLLOWING\": github_user_info[\"following\"],\n", + " \"CREATED_AT\": github_user_info[\"created_at\"],\n", + " \"UPDATED_AT\": github_user_info[\"updated_at\"],\n", + "})\n", + "\n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "5f87c6c5-6af6-4b75-a2b8-0257c198dcab", + "metadata": {}, + "source": [ + "### Save dataframe to csv" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "620e50ee-ec7f-47ca-a49f-3fd59ae60058", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-25T13:14:33.151402Z", + "iopub.status.busy": "2024-06-25T13:14:33.151105Z", + 
"iopub.status.idle": "2024-06-25T13:14:33.175194Z", + "shell.execute_reply": "2024-06-25T13:14:33.174505Z", + "shell.execute_reply.started": "2024-06-25T13:14:33.151371Z" + }, + "tags": [] + }, "outputs": [], "source": [ - "df_user" + "df.to_csv(output_csv, index=False)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b59bc5b-349b-4cba-996c-67ca4ef4b086", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -232,4 +512,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From ef5aaa1983effe7501d7f4025222795fba616e01 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Fri, 28 Jun 2024 01:31:51 +0200 Subject: [PATCH 2/3] feat: Hunter.io findings --- Hunter.io/Hunter.io_Find_email.ipynb | 331 ++++++++++++++ Hunter.io/Hunter.io_Search_domain.ipynb | 556 ++++++++++++++++++++++++ Hunter.io/Hunter.io_Verify_email.ipynb | 327 ++++++++++++++ 3 files changed, 1214 insertions(+) create mode 100644 Hunter.io/Hunter.io_Find_email.ipynb create mode 100644 Hunter.io/Hunter.io_Search_domain.ipynb create mode 100644 Hunter.io/Hunter.io_Verify_email.ipynb diff --git a/Hunter.io/Hunter.io_Find_email.ipynb b/Hunter.io/Hunter.io_Find_email.ipynb new file mode 100644 index 0000000000..bff2ee3fcf --- /dev/null +++ b/Hunter.io/Hunter.io_Find_email.ipynb @@ -0,0 +1,331 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "84cadd0c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"GitHub.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "0d43ed38", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Hunter.io - Find email\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "d9313642", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #github #email #find #automation #snippet" + ] + }, + { + "cell_type": "markdown", + "id": "8faf487c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** 
[Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "3dba1c73-548d-4008-82ad-fdb2cb376771", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-06-26 (Created: 2024-06-25)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to find an email through hunter.io." + ] + }, + { + "cell_type": "markdown", + "id": "7412988b", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "b483a140", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "353ef79c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:51:07.687424Z", + "iopub.status.busy": "2024-06-26T06:51:07.686982Z", + "iopub.status.idle": "2024-06-26T06:51:07.820922Z", + "shell.execute_reply": "2024-06-26T06:51:07.820320Z", + "shell.execute_reply.started": "2024-06-26T06:51:07.687346Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pickle" + ] + }, + { + "cell_type": "markdown", + "id": "68b48858", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables" + ] + }, + { + "cell_type": "markdown", + "id": "b69f38ae-a69c-4360-8cf9-099cda04f098", + "metadata": {}, + "source": [ + "- `domain_name`: company name\n", + "- `api_key`: token to get data through hunter.io" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "01647a55", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:51:07.823964Z", + "iopub.status.busy": "2024-06-26T06:51:07.823776Z", + "iopub.status.idle": "2024-06-26T06:51:07.845207Z", + "shell.execute_reply": 
"2024-06-26T06:51:07.844693Z", + "shell.execute_reply.started": "2024-06-26T06:51:07.823942Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "domain_name = \"naas.ai\"\n", + "api_key = \"899fa50c3a6c56e8b19e37608140f327bf9xxxxx\"" + ] + }, + { + "cell_type": "markdown", + "id": "93347abb", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model\n" + ] + }, + { + "cell_type": "markdown", + "id": "ebfad846-2a68-4855-909c-1b2a9a7bcfa5", + "metadata": {}, + "source": [ + "### Save to pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "187ace2f-4049-4e55-b856-a955add1bfd4", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:51:07.847589Z", + "iopub.status.busy": "2024-06-26T06:51:07.847422Z", + "iopub.status.idle": "2024-06-26T06:51:07.941121Z", + "shell.execute_reply": "2024-06-26T06:51:07.940546Z", + "shell.execute_reply.started": "2024-06-26T06:51:07.847571Z" + } + }, + "outputs": [], + "source": [ + "def save_to_pickle(data, filename):\n", + " with open(filename, 'wb') as file:\n", + " pickle.dump(data, file)\n", + " print(f\"Data saved to {filename}\")" + ] + }, + { + "cell_type": "markdown", + "id": "7537ee02-afad-4d87-8883-afae67365f46", + "metadata": {}, + "source": [ + "### Find emails" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bb42b49d-b53e-4255-8a59-c824cd49aeac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:52:03.944977Z", + "iopub.status.busy": "2024-06-26T06:52:03.944753Z", + "iopub.status.idle": "2024-06-26T06:52:03.951063Z", + "shell.execute_reply": "2024-06-26T06:52:03.950533Z", + "shell.execute_reply.started": "2024-06-26T06:52:03.944954Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def domain_search(domain, api_key):\n", + " url = f\"https://api.hunter.io/v2/domain-search?domain={domain}&api_key={api_key}\"\n", + " response = requests.get(url)\n", + " \n", + " if response.status_code == 
200:\n", + " data = response.json()\n", + " emails = []\n", + "\n", + " # Extract emails from the response\n", + " if 'data' in data:\n", + " for email in data['data']['emails']:\n", + " emails.append(email['value'])\n", + " \n", + " # Save emails to pickle file\n", + " save_to_pickle(emails, f\"{domain}_emails.pickle\")\n", + " return emails\n", + " else:\n", + " return {\"error\": \"No data found\"}\n", + " else:\n", + " return {\"error\": f\"Error {response.status_code}: {response.json().get('errors', 'Unknown error')}\"}" + ] + }, + { + "cell_type": "markdown", + "id": "b819d06a", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output\n" + ] + }, + { + "cell_type": "markdown", + "id": "8b6ecf57-fec4-4e72-a7b8-4985bca76c05", + "metadata": {}, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5182d09a-e6d7-4d9d-9f38-eb3a9ce87cf1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:52:05.312000Z", + "iopub.status.busy": "2024-06-26T06:52:05.311776Z", + "iopub.status.idle": "2024-06-26T06:52:05.442030Z", + "shell.execute_reply": "2024-06-26T06:52:05.441430Z", + "shell.execute_reply.started": "2024-06-26T06:52:05.311977Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data saved to naas.ai_emails.pickle\n" + ] + }, + { + "data": { + "text/plain": [ + "['jeremy@naas.ai', 'abi@naas.ai', 'support@naas.ai']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "domain_search_result = domain_search(domain_name, api_key)\n", + "domain_search_result" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "b8a92a0e4b6e40db304564f999566443fb35e93df716ab4be5021aabba8230ee", + "notebook_path": "GitHub/GitHub_Add_new_issues_as_page_in_Notion_database.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Hunter.io/Hunter.io_Search_domain.ipynb b/Hunter.io/Hunter.io_Search_domain.ipynb new file mode 100644 index 0000000000..82b6252f8f --- /dev/null +++ b/Hunter.io/Hunter.io_Search_domain.ipynb @@ -0,0 +1,556 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "84cadd0c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"GitHub.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "0d43ed38", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Hunter.io - Search domain\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "d9313642", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #github #organization #automation #snippet" + ] + }, + { + "cell_type": "markdown", + "id": "8faf487c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "3dba1c73-548d-4008-82ad-fdb2cb376771", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-06-26 (Created: 2024-06-25)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to retrieve organization data from a repository through hunter.io." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7412988b", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "b483a140", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "353ef79c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-27T06:37:09.928745Z", + "iopub.status.busy": "2024-06-27T06:37:09.928368Z", + "iopub.status.idle": "2024-06-27T06:37:10.121417Z", + "shell.execute_reply": "2024-06-27T06:37:10.120425Z", + "shell.execute_reply.started": "2024-06-27T06:37:09.928673Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pickle" + ] + }, + { + "cell_type": "markdown", + "id": "f2781aef-1c8d-4b6b-89bf-bb1a7513619b", + "metadata": {}, + "source": [ + "### How to get api key" + ] + }, + { + "cell_type": "markdown", + "id": "1492a5db-0c16-4f86-bf04-e0fa76e87f16", + "metadata": {}, + "source": [ + "1. Create an account with hunter.io (free plan).\n", + "2. Go to your account settings, or open \"https://hunter.io/api-keys\" directly.\n", + "3. Under the \"API\" tab, an existing key should be listed. If not, click \"New key\" and create one.\n", + "4. 
Copy your key and use it to retrieve anything" + ] + }, + { + "cell_type": "markdown", + "id": "68b48858", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables" + ] + }, + { + "cell_type": "markdown", + "id": "b69f38ae-a69c-4360-8cf9-099cda04f098", + "metadata": {}, + "source": [ + "- `domain_name`: name of the company\n", + "- `api_key`: token to get data through hunter.io\n", + "- `file_path`: name of pickle file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "01647a55", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-27T06:37:10.123635Z", + "iopub.status.busy": "2024-06-27T06:37:10.123259Z", + "iopub.status.idle": "2024-06-27T06:37:10.126869Z", + "shell.execute_reply": "2024-06-27T06:37:10.126313Z", + "shell.execute_reply.started": "2024-06-27T06:37:10.123603Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "domain_name = \"naas.ai\"\n", + "api_key = \"899fa50c3a6c56e8b19e37608140f327bf9xxxxx\"\n", + "file_path = f\"{domain_name}_domain.pickle\"" + ] + }, + { + "cell_type": "markdown", + "id": "93347abb", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model\n" + ] + }, + { + "cell_type": "markdown", + "id": "9baade14-7bdf-40bb-98f5-b818420dece3", + "metadata": {}, + "source": [ + "### Save to pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c6043f13-3b5a-4427-9637-99ea83e71f97", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-27T06:37:10.128230Z", + "iopub.status.busy": "2024-06-27T06:37:10.127861Z", + "iopub.status.idle": "2024-06-27T06:37:10.221005Z", + "shell.execute_reply": "2024-06-27T06:37:10.220407Z", + "shell.execute_reply.started": "2024-06-27T06:37:10.128199Z" + } + }, + "outputs": [], + "source": [ + "def save_to_pickle(data, filename):\n", + " with open(filename, 'wb') as file:\n", + " pickle.dump(data, file)" + ] + }, + { + "cell_type": "markdown", + "id": 
"a92d9ac3-b6a8-44c3-a715-614be57bed88", + "metadata": {}, + "source": [ + "### Check if data already exists" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "fff67afd-c575-4b6f-9206-eeda3c4c5bd4", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-27T06:37:10.222225Z", + "iopub.status.busy": "2024-06-27T06:37:10.221978Z", + "iopub.status.idle": "2024-06-27T06:37:10.322474Z", + "shell.execute_reply": "2024-06-27T06:37:10.321744Z", + "shell.execute_reply.started": "2024-06-27T06:37:10.222196Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File not found\n" + ] + } + ], + "source": [ + "data = None\n", + "try:\n", + " with open(file_path, 'rb') as file:\n", + " data = pickle.load(file)\n", + " print(\"Data loaded successfully\")\n", + "except FileNotFoundError:\n", + " print(\"File not found\")\n", + "except pickle.UnpicklingError:\n", + " print(\"Error unpickling the file\")\n", + "except Exception as e:\n", + " print(f\"An error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "7537ee02-afad-4d87-8883-afae67365f46", + "metadata": {}, + "source": [ + "### Get company details" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bb42b49d-b53e-4255-8a59-c824cd49aeac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-27T06:37:10.323884Z", + "iopub.status.busy": "2024-06-27T06:37:10.323451Z", + "iopub.status.idle": "2024-06-27T06:37:10.566877Z", + "shell.execute_reply": "2024-06-27T06:37:10.566140Z", + "shell.execute_reply.started": "2024-06-27T06:37:10.323852Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data has been saved to naas.ai_domain.pickle\n" + ] + } + ], + "source": [ + "def get_company_details(domain, api_key):\n", + " # Hunter.io API endpoint for company information\n", + " url = 
f\"https://api.hunter.io/v2/domain-search?domain={domain}&api_key={api_key}\"\n", + " \n", + " # Send GET request to the Hunter.io API\n", + " response = requests.get(url)\n", + " \n", + " # Check if the request was successful\n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " if 'data' in data:\n", + " return data['data']\n", + " else:\n", + " return \"Company details not found\"\n", + " else:\n", + " return f\"Error: {response.status_code} - {response.json().get('errors', 'Unknown error')}\"\n", + "\n", + "if data is None:\n", + " data = get_company_details(domain_name, api_key)\n", + " # Writing data to a pickle file\n", + " try:\n", + " save_to_pickle(data, file_path)\n", + " print(f\"Data has been saved to {file_path}\")\n", + " except Exception as e:\n", + " print(f\"An error occurred while saving the data: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b819d06a", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output\n" + ] + }, + { + "cell_type": "markdown", + "id": "8b6ecf57-fec4-4e72-a7b8-4985bca76c05", + "metadata": {}, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5182d09a-e6d7-4d9d-9f38-eb3a9ce87cf1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-27T06:37:10.569366Z", + "iopub.status.busy": "2024-06-27T06:37:10.569188Z", + "iopub.status.idle": "2024-06-27T06:37:10.583821Z", + "shell.execute_reply": "2024-06-27T06:37:10.582919Z", + "shell.execute_reply.started": "2024-06-27T06:37:10.569345Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'domain': 'naas.ai',\n", + " 'disposable': False,\n", + " 'webmail': False,\n", + " 'accept_all': False,\n", + " 'pattern': '{first}',\n", + " 'organization': 'Naas.ai',\n", + " 'description': 'Naas.ai is a data science platform that allows users to schedule, run, and expose data projects using Jupyter notebooks.',\n", + " 'industry': 'Software 
Development',\n", + " 'twitter': 'https://twitter.com/jupyternaas',\n", + " 'facebook': None,\n", + " 'linkedin': 'https://linkedin.com/company/naas-ai',\n", + " 'instagram': None,\n", + " 'youtube': None,\n", + " 'technologies': ['express',\n", + " 'google-analytics',\n", + " 'google-tag-manager',\n", + " 'node-js',\n", + " 'stripe',\n", + " 'youtube'],\n", + " 'country': 'FR',\n", + " 'state': None,\n", + " 'city': 'Paris',\n", + " 'postal_code': None,\n", + " 'street': None,\n", + " 'headcount': '1-10',\n", + " 'company_type': 'privately held',\n", + " 'emails': [{'value': 'jeremy@naas.ai',\n", + " 'type': 'personal',\n", + " 'confidence': 99,\n", + " 'sources': [{'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/policies/privacy',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + " {'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/resources/community/code-of-conduct',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + " {'domain': 'site.naas.ai',\n", + " 'uri': 'http://site.naas.ai/docs/policies/privacy',\n", + " 'extracted_on': '2024-02-26',\n", + " 'last_seen_on': '2024-02-26',\n", + " 'still_on_page': False},\n", + " {'domain': 'site.naas.ai',\n", + " 'uri': 'http://site.naas.ai/docs/solutions/introduction',\n", + " 'extracted_on': '2024-02-26',\n", + " 'last_seen_on': '2024-02-26',\n", + " 'still_on_page': False},\n", + " {'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/templates/formant/formant_query_device_network',\n", + " 'extracted_on': '2023-09-06',\n", + " 'last_seen_on': '2023-12-06',\n", + " 'still_on_page': False},\n", + " {'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/templates/notion/notion_update_page',\n", + " 'extracted_on': '2023-06-06',\n", + " 'last_seen_on': '2023-12-06',\n", + " 'still_on_page': False},\n", + " {'domain': 'docs.naas.ai',\n", + " 'uri': 
'http://docs.naas.ai/templates/notion/notion_update_pages_from_database',\n", + " 'extracted_on': '2023-06-06',\n", + " 'last_seen_on': '2023-12-06',\n", + " 'still_on_page': False}],\n", + " 'first_name': None,\n", + " 'last_name': None,\n", + " 'position': None,\n", + " 'seniority': None,\n", + " 'department': None,\n", + " 'linkedin': None,\n", + " 'twitter': None,\n", + " 'phone_number': None,\n", + " 'verification': {'date': None, 'status': None}},\n", + " {'value': 'abi@naas.ai',\n", + " 'type': 'personal',\n", + " 'confidence': 86,\n", + " 'sources': [{'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/resources/developers/overview',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + " {'domain': 'site.naas.ai',\n", + " 'uri': 'http://site.naas.ai/docs/developers/overview',\n", + " 'extracted_on': '2024-02-26',\n", + " 'last_seen_on': '2024-02-26',\n", + " 'still_on_page': False}],\n", + " 'first_name': None,\n", + " 'last_name': None,\n", + " 'position': None,\n", + " 'seniority': None,\n", + " 'department': None,\n", + " 'linkedin': None,\n", + " 'twitter': None,\n", + " 'phone_number': None,\n", + " 'verification': {'date': None, 'status': None}},\n", + " {'value': 'support@naas.ai',\n", + " 'type': 'generic',\n", + " 'confidence': 84,\n", + " 'sources': [{'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/mission/unlocking-monetization-opportunities',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + " {'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/resources/developers/overview',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + " {'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/safety/product-warnings',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + 
" {'domain': 'docs.naas.ai',\n", + " 'uri': 'http://docs.naas.ai/usage/aia-personal-ai-assistant',\n", + " 'extracted_on': '2024-05-16',\n", + " 'last_seen_on': '2024-05-16',\n", + " 'still_on_page': True},\n", + " {'domain': 'github.com',\n", + " 'uri': 'http://github.com/jupyter-naas/awesome-notebooks',\n", + " 'extracted_on': '2023-10-07',\n", + " 'last_seen_on': '2023-10-07',\n", + " 'still_on_page': True},\n", + " {'domain': 'site.naas.ai',\n", + " 'uri': 'http://site.naas.ai/docs/developers/overview',\n", + " 'extracted_on': '2024-02-26',\n", + " 'last_seen_on': '2024-02-26',\n", + " 'still_on_page': False},\n", + " {'domain': 'site.naas.ai',\n", + " 'uri': 'http://site.naas.ai/docs/platform/scalable-computing',\n", + " 'extracted_on': '2024-02-26',\n", + " 'last_seen_on': '2024-02-26',\n", + " 'still_on_page': False},\n", + " {'domain': 'site.naas.ai',\n", + " 'uri': 'http://site.naas.ai/docs/platform/security-suite',\n", + " 'extracted_on': '2024-02-26',\n", + " 'last_seen_on': '2024-02-26',\n", + " 'still_on_page': False}],\n", + " 'first_name': None,\n", + " 'last_name': None,\n", + " 'position': None,\n", + " 'seniority': None,\n", + " 'department': 'support',\n", + " 'linkedin': None,\n", + " 'twitter': None,\n", + " 'phone_number': None,\n", + " 'verification': {'date': None, 'status': None}}],\n", + " 'linked_domains': []}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0818cf8-436a-45e4-b35b-9677333d047a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.9.6" + }, + "naas": { + "notebook_id": "b8a92a0e4b6e40db304564f999566443fb35e93df716ab4be5021aabba8230ee", + "notebook_path": "GitHub/GitHub_Add_new_issues_as_page_in_Notion_database.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Hunter.io/Hunter.io_Verify_email.ipynb b/Hunter.io/Hunter.io_Verify_email.ipynb new file mode 100644 index 0000000000..79df919c44 --- /dev/null +++ b/Hunter.io/Hunter.io_Verify_email.ipynb @@ -0,0 +1,327 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "84cadd0c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"GitHub.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "0d43ed38", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Hunter.io - Verify email\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "d9313642", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #github #email #verification #automation #snippet" + ] + }, + { + "cell_type": "markdown", + "id": "8faf487c", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "3dba1c73-548d-4008-82ad-fdb2cb376771", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-06-26 (Created: 2024-06-25)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to verify an email through hunter.io." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7412988b", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "b483a140", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "353ef79c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:26:45.089534Z", + "iopub.status.busy": "2024-06-26T06:26:45.089075Z", + "iopub.status.idle": "2024-06-26T06:26:47.672815Z", + "shell.execute_reply": "2024-06-26T06:26:47.672198Z", + "shell.execute_reply.started": "2024-06-26T06:26:45.089462Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pickle" + ] + }, + { + "cell_type": "markdown", + "id": "68b48858", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables" + ] + }, + { + "cell_type": "markdown", + "id": "b69f38ae-a69c-4360-8cf9-099cda04f098", + "metadata": {}, + "source": [ + "- `email`: email for verification\n", + "- `api_key`: token to get data through hunter.io\n", + "- `file_path`: name of pickle file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "01647a55", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:26:47.674042Z", + "iopub.status.busy": "2024-06-26T06:26:47.673804Z", + "iopub.status.idle": "2024-06-26T06:26:47.678728Z", + "shell.execute_reply": "2024-06-26T06:26:47.678182Z", + "shell.execute_reply.started": "2024-06-26T06:26:47.674015Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "email = \"florent@naas.ai\"\n", + "api_key = \"899fa50c3a6c56e8b19e37608140f327bf9xxxxx\"\n", + "file_path = \"verification_result.pickle\"" + ] + }, + { + "cell_type": "markdown", + "id": "93347abb", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model\n" + ] + }, + { + "cell_type": 
"markdown", + "id": "7537ee02-afad-4d87-8883-afae67365f46", + "metadata": {}, + "source": [ + "### Verify email" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bb42b49d-b53e-4255-8a59-c824cd49aeac", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:26:47.680893Z", + "iopub.status.busy": "2024-06-26T06:26:47.680699Z", + "iopub.status.idle": "2024-06-26T06:26:47.774581Z", + "shell.execute_reply": "2024-06-26T06:26:47.773759Z", + "shell.execute_reply.started": "2024-06-26T06:26:47.680873Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def verify_email_with_hunter(api_key, email):\n", + " url = f\"https://api.hunter.io/v2/email-verifier\"\n", + " params = {\n", + " 'email': email,\n", + " 'api_key': api_key\n", + " }\n", + " response = requests.get(url, params=params)\n", + " \n", + " if response.status_code == 200:\n", + " return response.json()\n", + " else:\n", + " return f\"Error: {response.status_code} - {response.json().get('errors', 'Unknown error')}\"\n", + "\n", + "# Verify the email\n", + "verification_result = verify_email_with_hunter(api_key, email)" + ] + }, + { + "cell_type": "markdown", + "id": "b96cc59e-714e-426f-8630-df99b5454326", + "metadata": {}, + "source": [ + "### Save to pickle" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5878561a-ded9-4caf-a6f9-53a1cb76b352", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:26:47.776305Z", + "iopub.status.busy": "2024-06-26T06:26:47.775802Z", + "iopub.status.idle": "2024-06-26T06:26:47.875107Z", + "shell.execute_reply": "2024-06-26T06:26:47.874530Z", + "shell.execute_reply.started": "2024-06-26T06:26:47.776270Z" + } + }, + "outputs": [], + "source": [ + "def save_to_pickle(data, filename):\n", + " with open(filename, 'wb') as file:\n", + " pickle.dump(data, file)" + ] + }, + { + "cell_type": "markdown", + "id": "b819d06a", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output\n" + ] 
+ }, + { + "cell_type": "markdown", + "id": "8b6ecf57-fec4-4e72-a7b8-4985bca76c05", + "metadata": {}, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5182d09a-e6d7-4d9d-9f38-eb3a9ce87cf1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-06-26T06:26:47.877696Z", + "iopub.status.busy": "2024-06-26T06:26:47.877520Z", + "iopub.status.idle": "2024-06-26T06:26:48.049499Z", + "shell.execute_reply": "2024-06-26T06:26:48.048853Z", + "shell.execute_reply.started": "2024-06-26T06:26:47.877677Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Verification result has been saved to verification_result.pickle\n" + ] + } + ], + "source": [ + "# Save the result to a pickle file\n", + "save_to_pickle(verification_result, file_path)\n", + "\n", + "print(f\"Verification result has been saved to {file_path}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0818cf8-436a-45e4-b35b-9677333d047a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "b8a92a0e4b6e40db304564f999566443fb35e93df716ab4be5021aabba8230ee", + "notebook_path": "GitHub/GitHub_Add_new_issues_as_page_in_Notion_database.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 2a775379cb59d007131d7642c015ac9da3beda80 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Tue, 2 Jul 2024 15:18:13 +0200 Subject: [PATCH 3/3] feat: hunter.io tag changes 
--- Hunter.io/Hunter.io_Find_email.ipynb | 67 +++++++++++-------------- Hunter.io/Hunter.io_Search_domain.ipynb | 2 +- Hunter.io/Hunter.io_Verify_email.ipynb | 2 +- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/Hunter.io/Hunter.io_Find_email.ipynb b/Hunter.io/Hunter.io_Find_email.ipynb index bff2ee3fcf..3fe815bc0e 100644 --- a/Hunter.io/Hunter.io_Find_email.ipynb +++ b/Hunter.io/Hunter.io_Find_email.ipynb @@ -31,7 +31,7 @@ "tags": [] }, "source": [ - "**Tags:** #github #email #find #automation #snippet" + "**Tags:** #hunter.io #email #find #automation #snippet" ] }, { @@ -97,11 +97,11 @@ "id": "353ef79c", "metadata": { "execution": { - "iopub.execute_input": "2024-06-26T06:51:07.687424Z", - "iopub.status.busy": "2024-06-26T06:51:07.686982Z", - "iopub.status.idle": "2024-06-26T06:51:07.820922Z", - "shell.execute_reply": "2024-06-26T06:51:07.820320Z", - "shell.execute_reply.started": "2024-06-26T06:51:07.687346Z" + "iopub.execute_input": "2024-07-02T13:12:36.582783Z", + "iopub.status.busy": "2024-07-02T13:12:36.582275Z", + "iopub.status.idle": "2024-07-02T13:12:37.144950Z", + "shell.execute_reply": "2024-07-02T13:12:37.144176Z", + "shell.execute_reply.started": "2024-07-02T13:12:36.582704Z" }, "papermill": {}, "tags": [] @@ -138,11 +138,11 @@ "id": "01647a55", "metadata": { "execution": { - "iopub.execute_input": "2024-06-26T06:51:07.823964Z", - "iopub.status.busy": "2024-06-26T06:51:07.823776Z", - "iopub.status.idle": "2024-06-26T06:51:07.845207Z", - "shell.execute_reply": "2024-06-26T06:51:07.844693Z", - "shell.execute_reply.started": "2024-06-26T06:51:07.823942Z" + "iopub.execute_input": "2024-07-02T13:12:37.146791Z", + "iopub.status.busy": "2024-07-02T13:12:37.146440Z", + "iopub.status.idle": "2024-07-02T13:12:37.149816Z", + "shell.execute_reply": "2024-07-02T13:12:37.149240Z", + "shell.execute_reply.started": "2024-07-02T13:12:37.146759Z" }, "papermill": {}, "tags": [] @@ -178,11 +178,11 @@ "id": "187ace2f-4049-4e55-b856-a955add1bfd4", 
"metadata": { "execution": { - "iopub.execute_input": "2024-06-26T06:51:07.847589Z", - "iopub.status.busy": "2024-06-26T06:51:07.847422Z", - "iopub.status.idle": "2024-06-26T06:51:07.941121Z", - "shell.execute_reply": "2024-06-26T06:51:07.940546Z", - "shell.execute_reply.started": "2024-06-26T06:51:07.847571Z" + "iopub.execute_input": "2024-07-02T13:12:37.151289Z", + "iopub.status.busy": "2024-07-02T13:12:37.150856Z", + "iopub.status.idle": "2024-07-02T13:12:37.371332Z", + "shell.execute_reply": "2024-07-02T13:12:37.370722Z", + "shell.execute_reply.started": "2024-07-02T13:12:37.151257Z" } }, "outputs": [], @@ -203,15 +203,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "bb42b49d-b53e-4255-8a59-c824cd49aeac", "metadata": { "execution": { - "iopub.execute_input": "2024-06-26T06:52:03.944977Z", - "iopub.status.busy": "2024-06-26T06:52:03.944753Z", - "iopub.status.idle": "2024-06-26T06:52:03.951063Z", - "shell.execute_reply": "2024-06-26T06:52:03.950533Z", - "shell.execute_reply.started": "2024-06-26T06:52:03.944954Z" + "iopub.execute_input": "2024-07-02T13:12:37.372357Z", + "iopub.status.busy": "2024-07-02T13:12:37.372183Z", + "iopub.status.idle": "2024-07-02T13:12:37.568827Z", + "shell.execute_reply": "2024-07-02T13:12:37.568244Z", + "shell.execute_reply.started": "2024-07-02T13:12:37.372337Z" }, "tags": [] }, @@ -260,33 +260,26 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "5182d09a-e6d7-4d9d-9f38-eb3a9ce87cf1", "metadata": { "execution": { - "iopub.execute_input": "2024-06-26T06:52:05.312000Z", - "iopub.status.busy": "2024-06-26T06:52:05.311776Z", - "iopub.status.idle": "2024-06-26T06:52:05.442030Z", - "shell.execute_reply": "2024-06-26T06:52:05.441430Z", - "shell.execute_reply.started": "2024-06-26T06:52:05.311977Z" + "iopub.execute_input": "2024-07-02T13:12:37.569955Z", + "iopub.status.busy": "2024-07-02T13:12:37.569644Z", + "iopub.status.idle": "2024-07-02T13:12:37.827483Z", + 
"shell.execute_reply": "2024-07-02T13:12:37.826861Z", + "shell.execute_reply.started": "2024-07-02T13:12:37.569931Z" }, "tags": [] }, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data saved to naas.ai_emails.pickle\n" - ] - }, { "data": { "text/plain": [ - "['jeremy@naas.ai', 'abi@naas.ai', 'support@naas.ai']" + "{'error': \"Error 401: [{'id': 'authentication_failed', 'code': 401, 'details': 'No user found for the API key supplied'}]\"}" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } diff --git a/Hunter.io/Hunter.io_Search_domain.ipynb b/Hunter.io/Hunter.io_Search_domain.ipynb index 82b6252f8f..c4c7080a88 100644 --- a/Hunter.io/Hunter.io_Search_domain.ipynb +++ b/Hunter.io/Hunter.io_Search_domain.ipynb @@ -31,7 +31,7 @@ "tags": [] }, "source": [ - "**Tags:** #github #organization #automation #snippet" + "**Tags:** #hunter.io #organization #automation #snippet" ] }, { diff --git a/Hunter.io/Hunter.io_Verify_email.ipynb b/Hunter.io/Hunter.io_Verify_email.ipynb index 79df919c44..d02c7192fc 100644 --- a/Hunter.io/Hunter.io_Verify_email.ipynb +++ b/Hunter.io/Hunter.io_Verify_email.ipynb @@ -31,7 +31,7 @@ "tags": [] }, "source": [ - "**Tags:** #github #email #verification #automation #snippet" + "**Tags:** #hunter.io #email #verification #automation #snippet" ] }, {