"""Enrich new company records with website URLs and ICP ratings via the Linkup search API."""

import os
import json
import time
import re
import sys

from dotenv import load_dotenv
import requests

# Load environment variables
load_dotenv()

# API keys from environment variables
ATTIO_API_KEY = os.getenv('ATTIO_API_KEY')
LINKUP_API_KEY = os.getenv('LINKUP_API_KEY')

if not ATTIO_API_KEY or not LINKUP_API_KEY:
    print("Error: Missing API keys in .env file")
    sys.exit(1)

# File paths and settings
FILE = 'new_companies.json'
MAX_COMPANIES = 50
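
# Expected .env contents (placeholder values):
#   ATTIO_API_KEY=...
#   LINKUP_API_KEY=...
#
# new_companies.json is assumed to map record IDs to objects like the following
# (the record ID and values below are made-up examples):
#   {"rec_123": {"name": "Acme Corp", "domain": "acme.io"}}
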
def load_existing_data():
    """Load the file with new companies to enrich."""
    try:
        with open(FILE, 'r') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        print(f"Error: Could not load {FILE}")
        sys.exit(1)

def find_website_url(company_name, company_domain=None):
    """Search Linkup for the most likely website URL of a company."""
    if not company_name or not company_name.strip():
        return ""

    url = "https://api.linkup.so/v1/search"
    headers = {
        "Authorization": f"Bearer {LINKUP_API_KEY}",
        "Content-Type": "application/json"
    }
    domain_info = company_domain if company_domain else ""
    payload = {
        "q": f"Based on the name {company_name} and the email domain {domain_info}, find the most likely company website URL. Only return a result if you are 90% sure this is the correct website. If {company_name} seems like a generic company name (e.g. personal, perso, n/a), return nothing. If the domain is a generic domain (e.g. gmail.com, yahoo.com, hotmail.com, icloud.com), do not consider it. Only consider professional email domains. Only return the company domain URL.",
        "depth": "standard",
        "outputType": "sourcedAnswer",
        "includeImages": False
    }

    print(f"Sending API request to Linkup for: {company_name}")
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        print(f"API response status: {response.status_code}")
        response.raise_for_status()
        result = response.json()
        if 'answer' in result:
            url_text = result['answer']
            url_pattern = re.compile(r'https?://\S+')
            url_match = url_pattern.search(url_text)
            if url_match:
                website_url = url_match.group(0)
                # Strip trailing punctuation the model may append to the URL
                return re.sub(r'[.,;:"\')]\s*$', '', website_url)
        return ""
    except Exception as e:
        print(f"Error in API call: {e}")
        return ""

def analyze_icp_fit(company_name, website_url):
    """Ask Linkup to rate a company's ICP fit from 1 (lowest) to 5 (highest)."""
    if not website_url:
        return "1"

    url = "https://api.linkup.so/v1/search"
    headers = {
        "Authorization": f"Bearer {LINKUP_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "q": f"Analyze the website {website_url}. Determine if this company could be an Ideal Customer Profile (ICP) for my company https://www.linkup.so/. For context, we are selling a search API. We target AI companies, tech companies, and corporates, as well as consulting and financial firms. Our search API allows companies to enrich applications with real-time web knowledge and business intelligence, at scale. Consider factors like industry, whether they're likely to need API services, and whether they might be building software products. Return a rating from 1 to 5, 1 being lowest ICP, 5 being highest ICP. Universities and schools should get a 3. Only return the rating, nothing else.",
        "depth": "deep",
        "outputType": "sourcedAnswer",
        "includeImages": False
    }

    print(f"Sending ICP analysis request to Linkup for: {company_name}")
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        print(f"ICP API response status: {response.status_code}")
        response.raise_for_status()
        result = response.json()
        if 'answer' in result:
            # The prompt asks for a bare rating, but guard against extra text in the answer
            rating_match = re.search(r'[1-5]', result['answer'])
            if rating_match:
                return rating_match.group(0)
        return "1"
    except Exception as e:
        print(f"Error in ICP API call: {e}")
        return "1"

def save_results(results):
    """Atomically save results: write to a temp file, then swap it into place."""
    print(f"\nSaving results to {FILE}...")
    temp_file = f"{FILE}.tmp"
    try:
        # Write to a temporary file first so a failed write can't corrupt the original
        with open(temp_file, 'w') as f:
            json.dump(results, f, indent=2)
        # os.replace atomically swaps the temp file in, whether or not FILE already exists
        os.replace(temp_file, FILE)
        print(f"Successfully saved {len(results)} results")
        # Verify the save by reading the file back
        with open(FILE, 'r') as f:
            saved_data = json.load(f)
        print(f"Verified save: {len(saved_data)} results in file")
    except Exception as e:
        print(f"Error saving results: {e}")
        # Clean up the temp file if it was left behind
        if os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError:
                pass

def main():
    """Enrich up to MAX_COMPANIES records with a website URL and an ICP rating."""
    results = load_existing_data()
    print(f"\nLoaded {len(results)} existing results")

    # Find organizations that have a name but no website and no ICP analysis yet
    to_process = {
        record_id: (record["name"], record.get("domain"))
        for record_id, record in results.items()
        if not record.get("website")
        and record.get("name")
        and not record.get("icp_analysis")
    }
    companies_to_process = dict(list(to_process.items())[:MAX_COMPANIES])
    print(f"\nFound {len(to_process)} organizations to process")
    print(f"Will process {len(companies_to_process)} organizations...")

    processed_count = 0
    websites_found = 0
    for record_id, (org_name, org_domain) in companies_to_process.items():
        print(f"\nProcessing: {org_name}")
        company_result = results[record_id]
        website_url = find_website_url(org_name, org_domain)
        if website_url:
            company_result["website"] = website_url
            websites_found += 1
            print(f"Found: {website_url}")
            # Brief pause between the two Linkup calls to ease rate-limit pressure
            time.sleep(2)
            icp_analysis = analyze_icp_fit(org_name, website_url)
            company_result["icp_analysis"] = icp_analysis
            print(f"Updated {org_name} with website: {website_url} and ICP: {icp_analysis}")
        else:
            # Default the ICP analysis to "1" when no website is found
            company_result["icp_analysis"] = "1"
            print(f"Updated {org_name} with default ICP: 1")
        results[record_id] = company_result
        processed_count += 1

        # Save after each company so progress survives a crash or interruption
        save_results(results)
        print(f"Saved results after processing {org_name}")
        if processed_count < len(companies_to_process):
            time.sleep(3)

    websites_with_analysis = sum(1 for r in results.values() if r.get("website") and r.get("icp_analysis"))
    print("\nFinal Results:")
    print(f"Processed {processed_count} organizations")
    print(f"Found {websites_found} websites")
    print(f"Total organizations with websites: {sum(1 for r in results.values() if r.get('website'))}")
    print(f"Total organizations with analysis: {websites_with_analysis}")

    # Final save with verification
    save_results(results)


if __name__ == "__main__":
    main()