import requests
import re
import json

# Embed view of the post whose HTML is searched for counts below.
url = "https://www.instagram.com/p/DVOazPSjJQm/embed/"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}

# Bound the request: without a timeout, requests waits indefinitely on a
# stalled connection. 10 s matches the oEmbed request further down.
r = requests.get(url, headers=headers, timeout=10)
html = r.text
print(f"HTML length: {len(html)}")

# Try JSON data embedded in script tags
# Each pattern captures the run of digits that directly follows one JSON key.
preview_like_re = re.compile(r'"edge_media_preview_like":\{"count":(\d+)')
like_count_re = re.compile(r'"like_count":(\d+)')
to_comment_re = re.compile(r'"edge_media_to_comment":\{"count":(\d+)')
preview_comment_re = re.compile(r'"edge_media_preview_comment":\{"count":(\d+)')
video_views_re = re.compile(r'"video_view_count":(\d+)')

# Collect every occurrence of each key in the fetched HTML.
m1 = preview_like_re.findall(html)
m2 = like_count_re.findall(html)
m3 = to_comment_re.findall(html)
m4 = preview_comment_re.findall(html)
m5 = video_views_re.findall(html)

# Report the raw match lists, one probe per line.
print(f"edge_media_preview_like: {m1}")
print(f"like_count: {m2}")
print(f"comments: {m3}")
print(f"edge_media_preview_comment: {m4}")
print(f"video_view_count: {m5}")

# Try the oEmbed API
oembed_url = "https://api.instagram.com/oembed/?url=https://www.instagram.com/p/DVOazPSjJQm/"
try:
    r2 = requests.get(oembed_url, timeout=10, headers=headers)
    print(f"\noEmbed status: {r2.status_code}")
    if r2.status_code == 200:
        data = r2.json()
        # Pretty-print the payload, then cap the preview at 500 characters.
        pretty = json.dumps(data, indent=2)
        preview = pretty[:500]
        print(f"oEmbed data: {preview}")
except Exception as e:
    # Best-effort probe: report any request or decoding failure and carry on.
    print(f"oEmbed failed: {e}")

# Check for any number followed by "likes" in the HTML
# (digits with optional thousands separators, then "like"/"likes", any case).
likes_text_re = re.compile(r'(\d[\d,]*)\s*likes?', flags=re.IGNORECASE)
m6 = likes_text_re.findall(html)
print(f"\nAll 'N likes' patterns: {m6}")

# Check for the data-log-event anchor
# Captures the text between the tag carrying the attribute and the next "<".
like_anchor_re = re.compile(r'data-log-event="likeCountClick"[^>]*>([^<]+)<')
m7 = like_anchor_re.findall(html)
print(f"likeCountClick anchor text: {m7}")

# Try window.__additionalDataLoaded or similar
# Locate the call and stop right before the opening brace of its second
# argument. The payload itself is decoded with a real JSON parser rather than
# captured by regex: a non-greedy `\{.+?\}` capture is cut short by any "})"
# inside the payload and cannot span newlines.
call = re.search(r'window\.__additionalDataLoaded\([^,]+,\s*(?=\{)', html)
m8 = []  # raw JSON text of the payload, when one was found and decoded
if call:
    payload_start = call.end()
    try:
        # raw_decode parses exactly one JSON value starting at the given
        # index and returns the index where that value ends.
        jdata, payload_end = json.JSONDecoder().raw_decode(html, payload_start)
    except json.JSONDecodeError as exc:
        # Only malformed JSON is expected here; anything else should surface.
        print("\n__additionalDataLoaded found")
        print(f"Could not parse JSON: {exc}")
    else:
        m8.append(html[payload_start:payload_end])
        print(f"\n__additionalDataLoaded found, length: {len(m8[0])}")
        print(json.dumps(jdata, indent=2)[:1000])
