# In [None]:
# === install if needed (only once in the kernel) ===
# %pip install pybaseball pandas --quiet

# === imports and setup ===
import time
import warnings
import os
import pandas as pd
import matplotlib.pyplot as plt
from pybaseball import cache, statcast

# Suppress FutureWarning messages for the rest of the session so they do not
# drown out the fetch progress lines printed below.
warnings.filterwarnings("ignore", category=FutureWarning)
# Turn on pybaseball's cache before any fetch is issued.
cache.enable()  # enable caching so partial progress persists

# === robust chunked fetch with retries ===
def fetch_statcast_with_retries(start_date, end_date, chunk_days=15, max_retries=4, backoff=2):
    """Download Statcast rows for a date range in chunks, retrying failed chunks.

    The range ``start_date``..``end_date`` (inclusive, anything accepted by
    ``pd.to_datetime``) is split into consecutive windows of ``chunk_days``
    days. Each window is requested through ``statcast``; a window that raises
    is retried up to ``max_retries`` times with an exponential wait of
    ``backoff ** attempt`` seconds between attempts. A window that never
    succeeds is reported and skipped, so the result may be incomplete.

    Args:
        start_date: First day to fetch.
        end_date: Last day to fetch.
        chunk_days: Number of days per request; must be at least 1.
        max_retries: Maximum number of attempts per chunk.
        backoff: Base of the exponential wait between attempts, in seconds.

    Returns:
        A single DataFrame with all successfully fetched, non-empty chunks
        concatenated, or an empty DataFrame when nothing was fetched.

    Raises:
        ValueError: If ``chunk_days`` is smaller than 1 (the window would
            never advance and the loop would not terminate).
    """
    if chunk_days < 1:
        raise ValueError(f"chunk_days must be >= 1, got {chunk_days!r}")

    frames = []
    current = pd.to_datetime(start_date)
    final = pd.to_datetime(end_date)
    while current <= final:
        chunk_end = min(current + pd.Timedelta(days=chunk_days - 1), final)
        success = False
        for attempt in range(1, max_retries + 1):
            print(f"Fetching {current.date()} → {chunk_end.date()} (attempt {attempt})")
            try:
                chunk = statcast(current.strftime("%Y-%m-%d"), chunk_end.strftime("%Y-%m-%d"))
            except Exception as e:  # retry boundary: any fetch failure is logged and retried
                if attempt < max_retries:
                    wait = backoff ** attempt
                    print(f"  Warning: chunk {current.date()} to {chunk_end.date()} failed: {e!r}. Retrying in {wait}s...")
                    time.sleep(wait)
                else:
                    # Last attempt: do not wait for a retry that will not happen.
                    print(f"  Warning: chunk {current.date()} to {chunk_end.date()} failed: {e!r}.")
            else:
                # Skip empty results (e.g. windows without games) so they do
                # not take part in the final concat.
                if chunk is not None and not chunk.empty:
                    frames.append(chunk)
                success = True
                break
        if not success:
            print(f"  ERROR: giving up on {current.date()} → {chunk_end.date()} after {max_retries} attempts.")
        current = chunk_end + pd.Timedelta(days=1)
    if frames:
        return pd.concat(frames, ignore_index=True)
    return pd.DataFrame()

# === parameters ===
start_date = "2015-04-08"  # Statcast begins 2015
end_date   = "2025-07-31"

# === fetch all statcast data in chunks ===
full_df = fetch_statcast_with_retries(start_date, end_date, chunk_days=15)

# === fail fast when the download is unusable ===
# When nothing was fetched the frame has no columns; filtering on missing
# columns would fail with an opaque KeyError, so stop with a clear message.
missing_cols = {"events", "home_team"} - set(full_df.columns)
if full_df.empty or missing_cols:
    raise SystemExit(
        "Statcast download returned no usable data"
        + (f" (missing columns: {sorted(missing_cols)})" if missing_cols else "")
        + ". Check the fetch log above and try again."
    )

# === filter to home runs hit in Mariners home games (T-Mobile/Safeco Field) ===
# note: this keeps HRs by EITHER team in games where SEA is the home team
hr_df = full_df[
    (full_df["events"] == "home_run") &
    (full_df["home_team"] == "SEA")
].copy()

# guard against an empty result before doing any further work
if hr_df.empty:
    raise SystemExit("No Statcast rows matched the HR + SEA home-team filter. Try a different date range.")

# === distance and exit velocity fields ===
# distance: prefer 'estimated_distance' if present; else 'hit_distance_sc'
if "estimated_distance" in hr_df.columns:
    hr_df["distance"] = hr_df["estimated_distance"]
else:
    hr_df["distance"] = hr_df.get("hit_distance_sc", float("nan"))

# exit velocity: prefer 'launch_speed'; else 'hit_speed'
if "launch_speed" in hr_df.columns:
    hr_df["exit_velocity"] = hr_df["launch_speed"]
else:
    hr_df["exit_velocity"] = hr_df.get("hit_speed", float("nan"))

# Force both measures to numeric so nlargest()/round() below cannot trip over
# object-typed columns; unparseable values become NaN.
hr_df["distance"] = pd.to_numeric(hr_df["distance"], errors="coerce")
hr_df["exit_velocity"] = pd.to_numeric(hr_df["exit_velocity"], errors="coerce")

# === ensure batter name exists ===
# fallback to batter id as string when there is no 'player_name' column
# NOTE(review): in pybaseball's statcast() output `player_name` may hold the
# PITCHER's name rather than the batter's — confirm before trusting the
# "Batter" column; a batter-id -> name lookup may be needed.
if "player_name" not in hr_df.columns:
    hr_df["player_name"] = hr_df["batter"].astype(str)

# === top 10 longest home runs ===
top10 = hr_df.nlargest(10, "distance").reset_index(drop=True)

# === clean display table ===
# Dates are parsed once and rendered directly in the final "Jul 02, 2016"
# style; missing or unparseable dates show as blank.
if "game_date" in top10.columns:
    date_col = (
        pd.to_datetime(top10["game_date"], errors="coerce")
        .dt.strftime("%b %d, %Y")
        .fillna("")
    )
else:
    date_col = ""

display_df = pd.DataFrame({
    "Rank": range(1, len(top10) + 1),
    "Batter": top10["player_name"],
    "Date": date_col,
    "Distance (ft)": top10["distance"].round(1),
    "Exit Velocity (mph)": top10["exit_velocity"].round(1),
})

def _fmt_dist(x):
    if pd.isna(x): return ""
    x = float(x)
    return f"{int(x)}" if x.is_integer() else f"{x:.1f}"

# Turn the two numeric columns into display strings (blank when missing).
display_df["Distance (ft)"] = display_df["Distance (ft)"].map(_fmt_dist)
display_df["Exit Velocity (mph)"] = display_df["Exit Velocity (mph)"].map(
    lambda v: "" if pd.isna(v) else f"{float(v):.1f}"
)

# === render the frame as a large, title-less table and save it as PNG ===
fig, ax = plt.subplots(figsize=(14, 7))  # generous canvas so the text stays legible
ax.axis("off")

table = ax.table(
    cellText=display_df.values.tolist(),
    colLabels=display_df.columns.tolist(),
    cellLoc="center",
    colLoc="center",
    loc="center",
)

# fixed font size, then stretch the cells so the text has room
table.auto_set_font_size(False)
table.set_fontsize(13)
table.scale(1.3, 1.6)

# black borders everywhere; the header row (row index 0) is set in bold
for position, cell in table.get_celld().items():
    cell.set_edgecolor("black")
    is_header = position[0] == 0
    if is_header:
        cell.set_text_props(weight="bold")

out_path = "top10_bigger_table.png"
fig.savefig(out_path, dpi=300, bbox_inches="tight")
print("Saved:", os.path.abspath(out_path))

# === also write the formatted table as CSV for reuse ===
display_df.to_csv("top10_bigger_table.csv", index=False)
print("Saved CSV:", os.path.abspath("top10_bigger_table.csv"))

# In [None]:
# === Longest HRs Leaders Card — Absolute Positioning (simple grid controls) ===
# Requires: a DataFrame named `top_all_with_ties` (or edit the `leaders_df` assignment at the bottom to use yours)
# Required cols: batter (MLBAM id), batter_name, batter_team, hit_distance_sc
# Optional cols: game_date (date is left blank if missing), pitcher_name ("off <pitcher>" text); away_team is not used

import os, io
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from PIL import Image, ImageDraw, ImageFont

# ──────────────────────────────────────────────────────────────────────────────
# ✏️ SIMPLE KNOBS — absolute positions only (align by your grid)
# Positions and sizes below are in axes data units: the renderer sets the axes
# limits to 0..CANVAS_W (x) and 0..CANVAS_H (y), with y increasing upward.
# ──────────────────────────────────────────────────────────────────────────────
TITLE_TEXT    = "LONGEST HOME RUNS • SAFECO/T-MOBILE"
SUBTITLE_TEXT = "In the Statcast ERA • 2015–2025 • Top distance with ties"  # None → auto from data’s date range

# Canvas & panel
CANVAS_W      = 5000   # axes width; also sets the figure width (CANVAS_W / 120 inches at 120 dpi)
CANVAS_H      = 1800   # axes height; also sets the figure height
LEFT_PANEL_W  = 2475   # width of the colored left panel holding the rows; must be < CANVAS_W
TOP_MARGIN    = 200    # first row starts this far below the top edge
BOTTOM_MARGIN = 0      # only used when computing how many rows fit
ROW_HEIGHT    = 220    # vertical pitch of one row
AUTO_SHRINK_ROWS = True  # shrink the row pitch (never below 80) when the rows would not fit

# Left block anchors
RANK_X      = 40    # left edge of the "1." rank text
HEADSHOT_CX = 250   # x of the headshot center
NAME_X      = 475   # left edge of the batter name

# Info line absolute columns (no “vs …”)
DATE_X            = 2100        # column for the date text
PITCHER_X         = 1600        # column for the "off Pitcher" text
INFO_Y_OFFSET     = 0           # vertical tweak for both date & pitcher (relative to row center)
PITCHER_Y_OFFSET  = 0           # extra vertical tweak for pitcher only
INFO_FONT_SIZE    = 22
INFO_FONT_FAMILY  = "DejaVu Sans"   # same family for date & pitcher for perfect alignment

# Date formatting
SHOW_YEAR_ONLY  = False                  # True -> only "2016"
# NOTE(review): DATE_FORMAT is not read by the renderer in this cell, which
# builds "JUL 2, 2016" by hand — confirm whether it is still needed.
DATE_FORMAT     = "%b %d, %Y"            # if SHOW_YEAR_ONLY=False, we still force month ALL CAPS below

# Distance box (white box + number + FT), absolute placement
BOX_LEFT        = 1300       # untrimmed left edge anchor
BOX_WIDTH       = 300        # untrimmed width
BOX_HEIGHT      = 100        # untrimmed height
BOX_TRIM_LEFT   = 25         # ✂️ cut this many px from the LEFT edge
BOX_TRIM_BOTTOM = 20         # ✂️ cut this many px from the BOTTOM (top edge stays fixed)
BOX_ADD_RIGHT   = -30          # ➕ add this many px of white to the RIGHT side (negative narrows the box)
NUM_CENTER      = False      # True -> center number in the visible box; False -> left-align to NUM_X
NUM_X           = BOX_LEFT + 24    # used only if NUM_CENTER=False
FT_PADDING      = 14         # gap between the number’s right edge and "FT"

# Logos (absolute X, centered vertically on row)
SHOW_LOGOS    = True
LOGO_MODE     = "espn"       # "none" (no logo) | "text" (drawn badge) | "espn" (remote PNG, badge as fallback)
LOGO_X        = 400          # absolute x for the team logo (tweak to taste)
LOGO_Y_OFFSET = 0            # small vertical tweak for logo
LOGO_ZOOM     = 0.14         # visual scale

# Debug grid (labelled lines every GRID_STEP units, drawn over the canvas)
DEBUG_GRID = False
GRID_STEP  = 50

# Colors
SEA_PRIMARY    = "#0e2a4d"   # left panel background and distance number/"FT" text
ROW_EVEN       = "#15365f"   # background of rows 1, 3, 5, … (even zero-based index)
ROW_ODD        = "#102C4D"   # background of rows 2, 4, 6, …
TITLE_COLOR    = "white"
SUBTITLE_COLOR = "#d7e3ff"

# Right-side hero photo (cover-fit). Skipped if path doesn’t exist.
# Machine-specific absolute path: point it at your own image.
HERO_IMAGE = r"C:\Users\mason\Documents\Safeco_Field_Seattle.jpg"

# MLB headshots (live fetch by batter id); `{id}` is filled with the integer batter id.
# When disabled or the fetch fails, an initials avatar is drawn instead.
USE_MLB_HEADSHOTS = True
MLB_HEADSHOT_URL  = "https://img.mlbstatic.com/mlb-photos/image/upload/w_600,q_auto:best/v1/people/{id}/headshot/67/current"

# ESPN team logo mapping (slug per team for PNG at 500px)
# Keys are upper-case team codes; codes missing here fall back to the
# lower-cased code itself.
# NOTE(review): ESPN appears to use "chw" for the White Sox — confirm the
# "cws" slug resolves; if not, the text badge is shown instead.
ESPN_TEAM_SLUG = {
    "ARI":"ari","ATL":"atl","BAL":"bal","BOS":"bos","CHC":"chc","CWS":"cws","CIN":"cin","CLE":"cle",
    "COL":"col","DET":"det","HOU":"hou","KC":"kc","KCR":"kc","LAA":"laa","LAD":"lad","MIA":"mia",
    "MIL":"mil","MIN":"min","NYM":"nym","NYY":"nyy","OAK":"oak","PHI":"phi","PIT":"pit","SD":"sd",
    "SDP":"sd","SEA":"sea","SF":"sf","SFG":"sf","STL":"stl","TB":"tb","TBR":"tb","TEX":"tex",
    "TOR":"tor","WSH":"wsh","WSN":"wsh","AZ":"ari","SFN":"sf","LAN":"lad","SDN":"sd"
}

# ──────────────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────────────
def load_image(path_or_url):
    """Load an image from an http(s) URL or a local file as an RGBA PIL image.

    This is deliberately best-effort: callers fall back to a placeholder when
    it returns None, so no exception escapes. Unlike a silent ``pass``, a
    failed download or decode is reported through ``warnings.warn`` so a
    missing headshot/logo can be diagnosed.

    Args:
        path_or_url: URL starting with ``http://``/``https://`` or a local
            file path. Anything that is not a string yields None.

    Returns:
        A new RGBA ``PIL.Image.Image``, or None when the input is not a
        string, the local file does not exist, or loading fails.
    """
    if not isinstance(path_or_url, str):
        return None
    is_url = path_or_url.startswith(("http://", "https://"))
    if not is_url and not os.path.exists(path_or_url):
        return None
    try:
        if is_url:
            import requests  # lazy import: only needed for remote images
            r = requests.get(path_or_url, timeout=8)
            r.raise_for_status()
            source = io.BytesIO(r.content)
        else:
            source = path_or_url
        # convert() returns an independent copy, so the source image (and its
        # file handle, for local files) can be closed right away.
        with Image.open(source) as im:
            return im.convert("RGBA")
    except Exception as exc:  # best-effort boundary: report, then fall back to None
        import warnings
        warnings.warn(f"load_image: could not load {path_or_url!r}: {exc!r}", stacklevel=2)
        return None

def initials_avatar(name, size=300, bg=(200,200,200,255), fg=(60,60,60,255)):
    """Draw a circular placeholder avatar showing up to two initials.

    Args:
        name: Any value; it is stringified and the first letter of its first
            two whitespace-separated words is used, upper-cased. "?" is drawn
            when that yields nothing.
        size: Width and height of the square RGBA image, in pixels.
        bg: RGBA fill of the disc.
        fg: RGBA color of the initials.

    Returns:
        A transparent ``size`` x ``size`` RGBA image with a filled disc and
        the initials centered on it.
    """
    txt = "".join(w[:1] for w in str(name).split()[:2]).upper() or "?"
    img = Image.new("RGBA", (size, size), (0, 0, 0, 0))
    d = ImageDraw.Draw(img)
    d.ellipse((0, 0, size - 1, size - 1), fill=bg)
    try:
        font = ImageFont.truetype("arial.ttf", int(size * 0.42))
    except (OSError, ImportError):
        # Font file not found (OSError) or FreeType support unavailable
        # (ImportError): use Pillow's built-in font instead.
        font = ImageFont.load_default()
    # textbbox returns (left, top, right, bottom); the glyphs do not start at
    # the origin, so subtract the left/top offsets to center the visible ink.
    left, top, right, bottom = d.textbbox((0, 0), txt, font=font)
    x = (size - (right - left)) // 2 - left
    y = (size - (bottom - top)) // 2 - top
    d.text((x, y), txt, fill=fg, font=font)
    return img

def circle_crop(im, target=300, border_px=4, border_color=(255,255,255,255)):
    """Center-crop an image to a square, resize it and clip it to a disc.

    When ``im`` is None a "?" initials avatar of the target size is used
    instead. With ``border_px > 0`` the disc is pasted onto a slightly larger
    disc filled with ``border_color``, which shows as a ring; the returned
    image is then ``target + 2 * border_px`` pixels wide and high, otherwise
    ``target`` pixels.
    """
    source = initials_avatar("?", size=target) if im is None else im

    # largest centered square of the source
    side = min(source.width, source.height)
    left = (source.width - side) // 2
    top = (source.height - side) // 2
    square = source.crop((left, top, left + side, top + side))
    square = square.resize((target, target), Image.LANCZOS)

    # keep only the pixels inside the inscribed circle
    disc_mask = Image.new("L", (target, target), 0)
    ImageDraw.Draw(disc_mask).ellipse((0, 0, target - 1, target - 1), fill=255)
    clipped = Image.new("RGBA", (target, target), (0, 0, 0, 0))
    clipped.paste(square, (0, 0), disc_mask)

    if border_px <= 0:
        return clipped

    outer = target + 2 * border_px
    ring = Image.new("RGBA", (outer, outer), (0, 0, 0, 0))
    ImageDraw.Draw(ring).ellipse((0, 0, ring.width - 1, ring.height - 1), fill=border_color)
    ring.paste(clipped, (border_px, border_px), clipped)
    return ring

def add_pil(ax, pil_image, center_xy, zoom=0.18, z=5):
    """Add a PIL image to ``ax`` as a frameless annotation box at ``center_xy``.

    ``zoom`` scales the image and ``z`` is the zorder of the added artist.
    """
    image_box = OffsetImage(np.asarray(pil_image), zoom=zoom)
    ax.add_artist(AnnotationBbox(image_box, center_xy, frameon=False, zorder=z))

def draw_grid(ax, step=50, W=2400, H=1400):
    """Overlay a labelled debug grid on ``ax``.

    Vertical lines are drawn every ``step`` units from 0 to ``W`` with their
    x value labelled near the bottom edge, then horizontal lines every
    ``step`` units from 0 to ``H`` with their y value labelled near the left
    edge.
    """
    line_style = dict(lw=0.5, color="#CCCCCC", alpha=.6, zorder=999)
    label_style = dict(fontsize=8, color="#777", zorder=999)

    for gx in np.arange(0, W + 1, step):
        ax.plot([gx, gx], [0, H], **line_style)
        ax.text(gx, 10, f"{int(gx)}", ha="center", va="bottom", **label_style)

    for gy in np.arange(0, H + 1, step):
        ax.plot([0, W], [gy, gy], **line_style)
        ax.text(10, gy, f"{int(gy)}", ha="left", va="center", **label_style)

def team_badge(team_code):
    """Draw a 220x220 text badge: a light disc with a team code on it.

    Args:
        team_code: Any value; it is stringified, upper-cased and cut to three
            characters. An empty result is shown as "—".

    Returns:
        A transparent RGBA image with the code centered on a filled disc.
    """
    code = str(team_code).upper()[:3] or "—"
    badge = Image.new("RGBA", (220, 220), (0, 0, 0, 0))
    d = ImageDraw.Draw(badge)
    d.ellipse((0, 0, 219, 219), fill=(245, 245, 245, 255))
    try:
        font = ImageFont.truetype("arial.ttf", 80)
    except (OSError, ImportError):
        # Font file not found (OSError) or FreeType support unavailable
        # (ImportError): use Pillow's built-in font instead.
        font = ImageFont.load_default()
    # textbbox returns (left, top, right, bottom); subtract the left/top
    # offsets so the visible glyphs, not the text origin, are centered.
    left, top, right, bottom = d.textbbox((0, 0), code, font=font)
    x = (220 - (right - left)) // 2 - left
    y = (220 - (bottom - top)) // 2 - top
    d.text((x, y), code, fill=(30, 30, 30, 255), font=font)
    return badge

def logo_for_team(team_code):
    """Return the logo image for a team, or None when logos are disabled.

    Logos are disabled when SHOW_LOGOS is false or LOGO_MODE is "none". In
    "espn" mode with a non-empty code, the PNG is fetched using the slug from
    ESPN_TEAM_SLUG (or the lower-cased code when unmapped). In every other
    case, and whenever the fetch fails, a text badge is returned.
    """
    if LOGO_MODE == "none" or not SHOW_LOGOS:
        return None

    code = str(team_code or "").upper()

    if code and LOGO_MODE == "espn":
        slug = ESPN_TEAM_SLUG.get(code, code.lower())
        remote = load_image(
            f"https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/{slug}.png"
        )
        if remote is not None:
            return remote

    return team_badge(code)

def headshot_for_row(row):
    """Return a headshot image for a leaders row, falling back to initials.

    When USE_MLB_HEADSHOTS is on and the row has a non-missing ``batter`` id
    that converts to an integer, the headshot is fetched from
    MLB_HEADSHOT_URL. If that is not possible or the fetch returns nothing, an
    initials avatar built from ``batter_name`` is returned instead ("?" when
    the name is missing).

    Args:
        row: Mapping-like row (e.g. a pandas Series) supporting ``in``,
            ``[]`` and ``.get``.

    Returns:
        A PIL image (never None).
    """
    if USE_MLB_HEADSHOTS and "batter" in row and pd.notna(row["batter"]):
        try:
            player_id = int(row["batter"])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric or infinite id: no URL can be built, use the avatar.
            player_id = None
        if player_id is not None:
            im = load_image(MLB_HEADSHOT_URL.format(id=player_id))
            if im is not None:
                return im

    name = row.get("batter_name", "?")
    if not isinstance(name, str) and pd.isna(name):
        # A missing name would otherwise be stringified to "nan" -> "N".
        name = "?"
    return initials_avatar(name, size=260)

# ──────────────────────────────────────────────────────────────────────────────
# Renderer
# ──────────────────────────────────────────────────────────────────────────────
def make_leaders_card(leaders: pd.DataFrame,
                      title=TITLE_TEXT,
                      subtitle=SUBTITLE_TEXT,
                      out_path=r"C:\Users\mason\Desktop\leaders_card2_2400x1400.png"):
    """Render the "longest home runs" leaders card and save it as an image.

    The left panel lists one row per record, longest distance first: rank,
    headshot, team logo, batter name, distance box, pitcher and date. The
    right panel shows the hero photo when HERO_IMAGE exists. All placement is
    driven by the module-level layout constants.

    Args:
        leaders: Frame with ``hit_distance_sc`` and, per row, optionally
            ``batter``, ``batter_name``, ``batter_team``, ``game_date`` and
            ``pitcher_name``. Rows whose distance is missing or not numeric
            are skipped.
        title: Headline text.
        subtitle: Subtitle text; None derives "<first year>–<last year> …"
            from ``game_date`` when that column has valid dates.
        out_path: Destination image path. Its directory is created when
            needed; a bare filename saves into the current directory.

    Raises:
        AssertionError: If LEFT_PANEL_W is not smaller than CANVAS_W.
    """
    assert LEFT_PANEL_W < CANVAS_W, "LEFT_PANEL_W must be smaller than CANVAS_W"

    # Sort & prep. Rows without a usable distance are dropped first because
    # the integer cast below cannot represent NaN.
    L = leaders.copy()
    L["hit_distance_sc"] = pd.to_numeric(L["hit_distance_sc"], errors="coerce")
    L = L.dropna(subset=["hit_distance_sc"]).sort_values("hit_distance_sc", ascending=False)
    L["value"] = L["hit_distance_sc"].round(0).astype(int)

    # Auto-shrink rows to fit fixed canvas if needed
    n_rows = len(L)
    row_h = ROW_HEIGHT
    max_rows_fit = int((CANVAS_H - TOP_MARGIN - BOTTOM_MARGIN) // row_h)
    if AUTO_SHRINK_ROWS and n_rows > max_rows_fit and n_rows > 0:
        row_h = max(80, int((CANVAS_H - TOP_MARGIN - BOTTOM_MARGIN) / n_rows))

    # Canvas
    W, H = CANVAS_W, CANVAS_H
    fig, ax = plt.subplots(figsize=(W/120, H/120), dpi=120)
    try:
        ax.set_xlim(0, W)
        ax.set_ylim(0, H)
        ax.axis("off")

        # Panels
        ax.add_patch(patches.Rectangle((0, 0), LEFT_PANEL_W, H,
                                       facecolor=SEA_PRIMARY, alpha=1.0, zorder=0))
        ax.add_patch(patches.Rectangle((LEFT_PANEL_W, 0), W - LEFT_PANEL_W, H,
                                       facecolor="#ffffff", alpha=1.0, zorder=0))

        # Hero image (cover-fit) + soft veil
        panel_w, panel_h = W - LEFT_PANEL_W, H
        if HERO_IMAGE and os.path.exists(HERO_IMAGE) and panel_w > 0:
            hero = load_image(HERO_IMAGE)
            if hero is not None:
                # Scale so the photo covers the whole panel, then crop the
                # overflow evenly from both sides.
                ar = hero.width / hero.height
                if panel_w / panel_h > ar:
                    nh = int(panel_w / ar); nw = panel_w
                else:
                    nw = int(panel_h * ar); nh = panel_h
                hero = hero.resize((nw, nh), Image.LANCZOS)
                x0 = max(0, (nw - panel_w)//2); y0 = max(0, (nh - panel_h)//2)
                hero = hero.crop((x0, y0, x0+panel_w, y0+panel_h))
                ax.imshow(hero, extent=(LEFT_PANEL_W, W, 0, H), zorder=1)
                ax.add_patch(patches.Rectangle((LEFT_PANEL_W, 0), panel_w, panel_h,
                                               facecolor="white", alpha=0.14, zorder=2))

        # Titles
        ax.text(40, H-10, title, color=TITLE_COLOR, fontsize=96, fontweight=800,
                ha="left", va="top", zorder=3)
        if subtitle is None and "game_date" in L.columns:
            # Only valid dates take part; with none, no subtitle is drawn.
            years = pd.to_datetime(L["game_date"], errors="coerce").dropna().dt.strftime("%Y")
            if not years.empty:
                subtitle = f"{years.min()}–{years.max()} • Top distance with ties"
        if subtitle:
            ax.text(LEFT_PANEL_W + 300, H-200, subtitle, color=SUBTITLE_COLOR, fontsize=36,
                    ha="left", va="top", zorder=3)

        # Debug grid
        if DEBUG_GRID:
            draw_grid(ax, step=GRID_STEP, W=W, H=H)

        # Draw once so the axes geometry (and with it the data<->pixel
        # transform) is final before text extents are measured in the loop.
        # Measuring a text's extent needs only the renderer, not a full
        # redraw per row.
        fig.canvas.draw()
        renderer = fig.canvas.get_renderer()

        # Rows
        y_top = H - TOP_MARGIN
        for i, row in L.reset_index(drop=True).iterrows():
            y1 = y_top - i*row_h
            y0 = y1 - row_h + 18
            ym = (y0 + y1) / 2

            # alternating row background
            bg = ROW_EVEN if i % 2 == 0 else ROW_ODD
            ax.add_patch(patches.Rectangle((24, y0), LEFT_PANEL_W-48, row_h-26,
                                           facecolor=bg, ec="white", lw=1.2, zorder=3))

            # rank
            ax.text(RANK_X, ym, f"{i+1}.", color="white", fontsize=36, fontweight=800,
                    va="center", ha="left", zorder=5)

            # headshot (LEFT side)
            hs = circle_crop(headshot_for_row(row), target=260, border_px=4)
            add_pil(ax, hs, (HEADSHOT_CX, ym), zoom=0.23, z=6)

            # name (primary); a missing name is shown as "Unknown"
            raw_name = row.get("batter_name", "Unknown")
            if not isinstance(raw_name, str) and pd.isna(raw_name):
                raw_name = "Unknown"
            name_txt = str(raw_name).title()
            ax.text(NAME_X, ym, name_txt, color="white", fontsize=40, fontweight=700,
                    va="center", ha="left", zorder=6)

            # ── date & pitcher at absolute columns ───────────────────────────
            gdt = pd.to_datetime(row.get("game_date"), errors="coerce")
            if pd.isna(gdt):
                date_txt = ""
            elif SHOW_YEAR_ONLY:
                date_txt = gdt.strftime("%Y")
            else:
                # "JUL 2, 2016" — force month to ALL CAPS
                date_txt = f"{gdt.strftime('%b').upper()} {gdt.day}, {gdt.year}"

            pitcher = row.get("pitcher_name")
            pitcher_txt = f"off {pitcher}" if pd.notna(pitcher) else ""

            info_y = ym + INFO_Y_OFFSET
            # date (uniform column)
            ax.text(DATE_X, info_y, date_txt,
                    color="#C7D7F2", fontsize=INFO_FONT_SIZE, fontfamily=INFO_FONT_FAMILY,
                    ha="left", va="center", zorder=6)

            # pitcher (uniform column)
            if pitcher_txt:
                ax.text(PITCHER_X, info_y + PITCHER_Y_OFFSET, pitcher_txt,
                        color="#C7D7F2", fontsize=INFO_FONT_SIZE, fontfamily=INFO_FONT_FAMILY,
                        ha="left", va="center", zorder=6)

            # ── distance box + number + FT (absolute, with trims & add-right) ─
            # Trim LEFT & BOTTOM, keep TOP edge fixed; optionally add width to the RIGHT.
            visible_w = max(40, BOX_WIDTH - BOX_TRIM_LEFT + BOX_ADD_RIGHT)
            visible_h = max(30, BOX_HEIGHT - BOX_TRIM_BOTTOM)

            rect_left = BOX_LEFT + BOX_TRIM_LEFT          # shifted right by the left trim
            rect_top  = ym + (BOX_HEIGHT / 2)             # original top (stay fixed)
            rect_bot  = rect_top - visible_h              # shave from bottom only

            ax.add_patch(patches.Rectangle(
                (rect_left, rect_bot),
                visible_w, visible_h,
                facecolor="white", ec="none", zorder=7
            ))

            # number: centered in the visible box, or left-aligned at NUM_X
            if NUM_CENTER:
                num_x, num_ha = rect_left + visible_w / 2, "center"
            else:
                num_x, num_ha = NUM_X, "left"
            num_text = ax.text(num_x, ym, f"{int(row['value'])}",
                               color=SEA_PRIMARY, fontsize=42, fontweight=900,
                               ha=num_ha, va="center", zorder=8)

            # "FT" goes right after the number: take the number's right edge
            # in display pixels, map it back to data x, then add the padding.
            num_bbox = num_text.get_window_extent(renderer=renderer)
            ft_x = ax.transData.inverted().transform((num_bbox.xmax, 0))[0] + FT_PADDING
            ax.text(ft_x, ym, "FT",
                    color=SEA_PRIMARY, fontsize=16,
                    ha="left", va="center", zorder=9)  # above the number

            # ── team logo (absolute X) ───────────────────────────────────────
            if SHOW_LOGOS:
                team = str(row.get("batter_team", "")).upper()
                logo_img = logo_for_team(team)
                if logo_img is not None:
                    add_pil(ax, logo_img, (LOGO_X, ym + LOGO_Y_OFFSET),
                            zoom=LOGO_ZOOM, z=7)

        # footer
        ax.text(LEFT_PANEL_W + 25, 24, "Created by Mason Colborn • Source: Baseball Savant (Statcast) • Images and Logos via ESPN •",
                color="#bcd1f1", fontsize=22, ha="left", va="bottom", zorder=4)

        # save; a bare filename has no directory part and makedirs("") would fail
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(out_path, dpi=300, bbox_inches="tight")
    finally:
        # Always release the (large) figure, even when rendering fails.
        plt.close(fig)
    print(f"Saved: {out_path} | canvas {W}×{H}px | left panel {LEFT_PANEL_W}px | rows {n_rows} (row_h={row_h})")

# ──────────────────────────────────────────────────────────────────────────────
# ✅ CALL IT — actually saves the image
# ──────────────────────────────────────────────────────────────────────────────
# Destination of the rendered card (machine-specific absolute path).
OUTPUT_PATH = r"C:\Users\mason\Desktop\leaders_card2_2400x1400.png"

# Work on a copy of the source frame; if it was never defined in this
# session, fail with an actionable message and keep the NameError as the
# explicit cause.
try:
    leaders_df = top_all_with_ties.copy()
except NameError as err:
    raise RuntimeError("Set `leaders_df` to the DataFrame you want to render (e.g., top_all_with_ties).") from err

# Check the required columns up front so a schema problem is reported before
# any rendering or network work starts.
required = {"batter","batter_name","batter_team","hit_distance_sc"}
missing = required - set(leaders_df.columns)
if missing:
    raise ValueError(f"leaders_df is missing required columns: {missing}")

make_leaders_card(leaders=leaders_df, title=TITLE_TEXT, subtitle=SUBTITLE_TEXT, out_path=OUTPUT_PATH)
print("Saved to:", OUTPUT_PATH, "| exists:", os.path.exists(OUTPUT_PATH))