All pastes #5ykNveyFWw Raw Copy code Copy link Edit

text paste

public text v1 · immutable
#5ykNveyFWw ·published 2026-05-31 03:25 UTC
rendered paste body
"""
Knox County TN Jail — Auto Monitor
====================================
Automatically checks the Knox County jail roster every 10-15 minutes
for new inmates. When a new inmate is found, their info is saved to
the output folder as a ready-to-post Facebook text file + mugshot image.

FACEBOOK API SUPPORT:
  Set FACEBOOK_ENABLED = True and provide your page token via the
  KNOX_FACEBOOK_TOKEN environment variable to automatically post to
  Facebook when a new inmate is detected.
  Instructions for getting a token are in the README comments below.

CREDENTIALS:
  Secrets are read from environment variables, never from this file:
    KNOX_FACEBOOK_TOKEN        Facebook Page access token
    KNOX_FACEBOOK_PAGE_ID      Facebook Page ID (optional override)
    KNOX_DISCORD_WEBHOOK_URL   Discord webhook URL for error alerts

HOW TO RUN:
  python knox_monitor.py
  (Leave the terminal open — it will keep running until you close it)

HOW TO STOP:
  Press Ctrl+C in the terminal
"""

import json
import os
import random
import re
import time
import traceback
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional

import requests
from bs4 import BeautifulSoup

# ─────────────────────────────────────────────
#  CONFIGURATION
# ─────────────────────────────────────────────

REAL_SITE_URL = "https://sheriff.knoxcountytn.gov"
ROSTER_PATH   = "/index.php"

# How often to check for new inmates (in seconds)
MIN_CHECK_INTERVAL = 10 * 60   # 10 minutes
MAX_CHECK_INTERVAL = 15 * 60   # 15 minutes

# Delay between image downloads
MIN_DELAY_SECONDS = 3
MAX_DELAY_SECONDS = 7

# Timeouts (seconds) for outbound HTTP calls so a stalled connection can
# never hang the monitor loop indefinitely.
HTTP_TIMEOUT_SECONDS     = 15
FACEBOOK_TIMEOUT_SECONDS = 60   # photo uploads can be slow

# ── How long to wait between Facebook posts ───
# Change these two numbers to control posting speed
# Examples:
#   2-3 minutes:  MIN_POST_INTERVAL = 2 * 60 / MAX_POST_INTERVAL = 3 * 60
#   25-30 minutes: MIN_POST_INTERVAL = 25 * 60 / MAX_POST_INTERVAL = 30 * 60
MIN_POST_INTERVAL = 2 * 60    # 2 minutes  ← change this
MAX_POST_INTERVAL = 3 * 60    # 3 minutes  ← change this

# Where to save output files
OUTPUT_DIR = r"C:\Knox County"
SEEN_FILE  = os.path.join(OUTPUT_DIR, "seen_inmates.json")

# Your Facebook page name (used in posts)
FACEBOOK_PAGE_NAME = "Knox County Mugshots"

# ── Facebook API (optional) ───────────────────
# Set FACEBOOK_ENABLED = True to auto-post when new inmates are found
# To get your token and page ID:
#   1. Go to https://developers.facebook.com and create an app
#   2. Add the "Pages API" product
#   3. Generate a Page Access Token for your mugshots page
#   4. Find your Page ID in your Facebook page settings → About
#
# SECURITY: the token is a credential. It is read from the environment
# and must never be committed or pasted with the source. Any token that
# was previously embedded in this file should be revoked and regenerated.
FACEBOOK_ENABLED  = True
FACEBOOK_TOKEN    = os.environ.get("KNOX_FACEBOOK_TOKEN", "")
FACEBOOK_PAGE_ID  = os.environ.get("KNOX_FACEBOOK_PAGE_ID", "1057858950737975")

# ── Discord Webhook (optional) ────────────────
# Set DISCORD_ENABLED = True to receive error alerts in Discord
# To get your webhook URL:
#   1. Open Discord and go to the channel you want alerts in
#   2. Click the gear icon (Edit Channel) → Integrations → Webhooks
#   3. Click "New Webhook", give it a name, copy the URL and export it as
#      the KNOX_DISCORD_WEBHOOK_URL environment variable
#
# SECURITY: anyone holding the webhook URL can post to the channel, so it
# is treated as a secret. Delete and recreate any webhook whose URL was
# previously embedded in this file.
DISCORD_ENABLED     = True
DISCORD_WEBHOOK_URL = os.environ.get("KNOX_DISCORD_WEBHOOK_URL", "")

# Discord rejects webhook messages whose "content" exceeds this length.
DISCORD_MAX_CONTENT = 2000

# Standard disclaimer text.
# NOTE: format_post() does not currently append this to the post body.
DISCLAIMER = (
    "All persons are presumed innocent until proven guilty in a court of law. "
    "Booking information is a matter of public record."
)

# Whole-word match only: a plain substring test for "ice" also fires on
# POLICE, LICENSE, OFFICER, DEVICE, SERVICE, ... and would wrongly tag
# those bookings as immigration-related.
_ICE_PATTERN = re.compile(r"\b(?:ice|immigration)\b", re.IGNORECASE)


# ─────────────────────────────────────────────
#  DISCORD ALERTS
# ─────────────────────────────────────────────

def discord_alert(message: str):
    """Send an alert message to a Discord channel via webhook.

    Best-effort: does nothing when alerts are disabled or no webhook URL is
    configured, and never raises — failures are only printed.
    """
    if not DISCORD_ENABLED:
        return
    if not DISCORD_WEBHOOK_URL:
        print("  Discord alert skipped: KNOX_DISCORD_WEBHOOK_URL is not set")
        return

    content = f"⚠️ **Knox County Monitor Alert**\n{message}"
    if len(content) > DISCORD_MAX_CONTENT:
        # Alerts embed raw API error bodies, which can be arbitrarily long.
        content = content[:DISCORD_MAX_CONTENT - 3] + "..."

    try:
        response = requests.post(
            DISCORD_WEBHOOK_URL,
            json={"content": content, "username": "Knox Monitor"},
            timeout=HTTP_TIMEOUT_SECONDS,
        )
        if response.status_code in (200, 204):
            print("  Discord alert sent")
        else:
            print(f"  Discord alert failed ({response.status_code}): {response.text}")
    except Exception as e:
        print(f"  Discord alert error: {e}")


# ─────────────────────────────────────────────
#  DATA MODEL
# ─────────────────────────────────────────────

@dataclass
class ArrestRecord:
    """One inmate entry as scraped from the roster page."""

    name: str
    dob: Optional[str] = None
    booking_number: Optional[str] = None      # the roster's "IDN#" value
    booking_date: Optional[str] = None
    bond: Optional[str] = None
    bonds: list[str] = field(default_factory=list)     # "charge: bond" strings
    charges: list[str] = field(default_factory=list)
    arresting_agency: Optional[str] = None
    image_url: Optional[str] = None
    image_data: Optional[bytes] = None
    source_url: Optional[str] = None


# ─────────────────────────────────────────────
#  SEEN INMATES TRACKER
# Saves known IDN#s to a file so memory persists across restarts
# ─────────────────────────────────────────────

def load_seen_inmates() -> set:
    """Load the set of already-seen IDN numbers from disk.

    Returns an empty set when the file does not exist or cannot be read.
    An unreadable file is reported loudly, because starting from an empty
    set makes every inmate currently on the roster look new.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    if not os.path.exists(SEEN_FILE):
        return set()
    try:
        with open(SEEN_FILE, "r", encoding="utf-8") as f:
            return set(json.load(f))
    except (OSError, ValueError, TypeError) as e:
        print(f"  WARNING: could not read {SEEN_FILE}: {e}")
        print("  WARNING: starting with an empty seen list — "
              "everyone on the roster will be treated as new")
        return set()


def save_seen_inmates(seen: set):
    """Save the set of seen IDN numbers to disk.

    Writes to a temporary file and then swaps it into place, so an
    interruption mid-write cannot leave a truncated seen file behind.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    tmp_path = SEEN_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(sorted(seen, key=str), f, indent=2)
    os.replace(tmp_path, SEEN_FILE)


# ─────────────────────────────────────────────
#  SCRAPER
# ─────────────────────────────────────────────

class KnoxCountyScraper:
    """Fetches the roster page and turns it into ArrestRecord objects."""

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": (
                "MugshotPageBot/1.0 (public records aggregator; "
                "contact: your@email.com)"
            )
        })

    def polite_wait(self):
        """Sleep a random MIN/MAX_DELAY_SECONDS interval between requests."""
        delay = random.uniform(MIN_DELAY_SECONDS, MAX_DELAY_SECONDS)
        print(f"  Waiting {delay:.1f}s...")
        time.sleep(delay)

    def fetch_page(self, url: str) -> BeautifulSoup:
        """GET *url* and return the parsed HTML; raises on HTTP errors."""
        print(f"  Fetching: {url}")
        response = self.session.get(url, timeout=HTTP_TIMEOUT_SECONDS)
        response.raise_for_status()
        return BeautifulSoup(response.text, "html.parser")

    @staticmethod
    def _find_bond_tags(bond_row) -> list:
        """Return the <strong class="bond"> labels directly under a row or its cells."""
        bond_tags = bond_row.find_all("strong", class_="bond", recursive=False)
        if not bond_tags:
            bond_tags = []
            for td in bond_row.find_all("td", recursive=False):
                bond_tags.extend(td.find_all("strong", class_="bond", recursive=False))
        return bond_tags

    @staticmethod
    def _bond_summary(bond_tags) -> str:
        """Build the bond string ("TYPE - AMOUNT", "AMOUNT", "DENIED" or "None")."""
        bond_type = None
        bond_amount = None
        bond_denied = False

        for tag in bond_tags:
            label = tag.get_text(strip=True)

            # The value is the first non-blank node following the label.
            value = tag.next_sibling
            while value and isinstance(value, str) and not value.strip():
                value = value.next_sibling
            if isinstance(value, str):
                value = value.strip()
            else:
                value = value.get_text(strip=True) if value else ""

            is_amount = "Bond Amount" in label
            is_type = "Bond Type" in label
            upper = value.upper()

            if upper == "DENIED":
                # Remember the denial separately; it must not be used as an
                # amount/type, but it must not be reported as "None" either.
                if is_amount or is_type:
                    bond_denied = True
            elif value and upper != "NONE":
                if is_amount:
                    bond_amount = value
                if is_type:
                    bond_type = value

        if bond_amount:
            return f"{bond_type + ' - ' if bond_type else ''}{bond_amount}"
        if bond_denied:
            return "DENIED"
        return "None"

    def parse_roster(self, soup: BeautifulSoup) -> list[ArrestRecord]:
        """Parse all inmates from the roster page.

        An inmate starts at each table holding a ``span.redbold`` name and
        owns every table up to the next such table. A failure while parsing
        one inmate is printed and that inmate is skipped.
        """
        records = []
        all_tables = soup.find_all("table")

        # Find the starting table index for each inmate
        inmate_start_indices = []
        for table_idx, table in enumerate(all_tables):
            name_span = table.find("span", class_="redbold")
            if name_span:
                name_text = name_span.get_text(strip=True)
                if name_text and len(name_text) > 3 and "IDN" not in name_text:
                    inmate_start_indices.append(table_idx)

        skip = ("\xa0", "Charge", "", "Court Date", "Document Type")

        for pos, start_idx in enumerate(inmate_start_indices):
            if pos + 1 < len(inmate_start_indices):
                end_idx = inmate_start_indices[pos + 1]
            else:
                end_idx = len(all_tables)
            inmate_tables = all_tables[start_idx:end_idx]

            try:
                # Name + DOB
                t1 = inmate_tables[0]
                name_span = t1.find("span", class_="redbold")
                name = name_span.get_text(strip=True) if name_span else None
                if not name:
                    continue

                dob = None
                for td in t1.find_all("td"):
                    text = td.get_text(strip=True)
                    if "D.O.B." in text:
                        dob = text.replace("D.O.B.", "").strip()
                        break

                # Mugshot image: the nearest <img> preceding the name table
                image_url = None
                prev_img = t1.find_previous("img")
                if prev_img and prev_img.get("src") and "showfile" in prev_img["src"]:
                    src = prev_img["src"]
                    if src.startswith("http"):
                        image_url = src
                    else:
                        image_url = REAL_SITE_URL + "/" + src.lstrip("/")

                # IDN#
                booking_number = None
                for t in inmate_tables:
                    for td in t.find_all("td"):
                        text = td.get_text(strip=True)
                        if "IDN#" in text:
                            booking_number = text.replace("IDN#:", "").strip()
                            break
                    if booking_number:
                        break

                # Charges + Bond — collect all charge/bond pairs
                charges = []
                bonds = []       # list of bond strings, one per charge row
                booking_date = None

                for t in inmate_tables:
                    if "Booked/Served" not in t.get_text():
                        continue

                    rows = t.find_all("tr")
                    data_rows = [r for r in rows if not r.find("th")]

                    # Rows are consumed as (charge row, bond row) pairs.
                    idx = 0
                    while idx < len(data_rows):
                        charge_row = data_rows[idx]
                        bond_row   = data_rows[idx + 1] if idx + 1 < len(data_rows) else None
                        idx += 2

                        # ── Parse charge row ──────────────────────────────────
                        tds = charge_row.find_all("td")
                        if not tds or len(tds) < 3:
                            continue

                        doc_type    = tds[0].get_text(strip=True)
                        date_text   = tds[1].get_text(strip=True)
                        charge_text = tds[2].get_text(strip=True)

                        # "EXPIRES ..." text is labelled with its document
                        # type. This test has to come first: checked after
                        # the generic case it could never be reached.
                        if charge_text.upper().startswith("EXPIRES"):
                            raw_charges = [f"{doc_type} - {charge_text}"]
                        elif charge_text and charge_text not in skip:
                            # Some rows pack multiple charges into one cell separated by commas
                            raw_charges = [c.strip() for c in charge_text.split(",") if c.strip()]
                        else:
                            raw_charges = []

                        for c in raw_charges:
                            if c not in charges:
                                charges.append(c)

                        if date_text and not booking_date and date_text not in ("Booked/Served", "\xa0", ""):
                            booking_date = date_text

                        # ── Parse bond row ────────────────────────────────────
                        if not bond_row:
                            continue

                        bond_tags = self._find_bond_tags(bond_row)
                        if not bond_tags:
                            # Not a bond row — back up and reprocess
                            idx -= 1
                            continue

                        bond_str = self._bond_summary(bond_tags)

                        # Pair bond with all charges from this charge row
                        for c in raw_charges:
                            bonds.append(f"{c}: {bond_str}")

                    # Only the first charges table per inmate is used.
                    break

                records.append(ArrestRecord(
                    name=name,
                    dob=dob,
                    booking_number=booking_number,
                    booking_date=booking_date,
                    bonds=bonds,
                    charges=charges,
                    arresting_agency="Knox County Sheriff's Office",
                    image_url=image_url,
                    source_url=REAL_SITE_URL + ROSTER_PATH,
                ))

            except Exception as e:
                print(f"  ERROR parsing inmate at table {start_idx}: {e}")
                traceback.print_exc()

        return records

    def download_image(self, image_url: str) -> Optional[bytes]:
        """Download a mugshot after a polite delay; returns None on failure."""
        try:
            self.polite_wait()
            resp = self.session.get(image_url, timeout=HTTP_TIMEOUT_SECONDS)
            resp.raise_for_status()
            return resp.content
        except Exception as e:
            print(f"  Image download failed: {e}")
            return None

    def is_refresh_page(self, soup: BeautifulSoup) -> bool:
        """Detect the 'page refreshing, please check back soon' holding page."""
        page_text = soup.get_text().lower()
        refresh_phrases = [
            "please check back soon",
            "page refreshing",
            "temporarily unavailable",
            "updating records",
            "system is updating",
        ]
        return any(phrase in page_text for phrase in refresh_phrases)

    def check_for_new_inmates(self, seen: set) -> list[ArrestRecord]:
        """Fetch the roster and return only inmates not in the seen set.

        Records without an IDN# are ignored, and an IDN# that appears more
        than once in the same fetch is returned only once. *seen* itself is
        not modified.
        """
        roster_url = REAL_SITE_URL.rstrip("/") + ROSTER_PATH
        soup = self.fetch_page(roster_url)

        # If the site is refreshing, wait and retry up to 3 times
        if self.is_refresh_page(soup):
            print("  Site is refreshing — will retry in 60 seconds...")
            for attempt in range(1, 4):
                time.sleep(60)
                print(f"  Retry attempt {attempt}/3...")
                soup = self.fetch_page(roster_url)
                if not self.is_refresh_page(soup):
                    print("  Site is back up, continuing...")
                    break
            else:
                # All retries failed — skip this check entirely
                print("  Site still refreshing after 3 attempts — skipping this check")
                return []

        new_records = []
        new_ids = set()
        for record in self.parse_roster(soup):
            idn = record.booking_number
            if idn and idn not in seen and idn not in new_ids:
                new_ids.add(idn)
                new_records.append(record)
        return new_records


# ─────────────────────────────────────────────
#  FACEBOOK POSTER (optional)
# ─────────────────────────────────────────────

class FacebookPoster:
    def post(self, record: ArrestRecord, post_text: str):
        """Post text + image to Facebook page via the Graph API.

        Does nothing when posting is disabled or no token is configured.
        Never raises: failures are printed and forwarded to discord_alert().
        """
        if not FACEBOOK_ENABLED:
            return
        if not FACEBOOK_TOKEN:
            print("  Facebook post skipped: KNOX_FACEBOOK_TOKEN is not set")
            return

        try:
            if record.image_data:
                # Post with photo
                url = f"https://graph.facebook.com/{FACEBOOK_PAGE_ID}/photos"
                response = requests.post(
                    url,
                    data={
                        "caption": post_text,
                        "access_token": FACEBOOK_TOKEN,
                    },
                    files={
                        "source": ("mugshot.jpg", record.image_data, "image/jpeg")
                    },
                    timeout=FACEBOOK_TIMEOUT_SECONDS,
                )
            else:
                # Text-only post
                url = f"https://graph.facebook.com/{FACEBOOK_PAGE_ID}/feed"
                response = requests.post(
                    url,
                    data={
                        "message": post_text,
                        "access_token": FACEBOOK_TOKEN,
                    },
                    timeout=FACEBOOK_TIMEOUT_SECONDS,
                )

            print(f"  Facebook response ({response.status_code}): {response.text}")

            if response.status_code == 200:
                print(f"  Posted to Facebook: {record.name}")
            else:
                error_msg = (
                    f"Failed to post **{record.name}** (IDN: {record.booking_number})\n"
                    f"Status: {response.status_code}\n"
                    f"Error: {response.text}"
                )
                print(f"  Facebook post FAILED ({response.status_code}): {response.text}")
                discord_alert(error_msg)

        except Exception as e:
            print(f"  Facebook post error: {e}")
            traceback.print_exc()
            discord_alert(f"Exception while posting **{record.name}**: {e}")


# ─────────────────────────────────────────────
#  POST FORMATTER
# ─────────────────────────────────────────────

def format_name(raw: str) -> str:
    """
    Convert 'LAST, FIRST MIDDLE' to 'First Last' or 'First Middle Last'.
    Handles names with hyphens and apostrophes.
    """
    raw = raw.strip()
    if "," in raw:
        last, first_middle = raw.split(",", 1)
        return f"{first_middle.strip().title()} {last.strip().title()}"
    # No comma — just title case as-is
    return raw.title()


def _hashtag_safe(text: str) -> str:
    """Drop everything except letters and digits so a hashtag is not cut short."""
    return "".join(c for c in text if c.isalnum())


def format_post(record: ArrestRecord) -> str:
    """Build the Facebook post text (name, date, charges, agency, hashtags)."""
    # Format name from LAST, FIRST MIDDLE → First Middle Last
    display_name = format_name(record.name)

    # Charges
    charge_list = "\n".join(record.charges) if record.charges else "Not listed"

    # Hashtags
    # Agency tag — strip spaces and special chars
    agency = record.arresting_agency or ""
    agency_tag = "#" + _hashtag_safe(agency) if agency else ""

    # Name tag — First Last, no middle, no punctuation (an apostrophe or
    # hyphen would end the hashtag early)
    name_parts = display_name.split()
    if len(name_parts) >= 2:
        name_tag = "#" + _hashtag_safe(name_parts[0]) + _hashtag_safe(name_parts[-1])
    else:
        name_tag = "#" + _hashtag_safe(display_name)

    # ICE tag — add only if a charge mentions "ICE" or "immigration" as a
    # whole word
    ice_tag = "\n#ICE" if _ICE_PATTERN.search(" ".join(record.charges)) else ""

    return (
        f"{display_name}\n"
        f"\n"
        f"Date Booked: {record.booking_date or 'Unknown'}\n"
        f"\n"
        f"Charges:\n"
        f"{charge_list}\n"
        f"\n"
        f"Arresting Agency: {agency or 'Unknown'}\n"
        f"\n"
        f"{agency_tag}{ice_tag}\n"
        f"{name_tag}\n"
        f"#knoxcountymugshots\n"
        f"#knoxvillemugshots\n"
        f"#mugshots"
    )


def save_record(record: ArrestRecord, index: int):
    """Save text post and image to the output folder.

    Files are named "<index>_<name>" with any character other than
    letters, digits, space, underscore and hyphen replaced by "_".
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    safe_name = "".join(c if c.isalnum() or c in " _-" else "_" for c in record.name)
    base = f"{index:03d}_{safe_name}"

    # Save text
    text_path = os.path.join(OUTPUT_DIR, f"{base}.txt")
    with open(text_path, "w", encoding="utf-8") as f:
        f.write(format_post(record))
    print(f"  Saved: {text_path}")

    # Save image
    if record.image_data:
        img_path = os.path.join(OUTPUT_DIR, f"{base}.jpg")
        with open(img_path, "wb") as f:
            f.write(record.image_data)
        print(f"  Image saved: {img_path}")


# ─────────────────────────────────────────────
#  MAIN MONITOR LOOP
# ─────────────────────────────────────────────

def main():
    """Run the monitor loop until interrupted."""
    print("=" * 55)
    print("  Knox County TN Jail — Auto Monitor")
    print(f"  Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"  Checking every {MIN_CHECK_INTERVAL//60}–{MAX_CHECK_INTERVAL//60} minutes")
    print(f"  Output: {os.path.abspath(OUTPUT_DIR)}")
    print(f"  Facebook: {'ENABLED' if FACEBOOK_ENABLED else 'disabled'}")
    if FACEBOOK_ENABLED and not FACEBOOK_TOKEN:
        print("  WARNING: KNOX_FACEBOOK_TOKEN is not set — nothing will be posted")
    if DISCORD_ENABLED and not DISCORD_WEBHOOK_URL:
        print("  WARNING: KNOX_DISCORD_WEBHOOK_URL is not set — no alerts will be sent")
    print("  Press Ctrl+C to stop")
    print("=" * 55)

    # Posting (and the pause between posts) only happens with a usable token.
    posting_enabled = FACEBOOK_ENABLED and bool(FACEBOOK_TOKEN)

    scraper = KnoxCountyScraper()
    poster  = FacebookPoster()

    # Load previously seen inmates
    seen = load_seen_inmates()
    print(f"\nLoaded {len(seen)} previously seen inmates from disk")

    check_count = 0

    while True:
        check_count += 1
        now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print(f"\n[Check #{check_count} at {now}]")

        try:
            new_records = scraper.check_for_new_inmates(seen)

            if not new_records:
                print(f"  No new inmates found ({len(seen)} total seen so far)")
            else:
                print(f"  {len(new_records)} NEW inmate(s) found!")

                for i, record in enumerate(new_records, start=1):
                    print(f"\n  >> NEW: {record.name} (IDN: {record.booking_number})")

                    # Download image
                    if record.image_url:
                        record.image_data = scraper.download_image(record.image_url)

                    # Save to disk
                    save_record(record, i)

                    # Post to Facebook (if enabled)
                    if posting_enabled:
                        poster.post(record, format_post(record))
                        last_posted = datetime.now()
                        print(f"  Last posted at: {last_posted.strftime('%Y-%m-%d %H:%M:%S')}")

                    # Mark as seen and persist right away. Saving only after
                    # the whole batch would lose every record already posted
                    # if the run is stopped during the minutes-long pauses
                    # below, and they would be posted again on restart.
                    if record.booking_number:
                        seen.add(record.booking_number)
                        save_seen_inmates(seen)

                    # Wait between posts — controlled by MIN/MAX_POST_INTERVAL in config
                    if posting_enabled and i < len(new_records):
                        wait = random.randint(MIN_POST_INTERVAL, MAX_POST_INTERVAL)
                        next_post = datetime.fromtimestamp(time.time() + wait).strftime("%H:%M:%S")
                        print(f"  Next post at {next_post} (in {wait//60}m {wait%60}s)...")
                        time.sleep(wait)

        except Exception as e:
            print(f"  ERROR during check: {e}")
            traceback.print_exc()
            print("  Will try again next interval...")

        # Wait before next check
        wait = random.randint(MIN_CHECK_INTERVAL, MAX_CHECK_INTERVAL)
        next_check = datetime.fromtimestamp(time.time() + wait).strftime("%H:%M:%S")
        print(f"\n  Next check at {next_check} (in {wait//60}m {wait%60}s)...")
        time.sleep(wait)


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\nMonitor stopped by user. Goodbye!")