# --- Paste-site page header (not part of the program) ---
# All pastes · #5ykNveyFWw · Raw · Copy code · Copy link · Edit
# text paste
# public text v1 · immutable
# #5ykNveyFWw · published 2026-05-31 03:25 UTC
# Lines · Syntax · Wrap · Mono
# rendered paste body
"""Knox County TN Jail — Auto Monitor====================================Automatically checks the Knox County jail roster every 10-15 minutesfor new inmates. When a new inmate is found, their info is saved tothe output folder as a ready-to-post Facebook text file + mugshot image.FACEBOOK API SUPPORT: Set FACEBOOK_ENABLED = True and fill in your page token and page ID to automatically post to Facebook when a new inmate is detected. Instructions for getting a token are in the README comments below.HOW TO RUN: python knox_monitor.py (Leave the terminal open — it will keep running until you close it)HOW TO STOP: Press Ctrl+C in the terminal"""import timeimport randomimport jsonimport osimport requestsimport tracebackfrom datetime import datetimefrom dataclasses import dataclass, fieldfrom typing import Optionalfrom bs4 import BeautifulSoup# ─────────────────────────────────────────────# CONFIGURATION# ─────────────────────────────────────────────REAL_SITE_URL = "https://sheriff.knoxcountytn.gov"ROSTER_PATH = "/index.php"# How often to check for new inmates (in seconds)MIN_CHECK_INTERVAL = 10 * 60 # 10 minutesMAX_CHECK_INTERVAL = 15 * 60 # 15 minutes# Delay between image downloadsMIN_DELAY_SECONDS = 3MAX_DELAY_SECONDS = 7# ── How long to wait between Facebook posts ───# Change these two numbers to control posting speed# Examples:# 2-3 minutes: MIN_POST_INTERVAL = 2 * 60 / MAX_POST_INTERVAL = 3 * 60# 25-30 minutes: MIN_POST_INTERVAL = 25 * 60 / MAX_POST_INTERVAL = 30 * 60MIN_POST_INTERVAL = 2 * 60 # 2 minutes ← change thisMAX_POST_INTERVAL = 3 * 60 # 3 minutes ← change this# Where to save output filesOUTPUT_DIR = r"C:\Knox County"SEEN_FILE = os.path.join(OUTPUT_DIR, "seen_inmates.json")# Your Facebook page name (used in posts)FACEBOOK_PAGE_NAME = "Knox County Mugshots"# ── Facebook API (optional) ───────────────────# Set FACEBOOK_ENABLED = True to auto-post when new inmates are found# To get your token and page ID:# 1. 
Go to https://developers.facebook.com and create an app# 2. Add the "Pages API" product# 3. Generate a Page Access Token for your mugshots page# 4. Find your Page ID in your Facebook page settings → AboutFACEBOOK_ENABLED = TrueFACEBOOK_TOKEN = "EAAVsKezZBGQ8BRtZBR5Pg7i2yEh1m8ZBOIknV5LcwHSqbffFTKTk8tqiM8MsWnbGCttCjCPQ3nKVP9nu4dyIwVu5HjZCIKquT3ankwBeuizXZA3fiM5GJnIf99mac7Qd8Sty958qcqW1l3KoZArRYmsyTPIZAszLWaD8F1BMY8xWgdWVdxqOOvPtcYnBImO8b2qA20ZA"FACEBOOK_PAGE_ID = "1057858950737975"# ── Discord Webhook (optional) ────────────────# Set DISCORD_ENABLED = True to receive error alerts in Discord# To get your webhook URL:# 1. Open Discord and go to the channel you want alerts in# 2. Click the gear icon (Edit Channel) → Integrations → Webhooks# 3. Click "New Webhook", give it a name, copy the URL and paste belowDISCORD_ENABLED = TrueDISCORD_WEBHOOK_URL = "https://discord.com/api/webhooks/1484760889457643632/Z4cBmdR8_urJVbwWLkkVe3yFmkHcildddpP0XU-RpAFnGJH_cccq5foMK11saAaeuI6j"# Disclaimer added to every postDISCLAIMER = ( "All persons are presumed innocent until proven guilty in a court of law. 
" "Booking information is a matter of public record.")# ─────────────────────────────────────────────# DISCORD ALERTS# ─────────────────────────────────────────────def discord_alert(message: str): """Send an alert message to a Discord channel via webhook.""" if not DISCORD_ENABLED: return try: response = requests.post(DISCORD_WEBHOOK_URL, json={ "content": f"⚠️ **Knox County Monitor Alert**\n{message}", "username": "Knox Monitor" }) if response.status_code in (200, 204): print(f" Discord alert sent") else: print(f" Discord alert failed ({response.status_code}): {response.text}") except Exception as e: print(f" Discord alert error: {e}")# ─────────────────────────────────────────────# DATA MODEL# ─────────────────────────────────────────────@dataclassclass ArrestRecord: name: str dob: Optional[str] = None booking_number: Optional[str] = None booking_date: Optional[str] = None bond: Optional[str] = None bonds: list[str] = field(default_factory=list) charges: list[str] = field(default_factory=list) arresting_agency: Optional[str] = None image_url: Optional[str] = None image_data: Optional[bytes] = None source_url: Optional[str] = None# ─────────────────────────────────────────────# SEEN INMATES TRACKER# Saves known IDN#s to a file so memory persists across restarts# ─────────────────────────────────────────────def load_seen_inmates() -> set: """Load the set of already-seen IDN numbers from disk.""" os.makedirs(OUTPUT_DIR, exist_ok=True) if not os.path.exists(SEEN_FILE): return set() try: with open(SEEN_FILE, "r") as f: data = json.load(f) return set(data) except Exception: return set()def save_seen_inmates(seen: set): """Save the set of seen IDN numbers to disk.""" os.makedirs(OUTPUT_DIR, exist_ok=True) with open(SEEN_FILE, "w") as f: json.dump(list(seen), f, indent=2)# ─────────────────────────────────────────────# SCRAPER# ─────────────────────────────────────────────class KnoxCountyScraper: def __init__(self): self.session = requests.Session() 
self.session.headers.update({ "User-Agent": ( "MugshotPageBot/1.0 (public records aggregator; " "contact: your@email.com)" ) }) def polite_wait(self): delay = random.uniform(MIN_DELAY_SECONDS, MAX_DELAY_SECONDS) print(f" Waiting {delay:.1f}s...") time.sleep(delay) def fetch_page(self, url: str) -> BeautifulSoup: print(f" Fetching: {url}") response = self.session.get(url, timeout=15) response.raise_for_status() return BeautifulSoup(response.text, "html.parser") def parse_roster(self, soup: BeautifulSoup) -> list[ArrestRecord]: """Parse all inmates from the roster page.""" records = [] all_tables = soup.find_all("table") # Find the starting table index for each inmate inmate_start_indices = [] for idx, table in enumerate(all_tables): name_span = table.find("span", class_="redbold") if name_span: name_text = name_span.get_text(strip=True) if name_text and len(name_text) > 3 and "IDN" not in name_text: inmate_start_indices.append(idx) for pos, start_idx in enumerate(inmate_start_indices): end_idx = inmate_start_indices[pos + 1] if pos + 1 < len(inmate_start_indices) else len(all_tables) inmate_tables = all_tables[start_idx:end_idx] try: # Name + DOB t1 = inmate_tables[0] name_span = t1.find("span", class_="redbold") name = name_span.get_text(strip=True) if name_span else None if not name: continue dob = None for td in t1.find_all("td"): text = td.get_text(strip=True) if "D.O.B." 
in text: dob = text.replace("D.O.B.", "").strip() break # Mugshot image image_url = None prev_img = t1.find_previous("img") if prev_img and prev_img.get("src") and "showfile" in prev_img["src"]: src = prev_img["src"] image_url = src if src.startswith("http") else REAL_SITE_URL + "/" + src.lstrip("/") # IDN# booking_number = None for t in inmate_tables: for td in t.find_all("td"): text = td.get_text(strip=True) if "IDN#" in text: booking_number = text.replace("IDN#:", "").strip() break if booking_number: break # Charges + Bond — collect all charge/bond pairs charges = [] bonds = [] # list of bond strings, one per charge row booking_date = None for t in inmate_tables: if "Booked/Served" not in t.get_text(): continue rows = t.find_all("tr") data_rows = [r for r in rows if not r.find("th")] idx = 0 while idx < len(data_rows): charge_row = data_rows[idx] bond_row = data_rows[idx + 1] if idx + 1 < len(data_rows) else None idx += 2 # ── Parse charge row ────────────────────────────────── tds = charge_row.find_all("td") if not tds or len(tds) < 3: continue date_text = tds[1].get_text(strip=True) charge_text = tds[2].get_text(strip=True) doc_type = tds[0].get_text(strip=True) skip = ("\xa0", "Charge", "", "Court Date", "Document Type") # Some rows pack multiple charges into one cell separated by commas if charge_text and charge_text not in skip: raw_charges = [c.strip() for c in charge_text.split(",") if c.strip()] elif charge_text.upper().startswith("EXPIRES"): raw_charges = [f"{doc_type} - {charge_text}"] else: raw_charges = [] for c in raw_charges: if c not in charges: charges.append(c) if date_text and not booking_date and date_text not in ("Booked/Served", "\xa0", ""): booking_date = date_text # ── Parse bond row ──────────────────────────────────── if not bond_row: continue bond_tags = bond_row.find_all("strong", class_="bond", recursive=False) if not bond_tags: bond_tags = [] for td in bond_row.find_all("td", recursive=False): bond_tags.extend(td.find_all("strong", 
class_="bond", recursive=False)) if not bond_tags: # Not a bond row — back up and reprocess idx -= 1 continue bond_type = None bond_amount = None for tag in bond_tags: label = tag.get_text(strip=True) value = tag.next_sibling while value and isinstance(value, str) and not value.strip(): value = value.next_sibling value = value.strip() if isinstance(value, str) else (value.get_text(strip=True) if value else "") if "Bond Amount" in label and value and value.upper() not in ("NONE", "DENIED"): bond_amount = value if "Bond Type" in label and value and value.upper() not in ("NONE", "DENIED"): bond_type = value if bond_amount: bond_str = f"{bond_type + ' - ' if bond_type else ''}{bond_amount}" elif bond_type and bond_type.upper() == "DENIED": bond_str = "DENIED" else: bond_str = "None" # Pair bond with all charges from this charge row for c in raw_charges: bonds.append(f"{c}: {bond_str}") break records.append(ArrestRecord( name=name, dob=dob, booking_number=booking_number, booking_date=booking_date, bonds=bonds, charges=charges, arresting_agency="Knox County Sheriff's Office", image_url=image_url, source_url=REAL_SITE_URL + ROSTER_PATH, )) except Exception as e: print(f" ERROR parsing inmate at table {start_idx}: {e}") traceback.print_exc() return records def download_image(self, image_url: str) -> Optional[bytes]: try: self.polite_wait() resp = self.session.get(image_url, timeout=15) resp.raise_for_status() return resp.content except Exception as e: print(f" Image download failed: {e}") return None def is_refresh_page(self, soup: BeautifulSoup) -> bool: """Detect the 'page refreshing, please check back soon' holding page.""" page_text = soup.get_text().lower() refresh_phrases = [ "please check back soon", "page refreshing", "temporarily unavailable", "updating records", "system is updating", ] return any(phrase in page_text for phrase in refresh_phrases) def check_for_new_inmates(self, seen: set) -> list[ArrestRecord]: """Fetch the roster and return only inmates not in 
the seen set.""" roster_url = REAL_SITE_URL.rstrip("/") + ROSTER_PATH soup = self.fetch_page(roster_url) # If the site is refreshing, wait and retry up to 3 times if self.is_refresh_page(soup): print(" Site is refreshing — will retry in 60 seconds...") for attempt in range(1, 4): time.sleep(60) print(f" Retry attempt {attempt}/3...") soup = self.fetch_page(roster_url) if not self.is_refresh_page(soup): print(" Site is back up, continuing...") break else: # All retries failed — skip this check entirely print(" Site still refreshing after 3 attempts — skipping this check") return [] all_records = self.parse_roster(soup) new_records = [] for record in all_records: if record.booking_number and record.booking_number not in seen: new_records.append(record) return new_records# ─────────────────────────────────────────────# FACEBOOK POSTER (optional)# ─────────────────────────────────────────────class FacebookPoster: def post(self, record: ArrestRecord, post_text: str): """Post text + image to Facebook page via the Graph API.""" if not FACEBOOK_ENABLED: return try: if record.image_data: # Post with photo url = f"https://graph.facebook.com/{FACEBOOK_PAGE_ID}/photos" response = requests.post(url, data={ "caption": post_text, "access_token": FACEBOOK_TOKEN, }, files={ "source": ("mugshot.jpg", record.image_data, "image/jpeg") }) else: # Text-only post url = f"https://graph.facebook.com/{FACEBOOK_PAGE_ID}/feed" response = requests.post(url, data={ "message": post_text, "access_token": FACEBOOK_TOKEN, }) print(f" Facebook response ({response.status_code}): {response.text}") if response.status_code == 200: print(f" Posted to Facebook: {record.name}") else: error_msg = ( f"Failed to post **{record.name}** (IDN: {record.booking_number})\n" f"Status: {response.status_code}\n" f"Error: {response.text}" ) print(f" Facebook post FAILED ({response.status_code}): {response.text}") discord_alert(error_msg) except Exception as e: print(f" Facebook post error: {e}") traceback.print_exc() 
discord_alert(f"Exception while posting **{record.name}**: {e}")# ─────────────────────────────────────────────# POST FORMATTER# ─────────────────────────────────────────────def format_name(raw: str) -> str: """ Convert 'LAST, FIRST MIDDLE' to 'First Last' or 'First Middle Last'. Handles names with hyphens and apostrophes. """ raw = raw.strip() if "," in raw: parts = raw.split(",", 1) last = parts[0].strip().title() first_middle = parts[1].strip().title() return f"{first_middle} {last}" # No comma — just title case as-is return raw.title()def format_post(record: ArrestRecord) -> str: # Format name from LAST, FIRST MIDDLE → First Middle Last display_name = format_name(record.name) # Charges charge_list = "\n".join(record.charges) if record.charges else "Not listed" # Hashtags # Agency tag — strip spaces and special chars agency = record.arresting_agency or "" agency_tag = "#" + "".join(c for c in agency if c.isalnum()) if agency else "" # Name tag — First Last, no middle, no punctuation name_parts = display_name.split() if len(name_parts) >= 2: name_tag = "#" + name_parts[0] + name_parts[-1] else: name_tag = "#" + display_name.replace(" ", "") # ICE tag — add if charges mention immigration/ICE ice_keywords = ["immigration", "ice", "ice detainer", "hold for ice", "immigration detainee", "hold for immigration"] charges_lower = " ".join(record.charges).lower() ice_tag = "\n#ICE" if any(k in charges_lower for k in ice_keywords) else "" return ( f"{display_name}\n" f"\n" f"Date Booked: {record.booking_date or 'Unknown'}\n" f"\n" f"Charges:\n" f"{charge_list}\n" f"\n" f"Arresting Agency: {agency or 'Unknown'}\n" f"\n" f"{agency_tag}{ice_tag}\n" f"{name_tag}\n" f"#knoxcountymugshots\n" f"#knoxvillemugshots\n" f"#mugshots" )def save_record(record: ArrestRecord, index: int): """Save text post and image to the output folder.""" os.makedirs(OUTPUT_DIR, exist_ok=True) safe_name = "".join(c if c.isalnum() or c in " _-" else "_" for c in record.name) base = 
f"{index:03d}_{safe_name}" # Save text text_path = os.path.join(OUTPUT_DIR, f"{base}.txt") with open(text_path, "w", encoding="utf-8") as f: f.write(format_post(record)) print(f" Saved: {text_path}") # Save image if record.image_data: img_path = os.path.join(OUTPUT_DIR, f"{base}.jpg") with open(img_path, "wb") as f: f.write(record.image_data) print(f" Image saved: {img_path}")# ─────────────────────────────────────────────# MAIN MONITOR LOOP# ─────────────────────────────────────────────def main(): print("=" * 55) print(" Knox County TN Jail — Auto Monitor") print(f" Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print(f" Checking every {MIN_CHECK_INTERVAL//60}–{MAX_CHECK_INTERVAL//60} minutes") print(f" Output: {os.path.abspath(OUTPUT_DIR)}") print(f" Facebook: {'ENABLED' if FACEBOOK_ENABLED else 'disabled'}") print(" Press Ctrl+C to stop") print("=" * 55) scraper = KnoxCountyScraper() poster = FacebookPoster() # Load previously seen inmates seen = load_seen_inmates() print(f"\nLoaded {len(seen)} previously seen inmates from disk") check_count = 0 while True: check_count += 1 now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") print(f"\n[Check #{check_count} at {now}]") try: new_records = scraper.check_for_new_inmates(seen) if not new_records: print(f" No new inmates found ({len(seen)} total seen so far)") else: print(f" {len(new_records)} NEW inmate(s) found!") for i, record in enumerate(new_records, start=1): print(f"\n >> NEW: {record.name} (IDN: {record.booking_number})") # Download image if record.image_url: record.image_data = scraper.download_image(record.image_url) # Save to disk save_record(record, i) # Post to Facebook (if enabled) if FACEBOOK_ENABLED: poster.post(record, format_post(record)) last_posted = datetime.now() print(f" Last posted at: {last_posted.strftime('%Y-%m-%d %H:%M:%S')}") # Wait between posts — controlled by MIN/MAX_POST_INTERVAL in config if i < len(new_records): wait = random.randint(MIN_POST_INTERVAL, MAX_POST_INTERVAL) 
next_post = datetime.fromtimestamp(time.time() + wait).strftime("%H:%M:%S") print(f" Next post at {next_post} (in {wait//60}m {wait%60}s)...") time.sleep(wait) # Mark as seen if record.booking_number: seen.add(record.booking_number) # Save updated seen list save_seen_inmates(seen) except Exception as e: print(f" ERROR during check: {e}") traceback.print_exc() print(" Will try again next interval...") # Wait before next check wait = random.randint(MIN_CHECK_INTERVAL, MAX_CHECK_INTERVAL) next_check = datetime.fromtimestamp(time.time() + wait).strftime("%H:%M:%S") print(f"\n Next check at {next_check} (in {wait//60}m {wait%60}s)...") time.sleep(wait)if __name__ == "__main__": try: main() except KeyboardInterrupt: print("\n\nMonitor stopped by user. Goodbye!")