|
____________________________________
Split .json SCRIPT ____________________________________
import json import os import math import tkinter as tk from tkinter import filedialog
def split_list(lst, n):
    """Return exactly n lists distributing elements as evenly as possible.

    The first ``len(lst) % n`` chunks receive one extra element, so chunk
    sizes differ by at most one. When ``n`` exceeds ``len(lst)`` the trailing
    chunks are empty lists. Element order is preserved.

    Args:
        lst: Sliceable sequence to split (it is not modified).
        n: Number of chunks to produce; must be at least 1.

    Returns:
        A list of ``n`` slices of ``lst``.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Without this guard n == 0 fails with an unhelpful ZeroDivisionError and
    # a negative n silently returns [] (every element would be dropped).
    if n < 1:
        raise ValueError("n must be a positive integer")

    base, rem = divmod(len(lst), n)
    out = []
    start = 0
    for i in range(n):
        # The first `rem` chunks absorb the leftover elements, one each.
        end = start + base + (1 if i < rem else 0)
        out.append(lst[start:end])
        start = end
    return out
def split_items(items, n):
    """Split a list of (key, value) pairs into n chunks (lists of pairs).

    Chunks are contiguous, keep the input order, and differ in length by at
    most one; the earliest chunks take the extra pairs. When ``n`` exceeds
    ``len(items)`` the trailing chunks are empty lists.

    Args:
        items: List of ``(key, value)`` pairs (it is not modified).
        n: Number of chunks to produce; must be at least 1.

    Returns:
        A list of ``n`` lists of pairs.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    # Reject n <= 0 explicitly: 0 would raise ZeroDivisionError and a
    # negative value would silently produce no chunks at all.
    if n < 1:
        raise ValueError("n must be a positive integer")

    base, rem = divmod(len(items), n)
    # Chunk i starts after i base-sized chunks plus one extra pair for each
    # earlier chunk that absorbed part of the remainder.
    bounds = [i * base + min(i, rem) for i in range(n + 1)]
    return [items[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
def _prompt_num_splits():
    """Ask on stdin until the user enters an integer >= 1, then return it."""
    while True:
        raw = input("How many files would you like to split it into? ").strip()
        try:
            num_splits = int(raw)
        except ValueError:
            print("Enter a valid integer.")
            continue
        if num_splits >= 1:
            return num_splits
        print("Enter a positive integer.")


def _write_part(obj, output_dir, base_name, idx, width):
    """Write obj as indented UTF-8 JSON to '<base_name>_<idx>.json'; return the path."""
    # Zero-pad the index so the files sort in order in a directory listing.
    filename = f"{base_name}_{str(idx).zfill(width)}.json"
    path = os.path.join(output_dir, filename)
    with open(path, "w", encoding="utf-8") as out:
        json.dump(obj, out, indent=2, ensure_ascii=False)
    return path


def _plan_parts(data, num_splits, skip_empty):
    """Decide what each output file should contain.

    Returns ``(notice, parts)``. ``notice`` is a message to print before
    writing (or None). ``parts`` is a list of ``(obj, label, is_empty)``
    tuples in output order: ``obj`` is the JSON value for that file, ``label``
    is the text shown in the "Saved ..." line, and ``is_empty`` marks parts
    that are dropped when ``skip_empty`` is set.
    """
    if isinstance(data, list):
        # Top-level array: distribute its elements.
        pieces = split_list(data, num_splits)
        return None, [(p, f"{len(p)} items", len(p) == 0) for p in pieces]

    if isinstance(data, dict):
        if len(data) > 1:
            # Several top-level keys: distribute the key/value pairs.
            chunks = split_items(list(data.items()), num_splits)
            return None, [
                (dict(c), f"{len(c)} keys", len(c) == 0) for c in chunks
            ]

        if len(data) == 1:
            # Single wrapper key: split what is inside it and re-wrap each
            # piece under the same key so every file keeps the same shape.
            top_key = next(iter(data))
            inner = data[top_key]
            if isinstance(inner, list):
                pieces = split_list(inner, num_splits)
                return None, [
                    (
                        {top_key: p},
                        f"{len(p)} items under '{top_key}'",
                        len(p) == 0,
                    )
                    for p in pieces
                ]
            if isinstance(inner, dict):
                chunks = split_items(list(inner.items()), num_splits)
                return None, [
                    (
                        {top_key: dict(c)},
                        f"{len(c)} keys under '{top_key}'",
                        len(c) == 0,
                    )
                    for c in chunks
                ]
            # The wrapped value cannot be divided: the first file gets the
            # whole object; the rest are empty objects unless skipped.
            notice = "Top-level is a single key with a scalar/non-splittable value."
            parts = [(data, "full object", False)]
            if not skip_empty:
                parts += [({}, "empty", False) for _ in range(num_splits - 1)]
            return notice, parts

        notice = "Top-level JSON object is an empty object {}."
        return notice, [({}, "empty object", True) for _ in range(num_splits)]

    # Scalar document (string/number/bool/null): nothing to divide.
    notice = "Top-level JSON is a scalar (not a list or dict)."
    if skip_empty:
        return notice, [(data, "scalar", False)]
    parts = [(data, "value", False)]
    parts += [(None, "null", False) for _ in range(num_splits - 1)]
    return notice, parts


def _run_split_session(skip_empty, encoding):
    """Run the dialogs and prompts, then read, split and write the JSON."""
    input_path = filedialog.askopenfilename(
        title="Select a JSON file",
        filetypes=[("JSON Files", "*.json")],
    )
    if not input_path:
        print("No file selected. Exiting.")
        return

    num_splits = _prompt_num_splits()

    # User-facing boundary: any failure to open, decode or parse the file is
    # reported and the run is abandoned instead of surfacing a traceback.
    try:
        with open(input_path, "r", encoding=encoding) as f:
            data = json.load(f)
    except Exception as e:
        print("Failed to read JSON:", e)
        return

    output_dir = filedialog.askdirectory(title="Select folder to save split files")
    if not output_dir:
        print("No folder selected. Exiting.")
        return

    default_base = os.path.splitext(os.path.basename(input_path))[0] + "_part"
    base_name = (
        input(f"Enter base name for split files (default: '{default_base}'): ").strip()
        or default_base
    )

    width = len(str(num_splits))
    notice, parts = _plan_parts(data, num_splits, skip_empty)
    if notice:
        print(notice)

    wrote_any = False
    for i, (obj, label, is_empty) in enumerate(parts, start=1):
        if skip_empty and is_empty:
            print(f"Skipping empty part {i}")
            continue
        path = _write_part(obj, output_dir, base_name, i, width)
        print(f"Saved {path} ({label})")
        wrote_any = True

    if not wrote_any:
        print("No files were written (maybe all parts were empty and skip_empty=True).")
    else:
        print("Splitting complete!")


def split_json_file(skip_empty=False, encoding="windows-1252"):
    """Interactively split one JSON file into several smaller JSON files.

    The user picks the input file and the output folder through Tk dialogs
    and types the number of parts and the output base name on stdin. Files
    are written as ``<base>_<index>.json`` (index zero-padded to the width of
    the part count), always encoded as UTF-8.

    Splitting strategy, chosen from the shape of the document:
      * top-level list: its elements are distributed across the parts;
      * dict with several keys: its key/value pairs are distributed;
      * dict with exactly one key whose value is a list or dict: the inner
        value is distributed and each part is re-wrapped under that key;
      * dict with one key and any other value, empty dict, or scalar: the
        document cannot be divided; part 1 holds the whole value and the
        remaining parts are empty placeholders (``{}`` or ``null``).

    Args:
        skip_empty: When true, parts that would be empty are not written.
        encoding: Text encoding used to read the input file. Defaults to
            ``"windows-1252"`` (the previously hard-coded value); pass
            ``"utf-8"`` for standard JSON files.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    # The hidden root only exists to host the dialogs. Destroy it on every
    # exit path so repeated calls do not accumulate Tk instances.
    root = tk.Tk()
    root.withdraw()
    try:
        _run_split_session(skip_empty, encoding)
    finally:
        root.destroy()
if __name__ == "__main__":
    # Script entry point. Empty parts are written by default; change the
    # argument to skip_empty=True to leave them out instead.
    split_json_file(skip_empty=False)
|