import json
import os
from itertools import islice

# Default locations used when the script is run without arguments.
DEFAULT_INPUT_PATH = '/data/zgq/yaozhengjian/Datasets/FFHQ_val/CelebA_HQ/captions_lq.json'
DEFAULT_OUTPUT_DIR = '/data/phd/yaozhengjian/Code/RL/ART-FRv2/DiffusionNFT/dataset/restore_face'
DEFAULT_OUTPUT_FILE = 'train_metadata.jsonl'


def convert_json_to_jsonl(
    input_path: str = DEFAULT_INPUT_PATH,
    output_dir: str = DEFAULT_OUTPUT_DIR,
    output_file: str = DEFAULT_OUTPUT_FILE,
    *,
    use_basename: bool = False,
) -> None:
    """Convert a JSON list of captioned images into a JSONL metadata file.

    Each element of the source list must be a mapping with an ``'image'`` and
    a ``'caption'`` key. One JSON object per line is written to
    ``output_dir/output_file`` with the keys ``"prompt"`` (the caption),
    ``"image"`` and ``"requirement"`` (always ``"Restore"``).

    Args:
        input_path: Path of the source JSON file.
        output_dir: Directory that receives the JSONL file; created if missing.
        output_file: File name of the JSONL file inside ``output_dir``.
        use_basename: When true, store only the file name of each image
            (``os.path.basename``) instead of the path found in the source.

    Returns:
        None. A missing input file, invalid JSON, or a top-level value that is
        not a list is reported on stdout and the function returns without
        writing the output file.

    Raises:
        KeyError: If an entry lacks ``'image'`` or ``'caption'``. This is
            raised before the output file is opened, so an existing output
            file is never left truncated by a malformed entry.
    """
    output_path = os.path.join(output_dir, output_file)

    # Create the output directory if needed. exist_ok guards against another
    # process creating it between the check and the call.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        print(f"Created directory: {output_dir}")

    # Read the source JSON file.
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found {input_path}")
        return
    except json.JSONDecodeError:
        print(f"Error: File {input_path} is not valid JSON format")
        return

    if not isinstance(data, list):
        # Iterating a dict or scalar would fail later with a confusing error.
        print(f"Error: File {input_path} does not contain a JSON list")
        return
    print(f"Successfully read source file, containing {len(data)} items.")

    # Serialize every entry up front: a malformed item then fails before the
    # output file is opened for writing.
    lines = []
    for item in data:
        image_path = item['image']
        if use_basename:
            image_path = os.path.basename(image_path)
        new_entry = {
            "prompt": item['caption'],
            "image": image_path,
            "requirement": "Restore",
        }
        lines.append(json.dumps(new_entry, ensure_ascii=False) + '\n')

    with open(output_path, 'w', encoding='utf-8') as f_out:
        f_out.writelines(lines)

    print(f"Conversion complete! File saved to: {output_path}")

    # Re-read the written file and show its first 3 lines for inspection.
    print("\n--- Example of the first 3 lines of the generated file ---")
    with open(output_path, 'r', encoding='utf-8') as f_check:
        for line in islice(f_check, 3):
            print(line.strip())


if __name__ == "__main__":
    convert_json_to_jsonl()