import json
import os
def convert_json_to_jsonl(
    input_path: str = '/data/zgq/yaozhengjian/Datasets/FFHQ_val/CelebA_HQ/captions_lq.json',
    output_dir: str = '/data/phd/yaozhengjian/Code/RL/ART-FRv2/DiffusionNFT/dataset/restore_face',
    output_file: str = 'train_metadata.jsonl',
    *,
    basename_only: bool = False,
) -> None:
    """Convert a JSON list of captioned images into a JSONL metadata file.

    The source file is loaded with ``json.load`` and iterated; every item
    must provide an ``'image'`` and a ``'caption'`` key. Each item becomes
    one output line of the form
    ``{"prompt": <caption>, "image": <image>, "requirement": "Restore"}``.
    After writing, up to the first three lines of the generated file are
    printed for visual inspection.

    Args:
        input_path: Path of the source JSON file.
        output_dir: Directory that receives the JSONL file; created when it
            does not exist yet.
        output_file: File name of the JSONL file inside ``output_dir``.
        basename_only: When true, store only the file name of each image
            (``os.path.basename``) instead of the path found in the source.

    Returns:
        None. A missing source file or invalid JSON is reported on stdout
        and the function returns without writing anything.

    Raises:
        KeyError: If an item lacks the ``'image'`` or ``'caption'`` key.
            This is raised before the output file is opened, so an existing
            output file is left untouched.
    """
    output_path = os.path.join(output_dir, output_file)

    # Create the output directory on demand. exist_ok=True keeps this safe
    # if something else creates the directory between the check and the call.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
        print(f"Created directory: {output_dir}")

    # Read the source JSON file; only the load itself is guarded.
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found {input_path}")
        return
    except json.JSONDecodeError:
        print(f"Error: File {input_path} is not valid JSON format")
        return
    print(f"Successfully read source file, containing {len(data)} items.")

    # Serialize every entry before opening the output in 'w' mode, so that a
    # malformed item cannot leave a truncated or half-written output file.
    lines = []
    for item in data:
        image_path = item['image']
        if basename_only:
            image_path = os.path.basename(image_path)
        new_entry = {
            "prompt": item['caption'],
            "image": image_path,
            "requirement": "Restore",
        }
        lines.append(json.dumps(new_entry, ensure_ascii=False) + '\n')

    with open(output_path, 'w', encoding='utf-8') as f_out:
        f_out.writelines(lines)
    print(f"Conversion complete! File saved to: {output_path}")

    # Read the result back from disk and show up to three lines as a sanity check.
    print("\n--- Example of the first 3 lines of the generated file ---")
    with open(output_path, 'r', encoding='utf-8') as f_check:
        for _ in range(3):
            line = f_check.readline()
            if not line:
                break
            print(line.strip())
# Script entry point: run the conversion with its default paths only when this
# file is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    convert_json_to_jsonl()