# CodeBERT-Attack / oj-attack / ojclone_norm.py
# -*- coding: utf-8 -*-
"""
Created on Wed Jan  6 17:35:12 2021

@author: DrLC
"""

import json
from utils import normalize
from cparser import CCode
import tqdm

if __name__ == "__main__":
    # Script entry point: read every record of ../data/ojclone.jsonl, replace
    # its 'func' field with the normalized token sequence of that code, and
    # write the records to ../data/ojclone_norm.jsonl (one JSON object per
    # line). Finally print how many records were normalized successfully.

    # JSON text is UTF-8; pin the encoding so the result does not depend on
    # the platform's default locale.
    with open("../data/ojclone.jsonl", "r", encoding="utf-8") as f:
        d = []
        # readlines() is kept so tqdm knows the total and can show progress.
        for line in tqdm.tqdm(f.readlines()):
            d.append(json.loads(line))

    cnt = 0  # number of records whose 'func' was normalized without error
    with open("../data/ojclone_norm.jsonl", "w", encoding="utf-8") as f:
        for l in tqdm.tqdm(d):
            try:
                # CCode / normalize come from project modules; presumably they
                # tokenize the source and canonicalize the tokens -- confirm
                # against cparser.py / utils.py.
                l['func'] = normalize(CCode(l['func']).getTokenSeq())
                cnt += 1
            except Exception:
                # Best effort: a record that cannot be processed is kept with
                # an empty token list. Catch Exception rather than using a
                # bare except so Ctrl-C (KeyboardInterrupt) and SystemExit
                # still stop the run instead of being recorded as failures.
                l['func'] = []
            f.write(json.dumps(l)+"\n")

    print (cnt)