# Source: CodeBERT-Attack / preprocess / extract_java_special_token.py
# -*- coding: utf-8 -*-
"""
Created on Sun Dec  6 16:28:55 2020

@author: DrLC
"""

import json
import tqdm

def load_json(path):
    """Load a JSON Lines file into a list of decoded objects.

    Every non-blank line of the file at *path* is parsed with ``json.loads``.

    Args:
        path: Location of the file to read; one JSON document per line.

    Returns:
        A list holding one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Use an explicit encoding so the result does not depend on the
    # platform's locale default.
    with open(path, "r", encoding="utf-8") as f:
        # Materialize the lines as a list so the progress bar can be given
        # a sized iterable, as in the original implementation.
        lines = f.readlines()

    ret = []
    for line in tqdm.tqdm(lines):
        # Skip blank lines (e.g. a trailing empty line) instead of letting
        # json.loads fail on them.
        if line.strip():
            ret.append(json.loads(line))
    return ret

def get_class_token(data):
    """Collect the distinct ``className`` values of *data* in first-seen order.

    Args:
        data: Iterable of mappings, each providing a ``'className'`` entry.
            The values are assumed to be hashable (strings, as produced by
            JSON decoding) -- TODO confirm against the input file.

    Returns:
        A list of unique class names, ordered by first occurrence.

    Raises:
        KeyError: If a record has no ``'className'`` entry.
    """
    ret = []
    # Companion set gives O(1) membership tests; the list keeps the order.
    seen = set()
    for api in tqdm.tqdm(data):
        name = api['className']
        if name not in seen:
            seen.add(name)
            ret.append(name)
    return ret

def get_package_token(data):
    """Collect the distinct dot-separated parts of every ``classPackage``.

    Each record's ``'classPackage'`` string is split on ``"."`` and every
    component not seen before is appended to the result.

    Args:
        data: Iterable of mappings, each providing a ``'classPackage'``
            string.

    Returns:
        A list of unique package-name components, ordered by first
        occurrence.

    Raises:
        KeyError: If a record has no ``'classPackage'`` entry.
    """
    ret = []
    # Companion set gives O(1) membership tests; the list keeps the order.
    seen = set()
    for api in tqdm.tqdm(data):
        for t in api['classPackage'].split("."):
            if t not in seen:
                seen.add(t)
                ret.append(t)
    return ret

def get_static_token(data):
    """Return the ``methodName`` of every record marked ``static``.

    A record qualifies when the word ``static`` appears among the
    whitespace-separated words of its ``'methodModifierAndType'`` string.
    Names are returned in input order and are not de-duplicated.

    Args:
        data: Iterable of mappings providing ``'methodModifierAndType'``
            and ``'methodName'`` entries.

    Returns:
        A list of method names, one per qualifying record.
    """
    progress = tqdm.tqdm(data)
    return [
        entry['methodName']
        for entry in progress
        if 'static' in entry['methodModifierAndType'].split()
    ]

if __name__ == "__main__":

    # Load the API records and derive the three token lists from them.
    d = load_json("./javase7.json")
    class_token = get_class_token(d)
    package_token = get_package_token(d)
    static_token = get_static_token(d)

    # Write one token per line: class tokens first, then package tokens,
    # then static-method tokens. The encoding is explicit so the output
    # file does not depend on the platform's locale default.
    with open("java_special_ids.txt", 'w', encoding="utf-8") as f:
        for t in tqdm.tqdm(class_token + package_token + static_token):
            f.write(t + "\n")