Source code for pyggi.program

"""

This module contains the GranularityLevel and Program classes.

"""
import os
import shutil
import json
from enum import Enum
from distutils.dir_util import copy_tree
from .logger import Logger


class GranularityLevel(Enum):
    """
    Enumeration of the granularity levels at which a program can be
    represented and modified.
    """
    LINE = 'line'
    AST = 'AST'

    @classmethod
    def is_valid(cls, value):
        """
        Tell whether `value` is the value of one of the enum members.

        :param value: The candidate enum value to look up
        :return: ``True`` if some member's value equals `value`,
          ``False`` otherwise
        :rtype: bool

        .. hint::
            Some examples, ::

                GranularityLevel.is_valid('line')
                >> True
                GranularityLevel.is_valid('random_text')
                >> False
        """
        # Walk the members in definition order and stop at the first match.
        for member in cls:
            if value == member.value:
                return True
        return False
class Program(object):
    """
    Program encapsulates the original source code of a project.

    At construction time the project's configuration file is read, the
    project directory is copied into a temporary working directory, and the
    target files are parsed according to the granularity level:

    - ``GranularityLevel.LINE``: every target file is stored as a list of
      right-stripped code lines.
    - ``GranularityLevel.AST``: every target file (Python only) is stored as
      the tree returned by ``astor.parse_file``.
    """
    # Name of the JSON configuration file looked up in the project root.
    CONFIG_FILE_NAME = 'PYGGI_CONFIG'
    # Directory under which the temporary copy of each program is created.
    TMP_DIR = "./pyggi_tmp/"

    def __init__(self, path, granularity_level=GranularityLevel.LINE,
                 config_file_name=CONFIG_FILE_NAME):
        """
        :param str path: The project root path
        :param granularity_level: The granularity level of the program
        :type granularity_level: :py:class:`GranularityLevel`
        :param str config_file_name: The name of the JSON configuration file
          inside `path`; it must provide the ``test_command`` and
          ``target_files`` keys
        """
        assert isinstance(granularity_level, GranularityLevel)
        # Drop every trailing slash: with only one removed, a path such as
        # "proj//" would produce an empty program name.
        self.path = path.strip().rstrip('/')
        self.name = os.path.basename(self.path)
        self.logger = Logger(self.name)
        self.granularity_level = granularity_level
        with open(os.path.join(self.path, config_file_name)) as config_file:
            config = json.load(config_file)
        self.test_command = config['test_command']
        self.target_files = config['target_files']
        # Start from a fresh copy of the project in the temporary directory.
        Program.clean_tmp_dir(self.tmp_path)
        Program._copy_dir_contents(self.path, self.tmp_path)
        self.contents = Program.parse(self.granularity_level, self.path,
                                      self.target_files)
        self.modification_weights = dict()
        # Computed on first access of the `modification_points` property.
        self._modification_points = None

    def __str__(self):
        """
        :return: At line granularity, one ``file<TAB>: index<TAB>: line`` row
          per code line with files in sorted order; otherwise the string
          form of the target file list
        :rtype: str
        """
        if self.granularity_level == GranularityLevel.LINE:
            return ''.join(
                "{}\t: {}\t: {}\n".format(file_name, idx, line)
                for file_name in sorted(self.contents)
                for idx, line in enumerate(self.contents[file_name]))
        # __str__ must return a str; returning the list itself raises
        # TypeError when str() is applied to the program.
        return str(self.target_files)

    def reset_tmp_dir(self):
        """
        Recreate the temporary directory as a fresh copy of the original
        project.

        :return: None
        :rtype: None
        """
        Program.clean_tmp_dir(self.tmp_path)
        Program._copy_dir_contents(self.path, self.tmp_path)

    @staticmethod
    def _copy_dir_contents(src_dir, dst_dir):
        """
        Copy every entry of `src_dir` into the existing directory `dst_dir`.

        ``shutil`` is used rather than ``distutils.dir_util.copy_tree``
        because ``copy_tree`` remembers the directories it has created in a
        process-wide cache; copying again after the destination was removed
        can then skip re-creating sub-directories.

        :param str src_dir: The directory to copy from
        :param str dst_dir: The existing directory to copy into
        :return: None
        :rtype: None
        """
        for entry in os.listdir(src_dir):
            src = os.path.join(src_dir, entry)
            dst = os.path.join(dst_dir, entry)
            if os.path.isdir(src):
                shutil.copytree(src, dst)
            else:
                # copy2 also carries over permission bits and timestamps.
                shutil.copy2(src, dst)

    @property
    def tmp_path(self):
        """
        :return: The path of the temporary directory
        :rtype: str
        """
        return os.path.join(Program.TMP_DIR, self.name)

    @property
    def modification_points(self):
        """
        :return: The list of position of modification points for each target
          program
        :rtype: dict(str, ?)
        """
        assert isinstance(self.granularity_level, GranularityLevel)
        if self._modification_points:
            return self._modification_points

        # Build into a local dict and publish it only once complete, so a
        # failure half-way through never leaves a partial result cached.
        points = dict()
        if self.granularity_level == GranularityLevel.LINE:
            for target_file in self.target_files:
                # One modification point per stored line, addressed by index.
                points[target_file] = list(
                    range(len(self.contents[target_file])))
        elif self.granularity_level == GranularityLevel.AST:
            for target_file in self.target_files:
                if Program.is_python_code(target_file):
                    from .helper import stmt_python
                    points[target_file] = stmt_python.get_modification_points(
                        self.contents[target_file])
        self._modification_points = points
        return self._modification_points

    def select_modification_point(self, target_file, method="random"):
        """
        :param str target_file: The modification point is chosen within
          target_file
        :param str method: The way how to choose a modification point,
          *'random'* or *'weighted'*. *'weighted'* falls back to a uniform
          choice when no weights were set for `target_file` or when all of
          its weights are zero
        :return: The **index** of modification point
        :rtype: int
        """
        import random
        assert target_file in self.target_files
        assert method in ['random', 'weighted']
        candidates = self.modification_points[target_file]
        if method == 'random' or target_file not in self.modification_weights:
            return random.randrange(len(candidates))
        weights = self.modification_weights[target_file]
        if sum(weights) <= 0:
            # No weight carries any mass, so there is nothing to bias the
            # draw; normalising here would divide by zero.
            return random.randrange(len(candidates))
        # random.choices takes relative weights, no normalisation is needed.
        return random.choices(range(len(candidates)), weights=weights, k=1)[0]

    def set_modification_weights(self, target_file, weights):
        """
        :param str target_file: The path to file
        :param weights: The modification weight([0,1]) of each modification
          points
        :type weights: list(float)
        :return: None
        :rtype: None
        """
        from copy import deepcopy
        assert target_file in self.target_files
        assert len(self.modification_points[target_file]) == len(weights)
        assert not any(w < 0 or w > 1 for w in weights)
        # Store a copy so later changes to the caller's list have no effect.
        self.modification_weights[target_file] = deepcopy(weights)

    def write_to_tmp_dir(self, new_contents):
        """
        Write new contents to the temporary directory of program

        :param new_contents: The new contents of the program.
          Refer to *apply* method of :py:class:`.patch.Patch`
        :type new_contents: dict(str, ?)
        :rtype: None
        """
        for target_file in new_contents:
            source = Program.to_source(self.granularity_level,
                                       new_contents[target_file])
            with open(os.path.join(self.tmp_path, target_file), 'w') as tmp_file:
                tmp_file.write(source)

    def print_modification_points(self, target_file, indices=None):
        """
        Print the source of each modification points

        :param target_file: The path to target file
        :type target_file: str
        :param indices: The indices of the modification points to print;
          when omitted (or empty), every modification point is printed
        :return: None
        :rtype: None
        """
        title_format = "=" * 25 + " {} {} " + "=" * 25
        contents = self.contents[target_file]
        points = self.modification_points[target_file]
        if not indices:
            indices = range(len(points))

        is_line = self.granularity_level == GranularityLevel.LINE
        is_python_ast = (self.granularity_level == GranularityLevel.AST
                         and Program.is_python_code(target_file))
        if is_python_ast:
            import astor
            from .helper import stmt_python

        for i in indices:
            if is_line:
                print(title_format.format('line', i))
                print(contents[points[i]])
            elif is_python_ast:
                print(title_format.format('node', i))
                blk, idx = stmt_python.pos_2_block_n_index(contents, points[i])
                print(astor.to_source(blk[idx]))
            else:
                # Same error as `parse` raises for unsupported files, instead
                # of failing on an undefined local printer function.
                raise Exception('Program', '{} file is not supported'.format(
                    Program.get_file_extension(target_file)))

    @classmethod
    def to_source(cls, granularity_level, contents_of_file):
        """
        Change contents of file to the source code

        :param granularity_level: The parsing level of the program
        :type granularity_level: :py:class:`GranularityLevel`
        :param contents_of_file: The contents of the file which is the parsed
          form of source code
        :type contents_of_file: ?
        :return: The source code (an empty string for any other granularity
          level)
        :rtype: str
        """
        if granularity_level == GranularityLevel.LINE:
            # Lines are stored without line endings; restore them here.
            return '\n'.join(contents_of_file) + '\n'
        if granularity_level == GranularityLevel.AST:
            import astor
            return astor.to_source(contents_of_file)
        return ''

    @classmethod
    def clean_tmp_dir(cls, tmp_path):
        """
        Clean the temporary project directory if it exists, and leave an
        empty directory at `tmp_path`.

        :param str tmp_path: The path of directory to clean.
        :return: None
        """
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        # makedirs(exist_ok=True) avoids the check-then-create sequence and
        # also copes with a `tmp_path` that is nested or is TMP_DIR itself.
        os.makedirs(Program.TMP_DIR, exist_ok=True)
        os.makedirs(tmp_path, exist_ok=True)

    @classmethod
    def parse(cls, granularity_level, path, target_files):
        """
        :param granularity_level: The granularity level of a program
        :type granularity_level: :py:class:`.program.GranularityLevel`
        :param str path: The project root path
        :param target_files: The paths to target files from the project root
        :type target_files: list(str)
        :return: The contents of the files, see `Hint`
        :rtype: dict(str, list(str))
        :raises Exception: At AST granularity, when a target file is not a
          Python file

        .. hint::
            - key: the file name
            - value: the contents of the file
        """
        assert isinstance(granularity_level, GranularityLevel)
        if granularity_level == GranularityLevel.LINE:
            contents = {}
            for target in target_files:
                with open(os.path.join(path, target), 'r') as target_file:
                    # Trailing whitespace, including the newline, is dropped.
                    contents[target] = [line.rstrip() for line in target_file]
            return contents
        if granularity_level == GranularityLevel.AST:
            import astor
            contents = {}
            for target in target_files:
                if not cls.is_python_code(target):
                    raise Exception('Program', '{} file is not supported'.format(
                        cls.get_file_extension(target)))
                contents[target] = astor.parse_file(os.path.join(path, target))
            return contents
        return None

    @staticmethod
    def is_python_code(source_path):
        """
        :param source_path: The path of the source file
        :type source_path: str
        :return: whether the file's extension is *.py* or not
        :rtype: bool
        """
        _, file_extension = os.path.splitext(source_path)
        return file_extension == '.py'

    @staticmethod
    def is_java_code(source_path):
        """
        :param source_path: The path of the source file
        :type source_path: str
        :return: whether the file's extension is *.java* or not
        :rtype: bool
        """
        _, file_extension = os.path.splitext(source_path)
        return file_extension == '.java'

    @staticmethod
    def get_file_extension(file_path):
        """
        :param file_path: The path of file
        :type file_path: str
        :return: file extension, including the leading dot (empty when the
          path has no extension)
        :rtype: str
        """
        _, file_extension = os.path.splitext(file_path)
        return file_extension

    @staticmethod
    def have_the_same_file_extension(file_path_1, file_path_2):
        """
        :param file_path_1: The path of file 1
        :type file_path_1: str
        :param file_path_2: The path of file 2
        :type file_path_2: str
        :return: same or not
        :rtype: bool
        """
        return (Program.get_file_extension(file_path_1)
                == Program.get_file_extension(file_path_2))