diff --git a/.github/workflows/pr-review.yml b/.github/workflows/pr-review.yml
new file mode 100644
index 00000000..8b7f9a90
--- /dev/null
+++ b/.github/workflows/pr-review.yml
@@ -0,0 +1,117 @@
+name: Code Critic PR Review
+
+on:
+  pull_request:
+    types: [opened, reopened, ready_for_review]
+    branches: [master]
+  issue_comment:
+    types: [created]
+
+jobs:
+  pr_review:
+    runs-on: [ docker ]
+    # issue_comment also fires for plain issues; only run for comments made on pull requests.
+    if: github.event_name == 'pull_request' || (github.event_name == 'issue_comment' && github.event.issue.pull_request && contains(github.event.comment.body, '/review'))
+
+    steps:
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Print Python version
+        run: python --version
+
+      - name: Create GitHub App token
+        uses: actions/create-github-app-token@v1
+        id: app-token
+        with:
+          # required
+          app-id: ${{ vars.CODE_CRITIC_APP_ID }}
+          private-key: ${{ secrets.CODE_CRITIC_APP_PRIVATE_KEY }}
+          owner: ${{ github.repository_owner }}
+          repositories: "code-critic"
+
+      - name: Get PR head ref for issue comments
+        if: github.event_name == 'issue_comment'
+        id: get-pr-ref
+        run: |
+          PR_API_URL="${{ github.event.issue.pull_request.url }}"
+          PR_JSON=$(curl -s -H "Authorization: token $GITHUB_TOKEN" "$PR_API_URL")
+          PR_HEAD_REF=$(echo "$PR_JSON" | jq -r .head.ref)
+          # The set-output workflow command is deprecated; step outputs go to $GITHUB_OUTPUT.
+          echo "pr_head_ref=$PR_HEAD_REF" >> "$GITHUB_OUTPUT"
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Checkout PR branch
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ steps.get-pr-ref.outputs.pr_head_ref || github.event.pull_request.head.ref }}
+          fetch-depth: 0 # Necessary to fetch all history for diff
+
+      - name: Checkout code-critic repository
+        uses: actions/checkout@v3
+        with:
+          repository: navi-medici/code-critic
+          token: ${{ steps.app-token.outputs.token }}
+          ref: main
+          path: code-critic
+
+      - name: Install requirements
+        shell: bash
+        run: |
+          pip install -r code-critic/requirements.txt
+      - name: React with eyes to /review command
+        if: github.event_name == 'issue_comment' && contains(github.event.comment.body, '/review')
+        run: |
+          echo "Reacting with emoji to /review command..."
+          repo_owner=$(echo "${{ github.repository }}" | cut -d '/' -f 1)
+          repo_name=$(echo "${{ github.repository }}" | cut -d '/' -f 2)
+          comment_id=${{ github.event.comment.id }}
+          echo "Repo Owner: $repo_owner, Repo Name: $repo_name, Comment ID: $comment_id"
+          python code-critic/utils/react_on_comment.py $repo_owner $repo_name $comment_id eyes
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Set up environment
+        run: |
+          echo "GITHUB_REPOSITORY_OWNER=${{ github.repository_owner }}" >> $GITHUB_ENV
+          echo "REPO_NAME=$(echo ${{ github.repository }} | cut -d'/' -f2)" >> $GITHUB_ENV
+          if [ "${{ github.event_name }}" == "pull_request" ]; then
+            echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
+          elif [ "${{ github.event_name }}" == "issue_comment" ]; then
+            PR_URL=${{ github.event.issue.pull_request.html_url }}
+            PR_NUMBER=$(basename $PR_URL)
+            echo "PR_NUMBER=$PR_NUMBER" >> $GITHUB_ENV
+          fi
+      - name: List directory contents
+        run: ls -R $GITHUB_WORKSPACE
+
+      - name: Run code review script
+        shell: bash
+        # pr-review.py requires the repository root as its only argument.
+        run: python $GITHUB_WORKSPACE/scripts/pr-review.py $GITHUB_WORKSPACE
+        env:
+          GPT_MODEL_NAME: ${{ secrets.GPT_MODEL_NAME }}
+          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
+          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
+
+      - name: Read file and post comments
+        run: python code-critic/utils/post_comments.py $PR_NUMBER code_review_output.txt
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ env.PR_NUMBER }}
+
+      - name: React with rocket marking completion
+        if: github.event_name == 'issue_comment' && contains(github.event.comment.body, '/review')
+        run: |
+          echo "Reacting with rocket"
+          repo_owner=$(echo "${{ github.repository }}" | cut -d '/' -f 1)
+          repo_name=$(echo "${{ github.repository }}" | cut -d '/' -f 2)
+          comment_id=${{ github.event.comment.id }}
+          echo "Repo Owner: $repo_owner, Repo Name: $repo_name, Comment ID: $comment_id"
+          python code-critic/utils/react_on_comment.py $repo_owner $repo_name $comment_id rocket
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/App.tsx b/App.tsx
index d3305db7..33285071 100644
--- a/App.tsx
+++ b/App.tsx
@@ -94,6 +94,8 @@ function App() {
     }
   };
 
+  const testFunction = (): number[] => [1, 2, 3, 4, 5].map(item => item * 2);
+
   const getActiveRouteName = (state) => {
     if (!state || typeof state.index !== 'number') {
       return 'Unknown';
diff --git a/scripts/pr-review.py b/scripts/pr-review.py
new file mode 100644
index 00000000..69b2b24c
--- /dev/null
+++ b/scripts/pr-review.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[12]:
+
+
+import os
+import re
+import base64
+import sys
+import subprocess
+import concurrent.futures
+from typing import List, Tuple
+from litellm import completion
+from tenacity import retry, wait_exponential, stop_after_attempt
+
+
+# In[13]:
+OUTPUT_FILE = 'code_review_output.txt'
+DEFAULT_MAX_WORKERS = 3
+
+
+def is_git_repository_root(directory: str) -> bool:
+    """Return True when `directory` directly contains a `.git` directory."""
+    return os.path.isdir(os.path.join(directory, '.git'))
+
+
+def install_packages_from_file(filename: str) -> None:
+    """Best-effort `pip install` of every non-empty line of `filename`.
+
+    Problems are reported on stderr and never raised, so a missing or
+    partly broken requirements file does not abort the review run.
+    """
+    try:
+        with open(filename, 'r') as f:
+            package_names = [line.strip() for line in f if line.strip()]
+    except (OSError, UnicodeDecodeError) as e:
+        print(f"Skipping package installation, cannot read {filename}: {e}", file=sys.stderr)
+        return
+
+    for package_name in package_names:
+        try:
+            subprocess.check_call(
+                [sys.executable, '-m', 'pip', 'install', package_name, '--quiet'],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+        except (OSError, subprocess.CalledProcessError) as e:
+            print(f"Failed to install {package_name}: {e}", file=sys.stderr)
+
+
+def extract_code(directory: str) -> List[Tuple[str, str]]:
+    """Collect Java/Kotlin sources under `directory` for a full-codebase review.
+
+    Directories whose path contains "test" are skipped and only files whose
+    path contains one of `include_terms` are kept. Import statements are
+    stripped from the returned code.
+
+    Returns a list of (file_path, code) tuples.
+    """
+    code_snippets = []
+    include_terms = {'service', 'controller', 'listener', 'scheduler', 'utils', 'client', 'repository', 'dao'}
+    for root, _dirs, files in os.walk(directory):
+        # Skip test directories
+        if 'test' in root.lower():
+            continue
+
+        for file in files:
+            if not file.endswith(('.java', '.kt')):
+                continue
+            file_path = os.path.join(root, file)
+            # Only include paths that mention one of the include terms
+            if not any(term in file_path.lower() for term in include_terms):
+                continue
+            with open(file_path, 'r', encoding='utf-8') as f:
+                code = f.read()
+            # Remove import statements
+            code = re.sub(r'^import .*$', '', code, flags=re.MULTILINE)
+            code_snippets.append((file_path, code))
+    return code_snippets
+
+
+def classify_code(code_snippets: List[Tuple[str, str]]) -> List[Tuple[str, str, str]]:
+    """Tag each (file_path, code) pair with a class type derived from its path.
+
+    The first of Service, Client, Controller, Listener, Util contained in the
+    path wins; paths matching none of them are tagged 'Unknown'.
+    """
+    class_types = ('Service', 'Client', 'Controller', 'Listener', 'Util')
+    classified_code = []
+    for file_path, code in code_snippets:
+        class_type = next((name for name in class_types if name in file_path), 'Unknown')
+        classified_code.append((file_path, code, class_type))
+    return classified_code
+
+
+# In[15]:
+
+def create_system_prompt() -> str:
+    """Return the system prompt for the review model.
+
+    The prompt is base64-decoded from ENCODED_CODE_REVIEW_SYSTEM_PROMPT, or
+    from the built-in default when that variable is not set.
+    """
+    encoded_prompt = os.getenv('ENCODED_CODE_REVIEW_SYSTEM_PROMPT', 'Um9sZTpHUFQgQ29kZSBSZXZpZXcgQWdlbnQ7IApPYmplY3RpdmU6UmV2aWV3IEpTLCBKU1gsIFRTLCBUU1ggY29kZTsKVGhpbmdzIHRvIHJldmlldzogRm9jdXMgb24gbWFqb3IgaXNzdWVzCjEuIE5vIG1hZ2ljIG51bWJlcnMgYW5kIG1hZ2ljIHN0cmluZ3MKMi4gaW1wcm92ZSBuYW1pbmcgb2YgdmFyaWFibGVzIGFuZCBmdW5jdGlvbnMKMy4gaW1wcm92ZSB0eXBlc2NyaXB0IHVzZQo0LiBubyBoYXJkY29kaW5nIG9mIHotaW5kZXggYW5kIGNvbG91cnMKNS4gYXZvaWQgZGF0ZS5ub3coKSB3ZSBoYXZlIHNlcnZlciB0aW1lIGF2YWlsYWJsZSB3aGljaCBpcyBtb3JlIGNvbnNpc3RlbnQKNi4gdXNlIG9wdGlvbmFsIGNoYWluaW5nIHdoZXJlIGV2ZXIgcG9zc2libGUuCjcuIHByZWZlciBmdW5jdGlvbmFsIHByb2dyYW1taW5nCjguIHJldXNlIGV4aXN0aW5nIGhlbHBlciBjb2RlCjkuIERvbuKAmXQgdXNlIGNhcnJldCBpbiBwYWNrYWdlLmpzb24KMTAuIEFwcHJvcHJpYXRlIHVzZSBvZiB1c2VNZW1vIGFuZCB1c2VDYWxsYmFjawoxMS4gRG9uJ3QgY3JlYXRlIGNvbXBvbmVudHMgaW5zaWRlIHJlbmRlciBmdW5jdGlvbgoxMi4gRG9uJ3QgY3JlYXRlIGZ1bmN0aW9ucyBpbnNpZGUgcmVuZGVyIGZ1bmN0aW9uCjEzLiBEb24ndCB1c2UgaW5saW5lIHN0eWxlcwoxNC4gRG9uJ3QgdXNlIGlubGluZSBldmVudCBoYW5kbGVycwoxNS4gcHJlZmVyIHRvIGV4dHJhY3QgZnVuY3Rpb24gb3V0c2lkZSBqc3ggaWYgbW9yZSB0aGFuIHR3byBjb25kaXRpb25zIGFyZSByZXF1aXJlZAoxNi4gRG9uJ3QgdXNlIGFueSB0eXBlCjE3LiBPbmx5IHVzZSBjb25zdCBhbmQgaWYgcmVxdWlyZWQgbGV0CjE4LiBFcnJvciBIYW5kbGluZwoxOS4gT3B0aW1pemF0aW9uOiBSZWNvbW1lbmQgYXZvaWRpbmcgcHJlbWF0dXJlIG9wdGltaXphdGlvbiBidXQgYWxzbyBzdWdnZXN0IGtlZXBpbmcgYW4gZXllIG91dCBmb3Igb2J2aW91cyBwZXJmb3JtYW5jZSBpc3N1ZXMsIHN1Y2ggYXMgdW5uZWNlc3NhcnkgY29tcHV0YXRpb25zIGluc2lkZSBsb29wcyBvciBleGNlc3NpdmUgRE9NIG1hbmlwdWxhdGlvbnMuCjIwLiBNZW1vcnkgTGVha3M6IEFkdmlzZSBjaGVja2luZyBmb3IgYW5kIGVsaW1pbmF0aW5nIHBvdGVudGlhbCBtZW1vcnkgbGVha3MsIGZvciBleGFtcGxlLCBieSBlbnN1cmluZyBldmVudCBsaXN0ZW5lcnMgYXJlIHByb3Blcmx5IHJlbW92ZWQgd2hlbiBubyBsb25nZXIgbmVlZGVkLgoKRm9ybWF0IG9mIG91dHB1dCB3aWxsIGJlOiAKRm9ybWF0dGVkIEdpdEh1YiBQUiBjb21tZW50IGluIGJlbG93IGdpdmVuIGZvcm1hdC4gRG9uJ3Qgd3JhcCB0aGUgb3V0cHV0IGluIHRyaXBsZSB0aWNrcyAoYGBgKS4KCiMjIyBwYXRoL3RvL2ZpbGUKKipJc3N1ZXMgRm91bmQqKgpgYGAKY29kZSBzbmlwcGV0CmBgYAoqKklzc3VlOioqIFNob3J0IElzc3VlIERlc2NyaXB0aW9uIChlZy4gbWFnaWMgc3RyaW5nIGxpdGVyYWwgdXNlZCkuCioqU3VnZ2VzdGVkIEZpeDoqKiBJc3N1ZSBGaXguIChlZy4gRGVmaW5lIGEgY29uc3RhbnQgZm9yIHRoZSBtYWdpYyBzdHJpbmcgbGl0ZXJhbCBhbmQgdXNlIGl0IGluc3RlYWQpLgoK')
+    decoded_bytes = base64.b64decode(encoded_prompt)
+    system_prompt = decoded_bytes.decode('utf-8')
+    return system_prompt
+
+
+def create_gpt_prompt(file_path: str, code: str, class_type: str) -> str:
+    """Return the user prompt for one file.
+
+    The template is base64-decoded from ENCODED_CODE_REVIEW_USER_PROMPT (or
+    the built-in default) and filled via `str.format` with `file_path`,
+    `class_type` and `code`.
+    """
+    encoded_prompt = os.getenv('ENCODED_CODE_REVIEW_USER_PROMPT', 'Q29kZToKe2NvZGV9')
+    decoded_bytes = base64.b64decode(encoded_prompt)
+    user_prompt = decoded_bytes.decode('utf-8')
+    filled_prompt = user_prompt.format(file_path=file_path, class_type=class_type, code=code)
+    return filled_prompt
+
+
+# LLM Utils
+# In[17]:
+@retry(wait=wait_exponential(multiplier=2, min=5, max=30), stop=stop_after_attempt(3))
+def call_gpt_model(prompt: str) -> str:
+    """Send `prompt` to the model named by GPT_MODEL_NAME and return its reply.
+
+    Any exception is logged and re-raised so that the `retry` decorator can
+    retry the call (up to three attempts).
+    """
+    try:
+        response = completion(
+            model=os.getenv('GPT_MODEL_NAME'),
+            messages=[
+                {
+                    "role": "system",
+                    "content": create_system_prompt(),
+                },
+                {
+                    "role": "user",
+                    "content": prompt,
+                }
+            ],
+        )
+        return response['choices'][0]['message']['content'].strip()
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        raise
+
+
+# In[16]:
+def get_pr_diff(directory: str, base_branch: str) -> List[str]:
+    """Return the lines of `git diff origin/<base_branch>...HEAD` run in `directory`.
+
+    Git runs with `cwd=directory`, so the working directory of this process
+    is left untouched. Returns an empty list when a git command fails.
+    """
+    try:
+        # Ensure base branch is up-to-date
+        subprocess.check_call(['git', 'fetch', 'origin', base_branch], cwd=directory)
+
+        # Get the diff output between origin/base_branch and HEAD
+        diff_output = subprocess.check_output(
+            ['git', 'diff', f'origin/{base_branch}...HEAD'],
+            text=True,
+            cwd=directory,
+        )
+    except subprocess.CalledProcessError as e:
+        print(f"Error: {e}")
+        return []
+
+    return diff_output.splitlines()
+
+
+def extract_code_from_diff(diff_lines: List[str]) -> List[Tuple[str, str]]:
+    """Group the added lines of a unified diff by the file they belong to.
+
+    Returns a list of (file_path, added_code) tuples; files without added
+    lines are omitted.
+    """
+    code_snippets = []
+    file_path = ""
+    added_lines: List[str] = []
+    for line in diff_lines:
+        if line.startswith('+++ b/'):
+            if file_path and added_lines:
+                code_snippets.append((file_path, ''.join(added_lines)))
+            added_lines = []
+            file_path = line[6:]
+        # Skip only '+++ ' file headers (e.g. '+++ /dev/null'); added code that
+        # itself starts with '+' (such as '++i') must be kept.
+        elif line.startswith('+') and not line.startswith('+++ '):
+            added_lines.append(line[1:] + '\n')
+    if file_path and added_lines:
+        code_snippets.append((file_path, ''.join(added_lines)))
+    return code_snippets
+
+
+def generate_review_for_file(file_path: str, code: str, class_type: str) -> str:
+    """Return the model's review for one file.
+
+    Returns "" when the call fails, the reply is blank, or the reply
+    contains "no major issues found".
+    """
+    try:
+        print("Generating review for file path ", file_path)
+        prompt = create_gpt_prompt(file_path, code, class_type)
+        gpt_response = call_gpt_model(prompt)
+        if gpt_response.strip() and "no major issues found" not in gpt_response.lower():
+            return gpt_response
+    except Exception as e:
+        print(f"Failed to generate review for file {file_path}: {e}")
+    return ""
+
+
+def generate_reviews(classified_code: List[Tuple[str, str, str]], output_file: str, max_workers) -> List[str]:
+    """Review every classified file concurrently.
+
+    Each non-empty review is appended to `output_file`, followed by a
+    separator, as soon as it completes. Returns the list of reviews.
+    """
+    reviews = []
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        future_to_path = {
+            executor.submit(generate_review_for_file, file_path, code, class_type): file_path
+            for file_path, code, class_type in classified_code
+        }
+
+        for future in concurrent.futures.as_completed(future_to_path):
+            file_path = future_to_path[future]
+            try:
+                gpt_response = future.result()
+                if gpt_response:
+                    reviews.append(gpt_response)
+                    with open(output_file, 'a') as f:
+                        f.write(gpt_response)
+                        f.write("\n---*---\n\n")  # Separator
+            except Exception as e:
+                print(f"Failed to generate review for file {file_path}: {e}")
+
+    return reviews
+
+
+# In[20]:
+def run_analysis(directory, output_file, max_workers):
+    """Review either the PR diff or the whole codebase rooted at `directory`.
+
+    REVIEW_MODE selects the mode: 'pr' (the default) reviews the diff against
+    BASE_BRANCH (default 'master'); any other value reviews the full codebase.
+    Reviews are appended to `output_file`.
+    """
+    review_mode = os.getenv('REVIEW_MODE', 'pr')  # Default to PR review
+    if not is_git_repository_root(directory):
+        response = "Failure :: This script must be run at the root of a Git repository."
+        with open(output_file, 'a') as f:
+            f.write(response)
+            f.write("\n---*---\n")  # Separator
+        return
+
+    print("Running in mode : ", review_mode)
+    if review_mode == 'pr':
+        base_branch = os.getenv('BASE_BRANCH', 'master')
+        diff_lines = get_pr_diff(directory, base_branch)
+        print("diff ", diff_lines)
+        code_snippets = extract_code_from_diff(diff_lines)
+    else:
+        code_snippets = extract_code(directory)
+
+    print("Identified code snippets list of size ", len(code_snippets))
+    classified_code = classify_code(code_snippets)
+    print("Classified code snippets list of size ", len(classified_code))
+    report = generate_reviews(classified_code, output_file, max_workers)
+    print(report)
+
+
+def _resolve_max_workers() -> int:
+    """Return CODE_CRITIC_MAX_WORKERS as a positive int, else DEFAULT_MAX_WORKERS.
+
+    Environment values are strings, and ThreadPoolExecutor only accepts a
+    positive integer for `max_workers`.
+    """
+    raw_value = os.getenv('CODE_CRITIC_MAX_WORKERS')
+    if raw_value is None:
+        return DEFAULT_MAX_WORKERS
+    try:
+        max_workers = int(raw_value)
+    except ValueError:
+        max_workers = 0
+    if max_workers <= 0:
+        print(f"Ignoring invalid CODE_CRITIC_MAX_WORKERS={raw_value!r}", file=sys.stderr)
+        return DEFAULT_MAX_WORKERS
+    return max_workers
+
+
+# In[21]:
+
+if __name__ == "__main__":
+    # Environment Variables
+    # Set the following Keys for the Script to Run
+    # REVIEW_MODE - full/pr
+    # GPT_MODEL_NAME
+    # Based on GPT Model - Set the credentials
+    # OPENAI_API_KEY
+    # AZURE_API_KEY
+    # AZURE_API_BASE
+    # AZURE_API_VERSION
+
+    if os.path.exists(OUTPUT_FILE):
+        os.remove(OUTPUT_FILE)
+    if len(sys.argv) != 2:
+        response = "Usage: python generic_code_review directory"
+        with open(OUTPUT_FILE, 'a') as f:
+            f.write(response)
+            f.write("\n---*---\n")  # Separator
+    else:
+        directory = sys.argv[1]
+        install_packages_from_file('requirements.txt')
+        run_analysis(directory, OUTPUT_FILE, _resolve_max_workers())
\ No newline at end of file