Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support determining activity date from default branch #79

Merged
merged 3 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

This project identifies and reports repositories with no activity for a configurable amount of time, in order to surface inactive repos to be considered for archival.
The current approach assumes that the repos that you want to evaluate are available in a single GitHub organization.
For the purpose of this action, a repository is considered inactive if it has not had a `push` in a configurable amount of days.
For the purpose of this action, a repository is considered inactive if it has not had a `push` in a configurable number of days. It can also be configured to determine activity based on the default branch instead; see `ACTIVITY_METHOD` for more details.

This action was developed by GitHub so that we can keep our open source projects well maintained, and it was made open source in the hopes that it would help you too!
We are actively using and are archiving things in batches since there are many repositories on our report.
Expand All @@ -30,12 +30,13 @@ Below are the allowed configuration options:

| field | required | default | description |
|-----------------------|----------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `GH_TOKEN` | true | | The GitHub Token used to scan repositories. Must have read access to all repositories you are interested in scanning |
| `ORGANIZATION` | false | | The organization to scan for stale repositories. If no organization is provided, this tool will search through repositories owned by the GH_TOKEN owner |
| `INACTIVE_DAYS` | true | | The number of days used to determine if repository is stale, based on `push` events |
| `EXEMPT_TOPICS` | false | | Comma separated list of topics to exempt from being flagged as stale |
| `EXEMPT_REPOS` | false | | Comma separated list of repositories to exempt from being flagged as stale. Supports Unix shell-style wildcards. ie. `EXEMPT_REPOS = "stale-repos,test-repo,conf-*"` |
| `GH_ENTERPRISE_URL` | false | `""` | URL of GitHub Enterprise instance to use for auth instead of github.com |
| `GH_TOKEN` | true | | The GitHub Token used to scan repositories. Must have read access to all repositories you are interested in scanning |
| `ORGANIZATION` | false | | The organization to scan for stale repositories. If no organization is provided, this tool will search through repositories owned by the GH_TOKEN owner |
| `INACTIVE_DAYS` | true | | The number of days used to determine if repository is stale, based on `push` events |
| `EXEMPT_TOPICS` | false | | Comma separated list of topics to exempt from being flagged as stale |
| `EXEMPT_REPOS` | false | | Comma separated list of repositories to exempt from being flagged as stale. Supports Unix shell-style wildcards. ie. `EXEMPT_REPOS = "stale-repos,test-repo,conf-*"` |
| `GH_ENTERPRISE_URL` | false | `""` | URL of GitHub Enterprise instance to use for auth instead of github.com |
| `ACTIVITY_METHOD` | false | `"pushed"` | How to get the last active date of the repository. Defaults to `pushed`, which is the last time any branch had a push. Can also be set to `default_branch_updated` to instead measure from the latest commit on the default branch (useful for ignoring automated pushes to other branches, such as Dependabot updates) |

### Example workflow

Expand All @@ -62,6 +63,7 @@ jobs:
ORGANIZATION: ${{ secrets.ORGANIZATION }}
EXEMPT_TOPICS: "keep,template"
INACTIVE_DAYS: 365
ACTIVITY_METHOD: "pushed"

# This next step updates an existing issue. If you want a new issue every time, remove this step and remove the `issue-number: ${{ env.issue_number }}` line below.
- name: Check for the stale report issue
Expand Down
41 changes: 34 additions & 7 deletions stale_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,15 @@ def get_inactive_repos(github_connection, inactive_days_threshold, organization)
if is_repo_exempt(repo, exempt_repos, exempt_topics):
continue

# Get last push date
last_push_str = repo.pushed_at # type: ignore
if last_push_str is None:
# Get last active date
active_date = get_active_date(repo)
if active_date is None:
continue
last_push = parse(last_push_str)
last_push_disp_date = last_push.date().isoformat()

days_inactive = (datetime.now(timezone.utc) - last_push).days
active_date_disp = active_date.date().isoformat()
days_inactive = (datetime.now(timezone.utc) - active_date).days
if days_inactive > int(inactive_days_threshold) and not repo.archived:
inactive_repos.append((repo.html_url, days_inactive, last_push_disp_date))
inactive_repos.append((repo.html_url, days_inactive, active_date_disp))
print(f"{repo.html_url}: {days_inactive} days inactive") # type: ignore
if organization:
print(f"Found {len(inactive_repos)} stale repos in {organization}")
Expand All @@ -143,6 +142,34 @@ def get_inactive_repos(github_connection, inactive_days_threshold, organization)
return inactive_repos


def get_active_date(repo):
    """Get the last activity date of the repository.

    The strategy is chosen by the ``ACTIVITY_METHOD`` environment variable.
    The value is matched case-insensitively and surrounding whitespace is
    ignored:

    * ``pushed`` (the default, also used when the variable is unset or
      blank): the repository's ``pushed_at`` timestamp.
    * ``default_branch_updated``: the committer date of the commit that
      ``repo.branch(repo.default_branch)`` reports for the default branch.

    Args:
        repo: A Github repository object.

    Returns:
        The datetime parsed from the selected timestamp, or None when the
        method is ``pushed`` and the repository has no ``pushed_at`` value.

    Raises:
        ValueError: If ``ACTIVITY_METHOD`` holds an unsupported value.
    """
    # A variable that is set but blank (e.g. a workflow forwarding an
    # undefined input) must behave like an unset one, so fall back to the
    # default instead of rejecting the empty string.
    raw_method = os.getenv("ACTIVITY_METHOD") or ""
    activity_method = raw_method.strip().lower() or "pushed"

    if activity_method == "default_branch_updated":
        commit = repo.branch(repo.default_branch).commit
        return parse(commit.commit.as_dict()["committer"]["date"])

    if activity_method == "pushed":
        last_push_str = repo.pushed_at  # type: ignore
        if last_push_str is None:
            return None
        return parse(last_push_str)

    # Single-line message: a triple-quoted literal would embed newlines and
    # source indentation in the exception text.
    raise ValueError(
        "ACTIVITY_METHOD environment variable has unsupported value: "
        f"'{activity_method}'. "
        "Allowed values are: 'pushed' and 'default_branch_updated'"
    )


def write_to_markdown(inactive_repos, inactive_days_threshold, file=None):
"""Write the list of inactive repos to a markdown file.

Expand Down
66 changes: 66 additions & 0 deletions test_stale_repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,72 @@ def test_get_inactive_repos_with_no_organization_set(self):
]
assert inactive_repos == expected_inactive_repos

@patch.dict(os.environ, {"ACTIVITY_METHOD": "default_branch_updated"})
def test_get_inactive_repos_with_default_branch_updated(self):
    """Test get_inactive_repos with ACTIVITY_METHOD=default_branch_updated.

    Three mocked repositories are returned by a mocked organization. Each
    repository's ``branch()`` call is wired to report a committer date for
    its latest commit: one 20 days old, one 40 days old, and one 40 days
    old but archived. With a 30 day threshold, only the non-archived
    repository with the 40 day old commit should be reported as inactive.
    """
    forty_days_ago = datetime.now(timezone.utc) - timedelta(days=40)
    twenty_days_ago = datetime.now(timezone.utc) - timedelta(days=20)

    def build_repo(last_commit_at, **attributes):
        # Simulate a repository with no topics whose branch lookup yields
        # a commit carrying the given committer date.
        repo = MagicMock(**attributes)
        repo.topics().names = []
        repo.branch().commit.commit.as_dict = MagicMock(
            return_value={"committer": {"date": last_commit_at.isoformat()}}
        )
        return repo

    recently_active = build_repo(
        twenty_days_ago,
        html_url="https://github.com/example/repo1",
        default_branch="master",
        archived=False,
    )
    stale = build_repo(
        forty_days_ago,
        html_url="https://github.com/example/repo2",
        archived=False,
    )
    stale_but_archived = build_repo(
        forty_days_ago,
        html_url="https://github.com/example/repo3",
        archived=True,
    )

    # Wire the fake GitHub connection: organization -> repositories
    fake_org = MagicMock()
    fake_org.repositories.return_value = [
        recently_active,
        stale,
        stale_but_archived,
    ]
    fake_github = MagicMock()
    fake_github.organization.return_value = fake_org

    # Run against the fake connection with a 30 day threshold
    result = get_inactive_repos(fake_github, 30, "example")

    # Only the non-archived repo past the threshold should be reported
    expected = [
        ("https://github.com/example/repo2", 40, forty_days_ago.date().isoformat()),
    ]
    assert result == expected


class WriteToMarkdownTestCase(unittest.TestCase):
"""
Expand Down