recent-github-commits.ipynb (Source)
List all commits in git repositories since a given time
This script will find all git repositories under a given root directory, and then list all commits made to those repositories since a given time.
Repositories are identified by the GitHub remote URL: when you have more than one copy of a repository, the one closest to the root will be used and the rest will be ignored.
In [ ]:
from pathlib import Path
from datetime import datetime, timezone
# Cut-off point: commits dated before this moment are left out of the report.
last_blog = datetime(year=2023, month=1, day=1, tzinfo=timezone.utc)

# Top of the directory tree that is searched for git repositories.
root = Path('/home/christian/numbas')

# Only repositories owned by one of these organisations are reported;
# repositories under any other organisation are skipped.
want_organisations = {'christianp', 'numbas'}

# Repositories to leave out, each written as an `(organisation, repository)` pair.
ignore_repos = list()
In [ ]:
def find_git_repos(root):
    """Walk the tree below ``root`` and yield each directory that has a ``.git`` entry.

    ``root`` itself is checked too. Children that are not directories, that are
    symlinks, or whose name starts with a dot are never descended into. A
    directory that is itself a repository is still searched for nested ones.
    When a ``PermissionError`` is raised while handling a directory, the rest
    of that directory is abandoned and the walk carries on elsewhere.
    """
    pending = [root]
    while pending:
        # Take the most recently added directory first.
        current = pending.pop()
        try:
            if (current / '.git').exists():
                yield current
            for child in current.iterdir():
                hidden = child.name.startswith('.')
                if child.is_dir() and not child.is_symlink() and not hidden:
                    pending.append(child)
        except PermissionError:
            continue
# Order the repositories from the shallowest path to the deepest one, so that
# copies nearer the root come before deeper copies of the same repository.
all_repos = list(find_git_repos(root))
all_repos.sort(key=lambda repo_dir: len(repo_dir.parents))
In [ ]:
import configparser
import re
# Maps `(organisation, repository)` to the directory of the clone that is used.
repos = {}
# Every organisation seen in a recognised remote URL, whether wanted or not.
all_organisations = set()

# Recognises SSH-style remotes (`git@host:org/repo`) and HTTPS GitHub remotes.
remote_url_pattern = re.compile(
    r'(?:git@.*:|https://github\.com/)(?P<org>.*)/(?P<repo>.*)$'
)

for r in all_repos:
    # Git config files may repeat keys and sections (for example several
    # `fetch` lines under one remote), which the default strict parser rejects.
    # Interpolation is switched off so that `%` in a URL is read literally.
    cp = configparser.ConfigParser(strict=False, interpolation=None)
    try:
        cp.read(r / '.git' / 'config')
    except configparser.Error as err:
        print(f"Skipping {r}: could not parse its git config ({err})")
        continue

    for sname in cp.sections():
        # Only `[remote "..."]` sections describe remotes.
        if not re.match(r'remote "(.*?)"', sname):
            continue
        remote_url = cp[sname].get('url')
        if remote_url is None:
            # A remote section without a URL cannot identify a repository.
            continue
        m = remote_url_pattern.match(remote_url.rstrip('/'))
        if not m:
            continue
        organisation = m.group('org')
        all_organisations.add(organisation)
        if organisation not in want_organisations:
            continue
        repo = m.group('repo')
        # Drop a trailing `.git` so that `repo` and `repo.git` remotes name
        # the same repository and links built from the name stay clean.
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        key = (organisation, repo)
        # `all_repos` is ordered by depth, so the first clone seen for a key
        # is the one closest to the root; later copies are ignored.
        if key not in repos and key not in ignore_repos:
            repos[key] = r
            # This clone is now registered; no need to look at its other remotes.
            break

print("Found repos belonging to the following organisations:")
all_organisations
In [ ]:
import subprocess
# Separator placed between the fields of each `git log` output line.
delimiter = '\t!\t'
# One `(org, repo, date, sha, message)` tuple per commit dated on or after `last_blog`.
commits = []
# Each log line holds: full hash, author date in strict ISO 8601 form, subject.
log_format = f'--format=format:%H{delimiter}%aI{delimiter}%s'

for (org, repo), r in repos.items():
    result = subprocess.run(
        ['git', 'log', log_format],
        cwd=r,
        capture_output=True,
        encoding='utf-8',
        # Undecodable bytes in old commit subjects must not abort the whole run.
        errors='replace',
    )
    for line in result.stdout.split('\n'):
        # Split at most twice so a subject that contains the delimiter stays whole.
        fields = line.split(delimiter, 2)
        if len(fields) != 3:
            # Blank output: a repository without commits, or `git log` failed.
            continue
        sha, datestr, message = fields
        date = datetime.fromisoformat(datestr)
        if date < last_blog:
            continue
        commits.append((org, repo, date, sha, message))

# Oldest commit first.
commits.sort(key=lambda x: x[2])
In [ ]:
from IPython.display import HTML
from html import escape

# Build one table row per commit. Every piece of text taken from git is
# escaped, so characters such as `<` or `&` in a commit subject are shown
# literally instead of being interpreted as markup.
rows = []
for org, repo, date, sha, message in commits:
    url = f'''https://github.com/{org}/{repo}/commit/{sha}'''
    rows.append(f'''<tr>
<td>{escape(org)}/{escape(repo)}</td>
<td><a href="{escape(url)}" target="_blank">{escape(sha[:8])}</a></td>
<td style="text-align: left; width: 40em;font-family: monospace;">{escape(message)}</td>
<td>{date}</td>
</tr>''')
table = ''.join(rows)

# Last expression of the cell: the notebook renders it as the cell output.
HTML('<table>'+table+'</table>')