Skip to main content

recent-github-commits.ipynb (Source)

List all commits in git repositories since a given time

This script will find all git repositories under a given root directory, and then list all commits made to those repositories since a given time.

Repositories are identified by the GitHub remote URL: when you have more than one copy of a repository, the one closest to the root will be used and the rest will be ignored.

In [ ]:
from pathlib import Path
from datetime import datetime, timezone

# Cut-off moment: commits authored before this are left out of the listing.
last_blog = datetime(2023, 1, 1, tzinfo=timezone.utc)

# Top of the directory tree that is searched for git repositories.
root = Path('/home/christian/numbas')

# Only repositories owned by one of these organisations are kept;
# repositories belonging to any other organisation are dropped.
want_organisations = {
    'numbas',
    'christianp',
}

# Individual repositories to leave out, each written as `(organisation, repository)`.
ignore_repos = list()
In [ ]:
def find_git_repos(root):
    """Yield every directory under ``root`` that contains a ``.git`` entry.

    The tree is walked iteratively with an explicit stack, so deep
    hierarchies cannot hit the recursion limit. ``root`` itself is yielded
    if it is a repository. Directories found inside a repository are still
    searched, so nested repositories are reported too.

    Args:
        root: A ``pathlib.Path`` giving the directory to start from.

    Yields:
        ``pathlib.Path`` objects, one per repository found, in no
        particular order.
    """
    queue = [root]
    while queue:
        path = queue.pop()
        try:
            if (path / '.git').exists():
                yield path

            for p in path.iterdir():
                # Skip plain files, symlinks (which could loop back on the
                # tree) and hidden directories such as `.git` itself.
                if not p.is_dir() or p.is_symlink() or p.name.startswith('.'):
                    continue
                queue.append(p)
        except PermissionError:
            # An unreadable directory should not abort the whole scan.
            continue
# Order the repositories by how many ancestors their path has, shallowest first.
all_repos = sorted(
    find_git_repos(root),
    key=lambda repo_path: len(repo_path.parents),
)
In [ ]:
import configparser
import re

# Maps `(organisation, repository)` to the local path of the first copy found.
repos = {}
# Every organisation seen in a remote URL, whether wanted or not.
all_organisations = set()

# Accepts SSH-style remotes (`git@host:org/repo`) and GitHub HTTPS remotes.
# NOTE(review): the HTTPS alternative and the `org` group opener were missing
# from the damaged source line and have been reconstructed - confirm.
remote_url_pattern = re.compile(r'(?:git@.*:|https://github.com/)(?P<org>.*)/(?P<repo>.*)$')

for r in all_repos:
    # Git config files may repeat sections and contain literal `%` characters,
    # so strict parsing and value interpolation are both switched off.
    cp = configparser.ConfigParser(strict=False, interpolation=None)
    # `read` silently ignores a config file that is missing or unreadable.
    cp.read(r / '.git' / 'config')
    for sname in cp.sections():
        # Only `[remote "<name>"]` sections describe remotes.
        if not re.match(r'remote "(.*?)"', sname):
            continue
        remote_url = cp[sname].get('url')
        if remote_url is None:
            continue
        m = remote_url_pattern.match(remote_url)
        if not m:
            continue
        organisation = m.group('org')
        repo = m.group('repo')
        all_organisations.add(organisation)
        if organisation not in want_organisations:
            continue
        key = (organisation, repo)
        # The first path recorded for a key wins; later copies are ignored.
        if key not in repos and key not in ignore_repos:
            repos[key] = r

print("Found repos belonging to the following organisations:")
for organisation_name in sorted(all_organisations):
    print(f"  {organisation_name}")
In [ ]:
import subprocess
# Separator placed between the fields of each `git log` output line.
delimiter = '\t!\t'
# Collected as `(organisation, repository, date, sha, message)` tuples.
commits = []
for ((org, repo), r) in repos.items():
    # One line per commit: full hash, strict ISO 8601 author date, subject.
    result = subprocess.run(
        ['git', 'log', f'--format=format:%H{delimiter}%aI{delimiter}%s'],
        cwd=r,
        capture_output=True,
        encoding='utf-8',
    )
    for line in result.stdout.split('\n'):
        # Split at most twice so a subject containing the delimiter stays whole.
        fields = line.split(delimiter, 2)
        if len(fields) != 3:
            # Blank output (empty repository or failed command) or a malformed line.
            continue
        sha, datestr, message = fields
        date = datetime.fromisoformat(datestr)
        if date < last_blog:
            continue
        commits.append((org, repo, date, sha, message))

# Oldest commit first.
commits.sort(key=lambda x: x[2])
In [ ]:
from IPython.display import HTML

table = ''
for org,repo,date,sha,message in commits:
    url = f'''{org}/{repo}/commit/{sha}'''
    table += f'''<tr>
    <td><a href="{url}" target="_blank">{sha[:8]}</a></td>
    <td style="text-align: left; width: 40em;font-family: monospace;">{message}</td>