diff --git a/.gitignore b/.gitignore index 9f7550b..68bc17f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,160 @@ -__pycache__ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env .venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ diff --git a/requirements.txt b/requirements.txt index bdd037a..44bb4cd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ pandas numpy -GitPython \ No newline at end of file +GitPython +typer[all] +calmap \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..74ab847 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,30 @@ +[metadata] +name = gitstats +version = attr: gitstats.__version__ +author = Samuel Ortion +author_email = samuel+git@ortion.fr +keywords = git, contributions +license = GPLv3 +license_files = LICENSE +long_description = file: README.md +long_description_content_type = text/markdown +classifiers = + Development Status :: 3 - Alpha + Intended Audience :: Developers + License :: OSI Approved :: GNU General Public License v3 + Programming Language :: Python :: 3 + Topic :: Scientific/Engineering :: Bio-Informatics + +[options] +packages = find: +package_dir = + =src +python_require = >= 3.7 # What is the minimum version of python required? +install_required = file: requirements.txt + +[options.entry_points] +console_scripts = + git.stats=gitstats.__main__:main + +[options.packages.find] +where = src diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..229b2eb --- /dev/null +++ b/setup.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +from setuptools import setup + +setup() diff --git a/src/gitstats/__init__.py b/src/gitstats/__init__.py new file mode 100644 index 0000000..a68927d --- /dev/null +++ b/src/gitstats/__init__.py @@ -0,0 +1 @@ +__version__ = "0.1.0" \ No newline at end of file diff --git a/src/gitstats/__main__.py b/src/gitstats/__main__.py new file mode 100644 index 0000000..fb3258a --- /dev/null +++ b/src/gitstats/__main__.py @@ -0,0 +1,57 @@ +import logging +from collections import defaultdict + +import typer +from typing import Optional +from git import Repo +import matplotlib.pyplot as plt +import pandas as pd + +from . import commits +from . 
app = typer.Typer()


@app.command()
def count(
    dir_name: str,
    year: Optional[int] = 2023,
    author_name: Optional[str] = None,
    output_csv: Optional[str] = None,
) -> pd.DataFrame:
    """Count commits per day over every git repository found under a folder.

    Args:
        dir_name: Folder that is searched for repositories
            (via ``commits.iter_repos``).
        year: Only commits authored in this year are counted; ``None``
            disables the filter.
        author_name: Author filter forwarded to
            ``commits.repo_daily_commits``; ``None`` disables it.
        output_csv: When given, the table is also written to this path as a
            ``;``-separated CSV without the index column.

    Returns:
        A DataFrame with a ``date`` column (converted with
        ``pd.to_datetime``) and a ``count`` column holding the number of
        commits made on that day, summed over all repositories.
    """
    commit_date_counter = defaultdict(int)
    for repo_dir in commits.iter_repos(dir_name):
        repo = Repo(repo_dir)
        if not repo.heads:
            # Repository without any head: nothing to iterate over.
            continue
        per_repo_counts = commits.repo_daily_commits(repo, year, author_name)
        for day, n_commits in per_repo_counts.items():
            # Add the real number of commits of that day. The previous code
            # added 1 per repository and silently dropped the actual count.
            commit_date_counter[day] += n_commits

    # Materialise the dict views so pandas receives plain lists.
    df = pd.DataFrame(
        {
            "date": list(commit_date_counter.keys()),
            "count": list(commit_date_counter.values()),
        }
    )
    df["date"] = pd.to_datetime(df["date"])

    if output_csv is not None:
        df.to_csv(output_csv, sep=";", index=False)
    return df


@app.command()
def plot(input_csv: str, output_plot: Optional[str] = None):
    """Draw the contribution calendar from a CSV produced by ``count``.

    Args:
        input_csv: Path of a ``;``-separated CSV with ``date`` and ``count``
            columns.
        output_plot: When given, the figure is saved to this path; otherwise
            it is displayed with ``plt.show()``.
    """
    df = pd.read_csv(input_csv, sep=";")
    df["date"] = pd.to_datetime(df["date"])
    plots.year_of_contributions(df)
    if output_plot:
        # This option used to be accepted but ignored.
        plt.savefig(output_plot)
    else:
        plt.show()


@app.command()
def pipe(
    dir: str = ".",
    author: Optional[str] = None,
    year: Optional[int] = None,
    output_csv: Optional[str] = None,
    output_plot: Optional[str] = None,
):
    """Run ``count`` and plot its result in one step.

    Args:
        dir: Folder that is searched for repositories.
        author: Optional author filter, forwarded to ``count``.
        year: Optional year filter, forwarded to ``count``.
        output_csv: Optional CSV path, forwarded to ``count``.
        output_plot: When given, the figure is saved to this path; otherwise
            it is displayed with ``plt.show()``.
    """
    # ``count`` already writes ``output_csv`` (without the index column).
    # Writing it again here used to overwrite that file with an extra
    # unnamed index column, so the CSV is no longer rewritten.
    df = count(dir_name=dir, year=year, author_name=author, output_csv=output_csv)
    plots.year_of_contributions(df)
    if output_plot:
        plt.savefig(output_plot)
    else:
        # Without an output path the figure used to be built and discarded.
        plt.show()


def main():
    """Entry point of the command-line interface: run the Typer app."""
    app()


if __name__ == "__main__":
    main()
logger = logging.getLogger(__name__)


def wrapper(gen: Iterable, repo: "Repo"):
    """Yield the items of ``gen`` and stop quietly if iterating fails.

    Args:
        gen: Iterable to consume (typically ``repo.iter_commits(...)``).
        repo: Object whose ``working_dir`` is reported in the error log.

    Yields:
        Every item produced by ``gen`` before the first error, if any.
    """
    # ``iter`` lets callers pass any iterable, not only an iterator.
    iterator = iter(gen)
    while True:
        try:
            item = next(iterator)
        except StopIteration:
            return
        except Exception as e:
            logger.error(e)
            logger.error("Occured in this repo: %s", repo.working_dir)
            logger.error("Will probably omit anterior commits")
            # Stop here: retrying a failed iterator could loop forever.
            return
        # Yield outside the ``try`` so that only errors raised while
        # fetching the next item are swallowed.
        yield item


def repo_daily_commits(
    repo: "Repo",
    year: Optional[int] = None,
    author_name: Optional[str] = None,
):
    """Count how many commits were authored on each day.

    Args:
        repo: Repository object providing ``iter_commits(all=True)`` and
            ``working_dir``. The annotation is a forward reference so it is
            not evaluated when the module is imported.
        year: Only count commits whose local authored date falls in this
            year; ``None`` counts every year.
        author_name: Only count commits whose author name equals this value,
            compared case-insensitively; ``None`` counts every author.

    Returns:
        A ``defaultdict(int)`` mapping ``datetime.date`` to the number of
        matching commits on that date.
    """
    wanted_author = author_name.lower() if author_name is not None else None
    other_author_names = set()
    data = defaultdict(int)
    for commit in wrapper(repo.iter_commits(all=True), repo):
        if wanted_author is not None:
            # Guard against a missing author name.
            commit_author = commit.author.name or ""
            if commit_author.lower() != wanted_author:
                other_author_names.add(commit_author)
                # Skip foreign commits. They used to be recorded here and
                # then counted anyway.
                continue
        commit_date = datetime.date.fromtimestamp(commit.authored_date)
        if year is not None and commit_date.year != year:
            continue
        data[commit_date] += 1
    # Report the author names that were filtered out, in a stable order.
    for other_author_name in sorted(other_author_names):
        logger.info(
            "Other commit author in this folder of repositories:\t%s",
            other_author_name,
        )
    return data


def iter_repos(dir_name: str):
    """Yield the path of every ``.git`` directory found under ``dir_name``.

    Args:
        dir_name: Root folder to walk recursively.

    Yields:
        ``os.path.join(root, ".git")`` for each directory named ``.git``.
    """
    for root, dirs, _files in os.walk(dir_name):
        if ".git" in dirs:
            # Prune in place so os.walk does not descend into git metadata.
            dirs.remove(".git")
            yield os.path.join(root, ".git")
"""Plot contribution heatmap using calmap

ref. https://builtin.com/data-science/github-contribution-plot"""

import calmap
import pandas as pd
import matplotlib.pyplot as plt


def year_of_contributions(df: pd.DataFrame):
    """Draw a one-year calendar heatmap of daily commit counts.

    Args:
        df: Table with a ``date`` column (anything ``pd.to_datetime``
            accepts) and a ``count`` column.

    Returns:
        The matplotlib figure that was created. It is also left as the
        current pyplot figure, so ``plt.savefig`` / ``plt.show`` still work
        for callers that ignore the return value.
    """
    # Build the series from the raw values. Passing the ``count`` Series
    # together with ``index=`` makes pandas re-index it: the integer labels
    # never match the dates, so every value became NaN.
    events = pd.Series(
        df["count"].to_numpy(),
        index=pd.DatetimeIndex(pd.to_datetime(df["date"])),
    )
    fig = plt.figure(figsize=(20, 8))
    ax = fig.add_subplot(111)
    calmap.yearplot(
        events,
        fillcolor='lightgrey',
        daylabels="MTWTFSS",
        dayticks=[0, 2, 4, 6],
        linewidth=2,
        ax=ax,
    )
    # TODO: add a colour bar; the previous attempt was judged too ugly.
    return fig
+76;2023-03-22;1 +77;2023-03-16;2 +78;2023-03-15;1 +79;2023-03-14;2 +80;2023-03-20;2 +81;2023-03-08;1 +82;2023-02-28;2 +83;2023-02-27;3 +84;2023-02-26;3 +85;2023-02-22;1 +86;2023-02-20;2 +87;2023-02-19;6 +88;2023-02-13;3 +89;2023-02-10;3 +90;2023-02-08;3 +91;2023-02-06;2 +92;2023-02-05;8 +93;2023-02-03;2 +94;2023-02-01;2 +95;2023-01-30;1 +96;2023-01-29;3 +97;2023-01-27;1 +98;2023-01-22;2 +99;2023-01-21;1 +100;2023-01-18;2 +101;2023-07-06;2 +102;2023-04-21;4 +103;2023-02-12;2 +104;2023-01-24;1 +105;2023-08-19;1 +106;2023-06-29;1 +107;2023-06-07;3 +108;2023-08-04;2 +109;2023-08-28;2 +110;2023-08-25;2 +111;2023-08-23;1 +112;2023-04-07;4 +113;2023-05-05;1 +114;2023-04-26;2 +115;2023-03-04;1 +116;2023-03-03;3 +117;2023-03-02;1 +118;2023-04-16;2 +119;2023-04-08;3 +120;2023-04-06;2 +121;2023-01-02;3 +122;2023-08-01;2 +123;2023-07-28;1 +124;2023-07-24;3 +125;2023-07-21;2 +126;2023-07-17;1 +127;2023-07-14;1 +128;2023-07-03;2 +129;2023-06-26;2 +130;2023-06-22;1 +131;2023-06-12;1 +132;2023-06-08;1 +133;2023-06-03;2 +134;2023-05-23;3 +135;2023-05-21;2 +136;2023-05-10;1 +137;2023-05-02;2 +138;2023-04-28;1 +139;2023-04-22;2 +140;2023-04-17;2 +141;2023-04-15;3 +142;2023-04-05;2 +143;2023-04-02;2 +144;2023-03-30;2 +145;2023-03-27;4 +146;2023-03-26;4 +147;2023-03-24;2 +148;2023-03-10;1 +149;2023-02-25;2 +150;2023-02-24;2 +151;2023-02-21;1 +152;2023-02-15;2 +153;2023-02-14;1 +154;2023-01-26;1 +155;2023-01-11;3 +156;2023-01-10;2 +157;2023-01-03;3 +158;2023-01-01;1 +159;2023-07-29;5 +160;2023-09-01;4 +161;2023-08-31;1 +162;2023-08-29;2 +163;2023-08-15;1 +164;2023-09-02;3 +165;2023-02-11;2 +166;2023-05-19;1 +167;2023-06-06;2 +168;2023-07-27;1 +169;2023-06-21;1 +170;2023-06-05;1 +171;2023-06-04;1 +172;2023-05-03;1 +173;2023-04-24;2 +174;2023-04-14;2 +175;2023-04-13;1 +176;2023-02-09;2 +177;2023-08-09;1 +178;2023-08-08;2 +179;2023-08-06;1 +180;2023-08-12;1 +181;2023-08-03;1 +182;2023-07-30;1 +183;2023-07-23;1 +184;2023-07-05;1 +185;2023-06-25;1 +186;2023-06-18;1 +187;2023-06-17;1 
+188;2023-05-30;1 +189;2023-05-11;1 +190;2023-03-09;1 +191;2023-03-06;1 +192;2023-02-16;1 +193;2023-01-20;1 +194;2023-07-16;1 +195;2023-07-08;1 +196;2023-08-24;2