Blame view

scripts/mailmapper 4.71 KB
94b13bbae   Masahiro Yamada   host-tools: use p...
1
  #!/usr/bin/env python2
6bfa0367a   Heinrich Schuchardt   scripts: mailmapp...
2
  # SPDX-License-Identifier: GPL-2.0+
45765eeda   Masahiro Yamada   scripts: add mail...
3
4
  #
  # Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
45765eeda   Masahiro Yamada   scripts: add mail...
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
  
  '''
  A tool to create/update the mailmap file
  
  The command 'git shortlog' summarizes git log output in a format suitable
  for inclusion in release announcements. Each commit will be grouped by
  author and title.
  
  One problem is that the authors' name and/or email address is sometimes
  spelled differently. The .mailmap feature can be used to coalesce together
  commits by the same person.
  (See 'man git-shortlog' for further information on this feature.)
  
  This tool helps to create/update the mailmap file.
  
  It runs 'git shortlog' internally and searches differently spelled author
  names which share the same email address. The author name with the most
  commits is assumed to be a canonical real name. If the number of commits
  from the canonical name is equal to or greater than 'MIN_COMMITS',
  the entry for the canonical name will be output. ('MIN_COMMITS' is used
  here because we do not want to create a fat mailmap by adding every author
  with only a few commits.)
  
  If there exists a mailmap file specified by the mailmap.file configuration
  option or '.mailmap' at the toplevel of the repository, it is used as
  a base file. (The mailmap.file configuration takes precedence over the
  '.mailmap' file if both exist.)
  
  The base file and the newly added entries are merged together and sorted
  alphabetically (but the comment block is kept untouched), and then printed
  to standard output.
  
  Usage
  -----
  
    scripts/mailmapper
  
  prints the mailmapping to standard output.
  
    scripts/mailmapper > tmp; mv tmp .mailmap
  
  will be useful for updating '.mailmap' file.
  '''
  
  import sys
  import os
  import subprocess
  
  # Entries are output only for canonical names with MIN_COMMITS or more
  # commits. This limit keeps the generated mailmap file from growing too big.
  MIN_COMMITS = 50
  
  # Ask git for the toplevel directory of the repository. check_output()
  # raises CalledProcessError when git exits with a non-zero status; in that
  # case exit with a hint for the user.
  try:
      toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
  except subprocess.CalledProcessError:
31e2141d5   Masahiro Yamada   tools, scripts: r...
60
      sys.exit('Please run in a git repository.')
45765eeda   Masahiro Yamada   scripts: add mail...
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
  
  # The output of 'git rev-parse' ends with '\n'; strip it so that the
  # result can be used as a path.
  toplevel = toplevel.rstrip()

  # Change the current working directory to the toplevel of the repository
  # so that relative paths such as '.mailmap' are resolved from there.
  os.chdir(toplevel)
  
  # First, create an 'author name' vs 'number of commits' database.
  # We assume the name with the most commits is the canonical real name.
  shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n'])

  commits_per_name = {}

  for line in shortlog.splitlines():
      try:
          commits, name = line.split(None, 1)
      except ValueError:
          # The line has no author name to unpack: skip it. Falling through
          # would record the 'commits'/'name' left over from the previous
          # iteration (or raise NameError on the very first line).
          continue
      commits_per_name[name] = int(commits)
  
  # Next, coalesce the author names with the same email address.
  shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e'])

  # '<email>' -> the name with the most commits seen so far for that address
  mail_vs_name = {}
  # '<email>' -> canonical name; only addresses used with several names and
  # whose canonical name has MIN_COMMITS or more commits end up here
  output = {}

  for line in shortlog.splitlines():
      if '<' not in line:
          # no email part on this line; nothing to coalesce
          continue
      # tmp, mail = line.rsplit(None, 1) is not safe
      # because weird email addresses might include whitespaces.
      # Split on the last '<' only, so that an extra '<' earlier in the line
      # does not break the two-value unpacking.
      tmp, mail = line.rsplit('<', 1)
      mail = '<' + mail.rstrip()
      try:
          _, name = tmp.rstrip().split(None, 1)
      except ValueError:
          # author name is empty
          name = ''
      if mail in mail_vs_name:
          # another name for the same email address
          prev_name = mail_vs_name[mail]
          # Take the name with more commits. A name missing from
          # commits_per_name (e.g. the empty name) counts as 0 commits
          # instead of raising KeyError; on a tie the later name wins.
          prev_commits = commits_per_name.get(prev_name, 0)
          cur_commits = commits_per_name.get(name, 0)
          major_name = name if cur_commits >= prev_commits else prev_name
          mail_vs_name[mail] = major_name
          # "MIN_COMMITS or more", as documented at the top of this file
          if commits_per_name.get(major_name, 0) >= MIN_COMMITS:
              output[mail] = major_name
      else:
          mail_vs_name[mail] = name
  
  # [1] If there exists a mailmap file at the location pointed to
  #     by the mailmap.file configuration option, update it.
  # [2] If the file .mailmap exists at the toplevel of the repository, update it.
  # [3] Otherwise, create a new mailmap file.
  mailmap_files = []

  try:
      config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
  except subprocess.CalledProcessError:
      # 'git config' exited with a non-zero status: treat the option as unset
      config_mailmap = ''

  config_mailmap = config_mailmap.rstrip()
  if config_mailmap:
      mailmap_files.append(config_mailmap)

  mailmap_files.append('.mailmap')

  # Stays None if none of the candidates can be opened (case [3] above).
  infile = None

  for map_file in mailmap_files:
      try:
          infile = open(map_file)
      except IOError:
          # Failed to open. Try next. Only I/O errors are caught here, so
          # KeyboardInterrupt and SystemExit still propagate.
          continue
      break
  
  # Leading run of comment ('#') and blank lines of the base file; it is
  # kept in its original order at the top of the output.
  comment_block = []
  # Every other line of the base file.
  output_lines = []

  if infile:
      try:
          for line in infile:
              if line.startswith(('#', '\n')):
                  comment_block.append(line)
              else:
                  # First real entry: the header comment block ends here.
                  output_lines.append(line)
                  break
          # The file object resumes right after the line consumed above.
          output_lines.extend(infile)
      finally:
          # Close the file even if reading it fails.
          infile.close()
  
  # Add one 'Name <email>' line per newly found canonical name.
  for mail, name in output.items():
      output_lines.append(name + ' ' + mail + '\n')

  # Sort the base entries and the new entries together; the comment block
  # is not sorted and stays at the top.
  output_lines.sort()

  sys.stdout.write(''.join(comment_block + output_lines))