Blame view

scripts/spdxcheck.py 10.1 KB
5385a295e   Thomas Gleixner   scripts: Add SPDX...
1
2
3
4
5
6
  #!/usr/bin/env python
  # SPDX-License-Identifier: GPL-2.0
  # Copyright Thomas Gleixner <tglx@linutronix.de>
  
  from argparse import ArgumentParser
  from ply import lex, yacc
bed95c43c   Jeremy Cline   scripts: add Pyth...
7
  import locale
5385a295e   Thomas Gleixner   scripts: Add SPDX...
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  import traceback
  import sys
  import git
  import re
  import os
  
  class ParserException(Exception):
      """Error raised while lexing or parsing an SPDX license expression.

      Attributes:
          tok: the token the error was detected on; the parser's p_error
               hook passes None when the expression ended prematurely.
          txt: short human readable description of the problem.
      """

      def __init__(self, tok, txt):
          self.tok, self.txt = tok, txt
  
  class SPDXException(Exception):
      """Error raised while reading the license data below LICENSES.

      Attributes:
          el:  the git tree element being processed, or None when the
               raiser does not attach one.
          txt: short human readable description of the problem.
      """

      def __init__(self, el, txt):
          self.el, self.txt = el, txt
  
  class SPDXdata(object):
      """Container for the license information gathered from LICENSES."""

      def __init__(self):
          # Number of license / exception files that were processed
          self.license_files = self.exception_files = 0
          # Known license identifiers
          self.licenses = list()
          # Maps an exception identifier to the licenses it may be used with
          self.exceptions = dict()
  
  # Read the spdx data from the LICENSES directory
  def read_spdxdata(repo):
      """Collect the valid license and exception identifiers from LICENSES.

      Walks the LICENSES tree of the head commit of 'repo' and reads the
      header tags of every entry which exists as a regular file in the
      working directory (paths are resolved relative to the current
      working directory).

      Returns an SPDXdata object holding the identifiers and file counts.

      Raises SPDXException when a license identifier is defined twice, when
      an exception references a license which is not known, or when an
      exception file reaches its License-Text without any SPDX-Licenses.
      """

      # The subdirectories of LICENSES in the kernel source
      # Note: exceptions needs to be parsed as last directory, because the
      # licenses an exception refers to are checked against the license
      # identifiers collected so far.
      license_dirs = [ "preferred", "dual", "deprecated", "exceptions" ]
      lictree = repo.head.commit.tree['LICENSES']

      spdx = SPDXdata()

      for d in license_dirs:
          for el in lictree[d].traverse():
              if not os.path.isfile(el.path):
                  continue

              exception = None
              # Context manager so the file is closed on break and on error
              with open(el.path) as fd:
                  for l in fd:
                      if l.startswith('Valid-License-Identifier:'):
                          lid = l.split(':')[1].strip().upper()
                          if lid in spdx.licenses:
                              raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                          spdx.licenses.append(lid)

                      elif l.startswith('SPDX-Exception-Identifier:'):
                          exception = l.split(':')[1].strip().upper()
                          spdx.exceptions[exception] = []

                      elif l.startswith('SPDX-Licenses:'):
                          # Comma separated list, blanks and tabs are ignored
                          for lic in l.split(':')[1].upper().strip().replace(' ', '').replace('\t', '').split(','):
                              if lic not in spdx.licenses:
                                  raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                              spdx.exceptions[exception].append(lic)

                      elif l.startswith("License-Text:"):
                          # The tag header ends here; account for the file
                          # and skip the license text itself.
                          if exception:
                              if not spdx.exceptions[exception]:
                                  raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                              spdx.exception_files += 1
                          else:
                              spdx.license_files += 1
                          break
      return spdx
  
  class id_parser(object):
      """Validator for SPDX license expressions built on ply lex/yacc.

      The instance serves as the ply 'module' for both the lexer and the
      parser: the t_* members define the tokens and the p_* members define
      the grammar. The docstrings of the t_* and p_* rule functions are the
      token regular expressions and the grammar productions, so they must
      not be used for documentation.

      Statistics are accumulated in lines_checked, checked, spdx_valid and
      spdx_errors while files are fed through parse_lines().
      """

      reserved = [ 'AND', 'OR', 'WITH' ]
      tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

      precedence = ( ('nonassoc', 'AND', 'OR'), )

      t_ignore = ' \t'

      def __init__(self, spdx):
          """Create lexer and parser; 'spdx' supplies the valid identifiers."""
          self.spdx = spdx
          self.lasttok = None
          self.lastid = None
          self.lexer = lex.lex(module = self, reflags = re.UNICODE)
          # Initialize the parser. No debug file and no parser rules stored on disk
          # The rules are small enough to be generated on the fly
          self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
          self.lines_checked = 0
          self.checked = 0
          self.spdx_valid = 0
          self.spdx_errors = 0
          self.curline = 0
          self.deepest = 0

      # Validate License and Exception IDs
      def validate(self, tok):
          """Check an ID or EXC token against the known SPDX data.

          Remembers the last license ID so that a following exception can
          be checked for being applicable to that license. Raises
          ParserException for unknown or mismatching identifiers.
          """
          ident = tok.value.upper()
          if tok.type == 'ID':
              if ident not in self.spdx.licenses:
                  raise ParserException(tok, 'Invalid License ID')
              self.lastid = ident
          elif tok.type == 'EXC':
              if ident not in self.spdx.exceptions:
                  raise ParserException(tok, 'Invalid Exception ID')
              if self.lastid not in self.spdx.exceptions[ident]:
                  raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
              self.lastid = None
          elif tok.type != 'WITH':
              # Any other token except WITH separates the license from an exception
              self.lastid = None

      # Lexer functions
      def t_RPAR(self, tok):
          r'\)'
          self.lasttok = tok.type
          return tok

      def t_LPAR(self, tok):
          r'\('
          self.lasttok = tok.type
          return tok

      # Matches identifiers as well as the reserved words. The token type is
      # rewritten to the reserved word, or to EXC when the previous token
      # was WITH, before the token is validated.
      def t_ID(self, tok):
          r'[A-Za-z.0-9\-+]+'

          if self.lasttok == 'EXC':
              print(tok)
              raise ParserException(tok, 'Missing parentheses')

          tok.value = tok.value.strip()
          val = tok.value.upper()

          if val in self.reserved:
              tok.type = val
          elif self.lasttok == 'WITH':
              tok.type = 'EXC'

          self.lasttok = tok.type
          self.validate(tok)
          return tok

      def t_error(self, tok):
          raise ParserException(tok, 'Invalid token')

      def p_expr(self, p):
          '''expr : ID
                  | ID WITH EXC
                  | expr AND expr
                  | expr OR expr
                  | LPAR expr RPAR'''
          pass

      # p is None when the input ended in the middle of an expression
      def p_error(self, p):
          if not p:
              raise ParserException(None, 'Unfinished license expression')
          else:
              raise ParserException(p, 'Syntax error')

      def parse(self, expr):
          """Parse a single license expression; raises ParserException on error."""
          self.lasttok = None
          self.lastid = None
          self.parser.parse(expr, lexer = self.lexer)

      def parse_lines(self, fd, maxlines, fname):
          """Check the first SPDX-License-Identifier found in a file.

          fd       -- iterable yielding the lines of the file as bytes
          maxlines -- number of lines to inspect before giving up
          fname    -- name used as prefix of the error report

          Parse errors are reported on stdout as 'file: line:column message'
          and counted in spdx_errors; they are not propagated.
          """
          self.checked += 1
          self.curline = 0
          try:
              for line in fd:
                  line = line.decode(locale.getpreferredencoding(False), errors='ignore')
                  self.curline += 1
                  if self.curline > maxlines:
                      break
                  self.lines_checked += 1
                  if line.find("SPDX-License-Identifier:") < 0:
                      continue
                  expr = line.split(':')[1].strip()
                  # Remove trailing comment closure
                  # (rstrip() removes any trailing run of the given characters)
                  if line.strip().endswith('*/'):
                      expr = expr.rstrip('*/').strip()
                  # Remove trailing xml comment closure
                  if line.strip().endswith('-->'):
                      expr = expr.rstrip('-->').strip()
                  # Special case for SH magic boot code files
                  if line.startswith('LIST \"'):
                      expr = expr.rstrip('\"').strip()
                  self.parse(expr)
                  self.spdx_valid += 1
                  #
                  # Should we check for more SPDX ids in the same file and
                  # complain if there are any?
                  #
                  break

          except ParserException as pe:
              if pe.tok:
                  col = line.find(expr) + pe.tok.lexpos
                  tok = pe.tok.value
                  sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
              else:
                  # No token available, so there is no column to report.
                  # The argument list must match the three placeholders.
                  sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
              self.spdx_errors += 1
              self.spdx_errors += 1
  
  def scan_git_tree(tree):
      """Run the SPDX check on every file below a git tree.

      Entries which do not exist as regular files in the working directory
      are skipped. Relies on the module level 'parser' and 'args' objects
      which are set up by the script entry point.
      """
      for el in tree.traverse():
          # Exclude stuff which would make pointless noise
          # FIXME: Put this somewhere more sensible
          if el.path.startswith("LICENSES"):
              continue
          if "license-rules.rst" in el.path:
              continue
          if not os.path.isfile(el.path):
              continue
          # Binary mode; the lines are decoded by parse_lines()
          with open(el.path, 'rb') as fd:
              parser.parse_lines(fd, args.maxlines, el.path)
5385a295e   Thomas Gleixner   scripts: Add SPDX...
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
  
  def scan_git_subtree(tree, path):
      """Descend from 'tree' along 'path' and scan the subtree found there.

      'path' is a '/' separated directory path relative to 'tree'. Leading
      and trailing slashes are ignored.
      """
      for p in path.strip('/').split('/'):
          # Empty and '.' components ('.', './dir', 'a//b') do not name a
          # tree entry; skip them instead of failing the lookup.
          if p in ('', '.'):
              continue
          tree = tree[p]
      scan_git_tree(tree)
  
  # Script entry point: parse the command line, load the SPDX data from the
  # git repository in the current working directory and check stdin, the
  # given paths or the whole tree.
  if __name__ == '__main__':

      ap = ArgumentParser(description='SPDX expression checker')
      ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
      ap.add_argument('-m', '--maxlines', type=int, default=15,
                      help='Maximum number of lines to scan in a file. Default 15')
      ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
      args = ap.parse_args()

      # Sanity check path arguments
      if '-' in args.path and len(args.path) > 1:
          sys.stderr.write('stdin input "-" must be the only path argument\n')
          sys.exit(1)

      try:
          # Use git to get the valid license expressions
          repo = git.Repo(os.getcwd())
          assert not repo.bare

          # Initialize SPDX data
          spdx = read_spdxdata(repo)

          # Initialize the parser
          parser = id_parser(spdx)

      except SPDXException as se:
          if se.el:
              sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
          else:
              sys.stderr.write('%s\n' %se.txt)
          sys.exit(1)

      except Exception as ex:
          sys.stderr.write('FAIL: %s\n' %ex)
          sys.stderr.write('%s\n' %traceback.format_exc())
          sys.exit(1)

      try:
          if len(args.path) and args.path[0] == '-':
              # Reopen stdin in binary mode; parse_lines() decodes the lines
              stdin = os.fdopen(sys.stdin.fileno(), 'rb')
              parser.parse_lines(stdin, args.maxlines, '-')
          else:
              if args.path:
                  for p in args.path:
                      if os.path.isfile(p):
                          # Close each file as soon as it has been checked
                          with open(p, 'rb') as fd:
                              parser.parse_lines(fd, args.maxlines, p)
                      elif os.path.isdir(p):
                          scan_git_subtree(repo.head.reference.commit.tree, p)
                      else:
                          sys.stderr.write('path %s does not exist\n' %p)
                          sys.exit(1)
              else:
                  # Full git tree scan
                  scan_git_tree(repo.head.commit.tree)

              if args.verbose:
                  sys.stderr.write('\n')
                  sys.stderr.write('License files:     %12d\n' %spdx.license_files)
                  sys.stderr.write('Exception files:   %12d\n' %spdx.exception_files)
                  sys.stderr.write('License IDs        %12d\n' %len(spdx.licenses))
                  sys.stderr.write('Exception IDs      %12d\n' %len(spdx.exceptions))
                  sys.stderr.write('\n')
                  sys.stderr.write('Files checked:     %12d\n' %parser.checked)
                  sys.stderr.write('Lines checked:     %12d\n' %parser.lines_checked)
                  sys.stderr.write('Files with SPDX:   %12d\n' %parser.spdx_valid)
                  sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)

              sys.exit(0)

      except Exception as ex:
          sys.stderr.write('FAIL: %s\n' %ex)
          sys.stderr.write('%s\n' %traceback.format_exc())
          sys.exit(1)