rapps-db/.github/validate-rapps.py
2024-01-08 09:44:34 +01:00

294 lines
9.1 KiB
Python

'''
PROJECT: ReactOS rapps-db validator
LICENSE: MIT (https://spdx.org/licenses/MIT)
PURPOSE: Validate all rapps-db files
COPYRIGHT: Copyright 2020-2023 Mark Jansen <mark.jansen@reactos.org>
'''
import os
import sys
from enum import Enum, unique
# TODO: make this even nicer by using https://github.com/pytorch/add-annotations-github-action
# Directory two levels above this file (the parent of the directory holding this script).
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Every key that may appear in a section; any other key is reported as unknown.
ALL_KEYS = [
    b'Name',
    b'Version',
    b'License',
    b'Description',
    b'Category',
    b'URLSite',
    b'URLDownload',
    b'SHA1',
    b'SizeBytes',
    b'Icon',
    b'Screenshot1',
    b'LicenseType',
    b'Languages',
    b'RegName',
]

# Keys the main section of every file must provide.
REQUIRED_KEYS = [
    b'Name',
    b'Category',
    b'URLDownload',
]

# Architecture suffixes accepted in a section header.
ALL_ARCH = [
    b'x86',
    b'amd64',
    b'arm',
    b'arm64',
    b'ia64',
    b'ppc',
]

# Accepted numeric values for the LicenseType key.
LICENSE_TYPES = [
    1,  # Open source
    2,  # Freeware
    3,  # Trial/Demo
]

# Registries shared by all parsed files: value -> first line that used it.
# They are filled by verify_unique() to detect duplicates across files.
all_names = {}
all_urls = {}

# Characters allowed in the locale part of a section header (lowercase hex only).
HEXDIGITS = b'0123456789abcdef'
@unique
class LineType(Enum):
    '''Classification of one database line, as returned by RappsLine.parse().'''

    # A non-empty, non-comment line without '=' (handled as a [section] header)
    Section = 1
    # A line containing '=' (handled as key = value)
    KeyValue = 2
    # A line starting with ';' or an empty line
    Comment = 3
class Reporter:
    '''Prints validation problems to stdout and counts how many were reported.'''

    def __init__(self):
        self._problems = 0

    def add(self, line, column, problem):
        '''Report one problem.

        line    -- object providing location(column) and text()
        column  -- column passed to line.location() and used to place the caret
        problem -- human readable description
        '''
        self._problems += 1
        where = line.location(column)
        print('{}: {}'.format(where, problem))
        print(line.text())
        # The line text is printed in its b'...' form, so the caret has to be
        # shifted past the leading b' to line up with the reported column.
        prefix_width = len("b'")
        caret_offset = column - 1 + prefix_width
        print(' ' * caret_offset + '^')

    def problems(self):
        '''Return True when at least one problem has been reported.'''
        return self._problems > 0
class RappsLine:
    '''One raw line of a rapps-db file.

    A line is classified by parse() as a comment, a section header or a
    key-value pair. A section header line additionally collects the key-value
    lines that belong to it (see add() and __getitem__()).
    '''

    def __init__(self, file, lineno, text):
        '''
        file   -- owning file object; its `filename` attribute is used in messages
        lineno -- 1-based line number, used in messages
        text   -- raw bytes of the line, including the line ending
        '''
        self._file = file
        self._lineno = lineno
        self._text = text
        # Column just past the last non-whitespace byte, used to point at
        # problems located at the end of the line.
        self._last_col = len(self._text.rstrip())
        self.main_section = False
        self.key = None
        self.val = None
        self._entries = []

    def add(self, line):
        '''Attach a key-value line to this (section) line.'''
        # Cannot add keyvalues if this is a keyvalue!
        assert not self.key
        self._entries.append(line)

    def __getitem__(self, key):
        '''Return the first attached key-value line with this key, or None.'''
        for entry in self._entries:
            if entry.key == key:
                return entry
        return None

    def parse(self, reporter):
        '''Classify this line, report any problems, and return its LineType.'''
        if not self._text.endswith(b'\r\n'):
            reporter.add(self, self._last_col, "Invalid line ending")
        parts = [part.strip() for part in self._text.split(b'=', 1)]
        first = parts[0]
        if first.startswith(b';') or (len(parts) == 1 and len(first) == 0):
            # comment or empty line, no further processing required!
            return LineType.Comment
        elif len(parts) == 1:
            self._parse_section(reporter, first)
            return LineType.Section
        else:
            self._parse_key_value(reporter, parts)
            return LineType.KeyValue

    def _parse_section(self, reporter, stripped):
        '''Validate a section header such as [Section], [Section.0409] or [Section.amd64].'''
        # [Header]
        if not stripped.startswith(b'['):
            reporter.add(self, 0, "Expected [")
            stripped = b'[' + stripped  # Add it so we can continue
        if not stripped.endswith(b']'):
            reporter.add(self, self._last_col, "Expected ]")
            stripped = stripped + b']'  # Add it so we can continue

        section_name, locale, extra_locale, arch = self._extract_section_info(stripped, reporter)

        if section_name != b'Section':
            hint = 'should always be "Section"'
            reporter.add(self, self._text.index(section_name) + 1,
                         'Invalid section name: "{sec}", {msg}'.format(sec = section_name, msg = hint))
        elif not locale:
            # A "Section" header without a locale is a main section.
            self.main_section = True

        if locale:
            # A locale is 2 or 4 lowercase hex digits.
            if len(locale) not in (2, 4) or not all(c in HEXDIGITS for c in locale):
                reporter.add(self, self._text.index(locale) + 1,
                             'Invalid locale{extra}: "{loc}"'.format(extra = extra_locale, loc = locale))

        if arch:
            if arch not in ALL_ARCH:
                reporter.add(self, self._text.index(arch) + 1, 'Unknown architecture: "%s"' % arch)

    def _extract_section_info(self, text, reporter):
        '''Split a bracketed header into (section_name, locale, extra_locale, arch).

        locale and arch are None when absent. extra_locale is a message suffix
        that is only non-empty when a single suffix was not a known architecture
        and is therefore treated as a locale.
        '''
        text = text[1:-1]
        parts = text.split(b'.')
        section_name = parts[0]
        extra_locale = ''
        if len(parts) == 1:
            locale = arch = None
        elif len(parts) == 2:
            # A single suffix is either an architecture or a locale.
            if parts[1] in ALL_ARCH:
                locale = None
                arch = parts[1]
            else:
                locale = parts[1]
                arch = None
                extra_locale = ' (and unknown architecture)'
        elif len(parts) == 3:
            locale = parts[1]
            arch = parts[2]
        else:
            locale = arch = None
            reporter.add(self, self._text.index(b'[') + 1, 'Unknown section format: "%s"' % text)
        return section_name, locale, extra_locale, arch

    def _parse_key_value(self, reporter, parts):
        '''Store key and value of a "key = value" line and validate them.'''
        # key = value
        assert len(parts) == 2, self
        self.key = parts[0]
        self.val = parts[1]

        try:
            textkey = self.key.decode()
            textval = self.val.decode()
        except UnicodeDecodeError:
            # Report undecodable bytes as a problem instead of aborting the whole
            # run, then continue with a lossy decoding for the remaining checks.
            reporter.add(self, 0, 'Line is not valid UTF-8')
            textkey = self.key.decode(errors='replace')
            textval = self.val.decode(errors='replace')

        if self.key not in ALL_KEYS:
            reporter.add(self, 0, 'Unknown key: "{key}"'.format(key = textkey))

        if self.key == b'LicenseType':
            try:
                v = int(textval, base=10)
            except ValueError:
                # A non-numeric value is a validation problem, not a crash.
                reporter.add(self, 0, 'Invalid value: "{val}" in {key}'.format(val = textval, key = textkey))
            else:
                if v not in LICENSE_TYPES:
                    reporter.add(self, 0, 'Invalid value: "{val}" in {key}'.format(val = v, key = textkey))

        if self.key == b'License':
            v = textval
            if v.casefold() == 'Unknown'.casefold():
                # TODO: Reporter should be enabled when the existing DB entries are fixed:
                # reporter.add(self, 0, 'Invalid value: "{val}" in {key}'.format(val = v, key = textkey))
                print('Warning: {key} is "{val}" ({file})'.format(val = v, key = textkey, file = self._file.filename))

    def location(self, column):
        '''Return "filename(line:column)" for use in messages.'''
        return '{file}({line}:{col})'.format(file = self._file.filename, line = self._lineno, col = column)

    def text(self):
        '''Return the raw bytes of the line.'''
        return self._text
class RappsFile:
    '''A single rapps-db file: parses it into sections and validates them.'''

    def __init__(self, fullname):
        '''
        fullname -- path used to open the file; its basename is used in messages
        '''
        self.path = fullname
        self.filename = os.path.basename(fullname)
        self._sections = []

    def parse(self, reporter):
        '''Read the file, validate every line and section, and report problems.

        Besides per-line checks this verifies that sections are not duplicated,
        that the main section has all required keys, that downloads have a size,
        and that name/version and download URL are unique across all files
        parsed so far (tracked in the module-level all_names / all_urls).
        '''
        with open(self.path, 'rb') as f:
            lines = [RappsLine(self, lineno, raw) for lineno, raw in enumerate(f, start=1)]

        # Create sections from all lines, and add keyvalue entries in their own section
        section = None
        for line in lines:
            linetype = line.parse(reporter)
            if linetype == LineType.Comment:
                continue
            if linetype == LineType.Section:
                section = line
                self._sections.append(section)
            elif linetype == LineType.KeyValue:
                if section is None:
                    # Malformed input must be reported, not crash the validator
                    # (an assert here would also vanish under "python -O").
                    reporter.add(line, 0, 'Key-value pair found before any section!')
                    continue
                section.add(line)

        seen_sections = set()
        main_section = None
        name = None
        ver = None
        url = None

        for section in self._sections:
            # Section headers are compared case-insensitively.
            uniq_section = section._text.strip().upper()
            if uniq_section in seen_sections:
                reporter.add(section, 0, 'Duplicate section found!')
            else:
                seen_sections.add(uniq_section)

            # Only the first main section is checked for the required keys.
            if not main_section and section.main_section:
                main_section = section
                for key in REQUIRED_KEYS:
                    if not section[key]:
                        reporter.add(section, 0, 'Main section has no {key} key!'.format(key = key))

            if section[b'URLDownload'] and not section[b'SizeBytes']:
                # We allow this, if the main section has a SizeBytes (alternate mirror without duplicating the info)
                if section == main_section or (main_section and not main_section[b'SizeBytes']):
                    reporter.add(section, 0, 'Section has URLDownload but no SizeBytes!')

            # Remember the first Name / Version / URLDownload found in the file.
            if section[b'Name'] and not name:
                name = section[b'Name']
            if section[b'Version'] and not ver:
                ver = section[b'Version']
            if section[b'URLDownload'] and not url:
                url = section[b'URLDownload']

        # Verify that the application name and version is unique
        if name:
            if ver:
                verify_unique(reporter, all_names, name, name.val + b', version ' + ver.val)
            else:
                verify_unique(reporter, all_names, name, name.val)

        # Verify that the download URL is unique
        if url:
            verify_unique(reporter, all_urls, url, url.val)
def verify_unique(reporter, lines, line, name):
    '''Record `line` under `name`, or report a duplicate if `name` is already taken.

    reporter -- receives the problems via add()
    lines    -- dict mapping a value to the first line that used it (updated in place)
    line     -- the line currently being checked
    name     -- the value that has to be unique
    '''
    previous = lines.get(name)
    if not previous:
        # First time this value is seen: remember where it came from.
        lines[name] = line
        return

    # Point at both the offending line and the one that claimed the value first.
    reporter.add(line, 0, 'Duplicate value found: {name}'.format(name = name))
    reporter.add(previous, 0, 'First occurence:')
def validate_repo(dirname):
    '''Validate every .txt file directly inside `dirname`.

    Prints all problems found; exits the process with status 1 when there is
    at least one problem, otherwise prints a success message.
    '''
    reporter = Reporter()

    # os.listdir() returns bare entry names, so join them with dirname; otherwise
    # the files would be opened relative to the current working directory.
    # Sorting keeps the output, and which duplicate counts as the first one,
    # independent of the directory listing order.
    all_files = [RappsFile(os.path.join(dirname, filename))
                 for filename in sorted(os.listdir(dirname))
                 if filename.endswith('.txt')]
    for entry in all_files:
        entry.parse(reporter)

    if reporter.problems():
        print('Please check https://reactos.org/wiki/RAPPS for details on the file format.')
        sys.exit(1)
    else:
        print('No problems found.')
# Script entry point: validate the repository this script is located in.
if __name__ == '__main__':
    validate_repo(REPO_ROOT)