From 5c7badc5ac0c76b65a6287f8e505b96a72f896a3 Mon Sep 17 00:00:00 2001 From: Alexander Zhogov Date: Fri, 4 Jun 2021 13:28:19 +0300 Subject: [PATCH] CI: Update org control scripts (#6035) --- .github/org_control/__init__.py | 3 - .github/org_control/check_org.py | 75 +++++-- .github/org_control/check_pr.py | 57 ++++-- .github/org_control/config.json | 4 +- .github/org_control/configs.py | 18 +- .github/org_control/github_api.py | 15 +- .github/org_control/ldap_api.py | 236 +++++++++++++++++++++++ .github/org_control/requirements-dev.txt | 1 + .github/org_control/requirements.txt | 1 + .github/org_control/requirements_dev.txt | 1 - 10 files changed, 354 insertions(+), 57 deletions(-) create mode 100644 .github/org_control/ldap_api.py create mode 100644 .github/org_control/requirements-dev.txt delete mode 100644 .github/org_control/requirements_dev.txt diff --git a/.github/org_control/__init__.py b/.github/org_control/__init__.py index 60324954ef522e..e69de29bb2d1d6 100644 --- a/.github/org_control/__init__.py +++ b/.github/org_control/__init__.py @@ -1,3 +0,0 @@ -# Copyright (C) 2018-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - diff --git a/.github/org_control/check_org.py b/.github/org_control/check_org.py index f6858586f47ec2..60ec464b621442 100644 --- a/.github/org_control/check_org.py +++ b/.github/org_control/check_org.py @@ -5,12 +5,13 @@ Check GitHub organization and invite members """ -# pylint: disable=fixme,no-member +# pylint: disable=fixme,no-member,too-many-locals from argparse import ArgumentParser -import github_api from configs import Config +from github_api import GithubOrgApi, get_dev_emails +from ldap_api import LdapApi, print_user_info, InfoLevel def main(): @@ -19,32 +20,74 @@ def main(): arg_parser.add_argument("--cfg-file", metavar="PATH", default=Config.default_cfg_path, help=f"Path to json configuration file, e.g. {Config.default_cfg_path}") arg_parser.add_argument("--teams", action="store_true", help="Check GitHub teams") + arg_parser.add_argument("--no-ldap", action="store_true", help="Don't use LDAP info") args, unknown_args = arg_parser.parse_known_args() Config(args.cfg_file, unknown_args) - gh_api = github_api.GithubOrgApi() + gh_api = GithubOrgApi() if args.teams: gh_api.get_org_teams() - else: - dev_emails = github_api.get_dev_emails() - print(f'\nDeveloper emails {len(dev_emails)}:', '; '.join(dev_emails)) + return - org_emails = gh_api.get_org_emails() - print(f'\nOrg emails {len(org_emails)}:', '; '.join(org_emails)) + cfg_emails = get_dev_emails() + print(f'\nCfg developer emails {len(cfg_emails)}:', '; '.join(sorted(cfg_emails))) - org_pendig_invitation_emails = gh_api.get_org_invitation_emails() + dev_emails = set() + dev_emails.update(cfg_emails) - invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails) - print(f'\nInvite emails {len(invite_emails)}:', '; '.join(invite_emails)) + if not args.no_ldap: + ldap_api = LdapApi() + ldap_emails = ldap_api.get_user_emails() + dev_emails.update(ldap_emails) + print(f'\nLDAP developer emails {len(ldap_emails)}:', '; '.join(sorted(ldap_emails))) - no_in_dev_emails = org_emails.difference(dev_emails) - print(f'\nOrg members - no in developers list {len(no_in_dev_emails)}:', - '; '.join(no_in_dev_emails)) + cfg_emails_no_in_ldap = ldap_api.get_absent_emails(cfg_emails) + print(f'\nCfg developer emails - absent in LDAP at all {len(cfg_emails_no_in_ldap)}:', + '; '.join(sorted(cfg_emails_no_in_ldap))) - valid_github_users = gh_api.get_valid_github_users(invite_emails) + cfg_ldap_inters = cfg_emails.intersection(ldap_emails) + print(f'\nCfg developer emails - present in LDAP developers {len(cfg_ldap_inters)}:', + '; '.join(sorted(cfg_ldap_inters))) - gh_api.invite_users(valid_github_users) + org_emails, org_logins_no_intel_email = gh_api.get_org_emails() + print(f'\nOrg emails {len(org_emails)}:', '; '.join(sorted(org_emails))) + + org_emails_no_in_ldap = set() + if not args.no_ldap: + org_ldap_diff = org_emails.difference(ldap_emails) + print(f'\nOrg member emails - absent in LDAP developers {len(org_ldap_diff)}:', + '; '.join(sorted(org_ldap_diff))) + + for email in org_ldap_diff: + user_info = ldap_api.get_user_info_by_email(email) + if user_info: + print_user_info(user_info, InfoLevel.PDL) + else: + org_emails_no_in_ldap.add(email) + + org_pendig_invitation_emails = gh_api.get_org_invitation_emails() + invite_emails = dev_emails.difference(org_emails).difference(org_pendig_invitation_emails) + print(f'\nInvite emails {len(invite_emails)}:', '; '.join(sorted(invite_emails))) + + valid_github_users = gh_api.get_valid_github_users(invite_emails) + gh_api.invite_users(valid_github_users) + + print('\nCheck accounts below and remove from the GitHub organization and cfg list') + + cfg_emails_no_in_org = sorted(cfg_emails.difference(org_emails)) + print(f'\nCfg developer emails - absent in GitHub organization {len(cfg_emails_no_in_org)}:', + '; '.join(cfg_emails_no_in_org)) + + org_emails_no_in_dev = sorted(org_emails.difference(dev_emails)) + print(f'\nOrg member emails - absent in cfg and LDAP developers {len(org_emails_no_in_dev)}:', + '; '.join(org_emails_no_in_dev)) + + print(f'\nOrg member emails - absent in LDAP at all {len(org_emails_no_in_ldap)}:', + '; '.join(sorted(org_emails_no_in_ldap))) + + print(f'\nOrg member logins - absent Intel email {len(org_logins_no_intel_email)}:', + '; '.join(sorted(org_logins_no_intel_email))) if __name__ == '__main__': diff --git a/.github/org_control/check_pr.py b/.github/org_control/check_pr.py index 46107ae101b472..f6aea0a7d6cd59 100644 --- a/.github/org_control/check_pr.py +++ b/.github/org_control/check_pr.py @@ -33,13 +33,23 @@ def get_pr_labels(pull): def set_pr_labels(pull, labels): - """Sets PR labels""" + """Sets new PR labels (all previously set labels are removed)""" if not labels or Config().DRY_RUN: return - print(f'Set PR labels:', labels) + print('Set PR labels:', labels) + # set_labels() should accept list but fails with empty "AssertionError:" pull.set_labels(labels) +def add_pr_labels(pull, labels): + """Adds PR labels""" + if not labels or Config().DRY_RUN: + return + print('Add PR labels:', labels) + for label in labels: + pull.add_to_labels(label) + + def get_pr_type_by_labels(pull): """Gets PR type using labels""" pr_lables = get_pr_labels(pull) @@ -80,6 +90,17 @@ def get_category_labels(pull): return labels +def get_pr_info_str(pull): + """Gets info about PR using a few workarounds""" + pr_title = pull.title.encode("ASCII", "ignore").decode() + + # Workaround for PyGithub issue: https://github.com/PyGithub/PyGithub/issues/512 + pr_created_at = pull.created_at.replace(tzinfo=datetime.timezone.utc).astimezone() + + return f'PR: {pull.number} - {pr_title} - Created: {pr_created_at} - ' \ + f'Labels: {get_pr_labels(pull)} - Type: {get_pr_type_by_labels(pull)}' + + def main(): """The main entry point function""" arg_parser = ArgumentParser() @@ -103,19 +124,19 @@ def main(): print(f'\nPRs count ({args.pr_state}):', pulls.totalCount) if args.newer: - pr_created_after = datetime.datetime.now() - datetime.timedelta(minutes=int(args.newer)) - print('PRs created after:', pr_created_after) + pr_created_after = (datetime.datetime.now() - + datetime.timedelta(minutes=int(args.newer))).astimezone() + print('Checking PRs created after:', pr_created_after) non_org_intel_pr_users = set() non_org_pr_users = set() for pull in pulls: - if args.newer and pull.created_at <= pr_created_after: - print(f'\nIGNORE: {pull} - Created: {pull.created_at}') + pr_created_at = pull.created_at.replace(tzinfo=datetime.timezone.utc).astimezone() + if args.newer and pr_created_at <= pr_created_after: + print(f'\nIGNORE: {get_pr_info_str(pull)}') continue - pr_lables = get_pr_labels(pull) pr_type_by_labels = get_pr_type_by_labels(pull) - set_labels = [] - print(f'\n{pull} - Created: {pull.created_at} - Labels: {pr_lables} -', - f'Type: {pr_type_by_labels}', end='') + add_labels = [] + print(f'\n{get_pr_info_str(pull)}', end='') # Checks PR source type if gh_api.is_org_user(pull.user): @@ -127,21 +148,23 @@ def main(): if pr_type_by_labels is not PrType.INTEL: print(f'NO "{PrType.INTEL.value}" label: ', end='') github_api.print_users(pull.user) - set_labels.append(PrType.INTEL.value) + add_labels.append(PrType.INTEL.value) + elif github_api.is_user_ignored(pull.user): + print(' - IGNORED non org user with NO Intel email or company') else: - print(f' - Non org user with NO Intel email or company') + print(' - Non org user with NO Intel email or company') non_org_pr_users.add(pull.user) if pr_type_by_labels is not PrType.EXTERNAL: print(f'NO "{PrType.EXTERNAL.value}" label: ', end='') github_api.print_users(pull.user) - set_labels.append(PrType.EXTERNAL.value) + add_labels.append(PrType.EXTERNAL.value) - set_labels += get_category_labels(pull) - set_pr_labels(pull, set_labels) + add_labels += get_category_labels(pull) + add_pr_labels(pull, add_labels) - print(f'\nNon org user with Intel email or company:') + print('\nNon org user with Intel email or company:') github_api.print_users(non_org_intel_pr_users) - print(f'\nNon org user with NO Intel email or company:') + print('\nNon org user with NO Intel email or company:') github_api.print_users(non_org_pr_users) diff --git a/.github/org_control/config.json b/.github/org_control/config.json index 01dcad55badc04..ed6a1e1f5aacdc 100644 --- a/.github/org_control/config.json +++ b/.github/org_control/config.json @@ -6,7 +6,9 @@ "openvino-ci", "openvino-pushbot", "lab-nerval", - "lab-nerval-onnx-ci" + "lab-nerval-onnx-ci", + "onnx-watchdog-agent", + "dependabot" ], "EMAILS_FILE_PATH": "dev_emails-test.txt", "PROXIES": { diff --git a/.github/org_control/configs.py b/.github/org_control/configs.py index e0daa1a4389afb..79c96c90abd9f8 100644 --- a/.github/org_control/configs.py +++ b/.github/org_control/configs.py @@ -57,19 +57,19 @@ def __init__(self, file_path=None, cli_args=None): for name, value in self._json_cfg.items(): if hasattr(self, name): raise ConfigException(f'Duplicating prosperity: {name}') - prosperity_value = self._args.get(name) or os.getenv(name) - if prosperity_value: + property_value = self._args.get(name) or os.getenv(name) + if property_value: # Try to set prosperity_value as Python literal structures, e.g. DRY_RUN=False try: - prosperity_value = ast.literal_eval(prosperity_value) + property_value = ast.literal_eval(property_value) except Exception: pass - if not isinstance(prosperity_value, type(value)): + if not isinstance(property_value, type(value)): raise ConfigException(f'Python type of {name} parameter must be {type(value)}') else: - prosperity_value = value - setattr(self, name, prosperity_value) - Config.properties[name] = prosperity_value + property_value = value + setattr(self, name, property_value) + Config.properties[name] = property_value self.set_proxy() @@ -78,7 +78,7 @@ def _load_cfg(self): try: with open(self._file_path) as conf: self._json_cfg = json.load(conf) - except: + except Exception: print('Failed to load configuration from:', self._file_path) raise @@ -105,7 +105,7 @@ def set_proxy(self): def _test(): """Test and debug""" print('Config.default_cfg_path:', Config.default_cfg_path) - cfg = Config(cli_args=['DRY_RUN=True']) + cfg = Config(cli_args=['DRY_RUN', 'PROXIES={"NO_PROXY": "localhost"}']) print('Config.properties:', cfg.get_properties()) diff --git a/.github/org_control/github_api.py b/.github/org_control/github_api.py index ddb0595104a840..dcea9d9f2b23ec 100644 --- a/.github/org_control/github_api.py +++ b/.github/org_control/github_api.py @@ -11,7 +11,6 @@ import time from github import Github, GithubException, RateLimitExceededException, IncompletableObject -from github import UnknownObjectException from github.PaginatedList import PaginatedList from configs import Config @@ -110,17 +109,13 @@ def __init__(self): def is_org_user(self, user): """Checks that user is a member of GitHub organization""" if is_valid_user(user): - try: - membership = user.get_organization_membership(self.github_org) - # membership.role can be 'member' or 'admin' - if membership.state == 'active' and membership.role: - return True - except UnknownObjectException: - pass + # user.get_organization_membership(self.github_org) doesn't work with org members + # permissions, GITHUB_TOKEN must be org owner now + return self.github_org.has_in_members(user) return False def get_org_emails(self): - """Gets and prints all emails of GitHub organization members""" + """Gets and prints emails of all GitHub organization members""" org_members = self.github_org.get_members() org_emails = set() org_members_fix = set() @@ -146,7 +141,7 @@ def get_org_emails(self): '; '.join(org_logins_fix_intel_email)) print(f'\nOrg members - no real name {len(org_emails_fix_name)}:', '; '.join(org_emails_fix_name)) - return org_emails + return (org_emails, org_logins_fix_intel_email) def get_org_invitation_emails(self): """Gets GitHub organization teams prints info""" diff --git a/.github/org_control/ldap_api.py b/.github/org_control/ldap_api.py new file mode 100644 index 00000000000000..44a0eacf37946c --- /dev/null +++ b/.github/org_control/ldap_api.py @@ -0,0 +1,236 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" +Gets info about users and groups via LDAP +""" + +# pylint: disable=fixme,no-member + +from enum import Enum + +from ldap3 import Server, Connection, ALL, SUBTREE + +from configs import Config + + +class LdapApiException(Exception): + """Base LDAP API exception""" + + +class InfoLevel(Enum): + """Constants for printing user info from LDAP""" + PDL = 'PDL' # Public Distribution List (group of e-mail addresses) + FULL = 'Full' + + +def print_user_info(info, info_level=None): + """Pretty-print of a user info data structure (dict). info_level is the InfoLevel Enum""" + if not info or not info.get('mail'): + raise LdapApiException('ERROR: No info or absent mail') + + def get_membership(): + if info_level == InfoLevel.PDL: + membership_info = ' PDLs:' + elif info_level == InfoLevel.FULL: + membership_info = ' memberOf :' + else: + return '' + # Grouping groups by purpose + if info_level == InfoLevel.PDL: + sort_key = lambda i: i.split(',', 1)[0].lower() + else: + sort_key = lambda i: i.split(',', 1)[1] + i.split(',', 1)[0].lower() + for item in sorted(info['memberOf'], key=sort_key): + if info_level == InfoLevel.PDL and 'OU=Delegated' not in item: + continue + membership_info += f'\n {item}' + return membership_info + + try: + text_info = \ + f'\n{info["cn"]} <{info["mail"]}>; {info["sAMAccountName"]}; {info["employeeID"]}' \ + f'\n Org group: {info["intelSuperGroupDescr"]} ({info["intelSuperGroupShortName"]}) /'\ + f' {info["intelGroupDescr"]} ({info["intelGroupShortName"]}) /' \ + f' {info["intelDivisionDescr"]} ({info["intelDivisionShortName"]}) /' \ + f' {info["intelOrgUnitDescr"]}' \ + f'\n Manager: {info["manager"]}' \ + f'\n Location: {info["intelRegionCode"]} / {info["co"]} / {info["intelSiteCode"]} /' \ + f' {info["intelBldgCode"]} ({info["intelSiteName"]}) /' \ + f' {info["physicalDeliveryOfficeName"]}' \ + f'\n Other: {info["employeeType"]} | {info["intelExportCountryGroup"]} |' \ + f' {info["whenCreated"]} | {info["intelCostCenterDescr"]} | {info["jobDescription"]}' + except Exception as exc: + raise LdapApiException(f'ERROR: Failed to get info about "{info["mail"]}". ' \ + f'Exception occurred:\n{repr(exc)}') from exc + print(text_info) + + membership = get_membership() + if info_level == InfoLevel.PDL and membership: + print(membership) + elif info_level == InfoLevel.FULL: + for key in sorted(info): + if isinstance(info[key], list): + if key == 'memberOf': + print(membership) + else: + print(f' {key} :') + for item in info[key]: + print(' ', item) + else: + print(f' {key} : {info[key]}') + + +class LdapApi: + """LDAP API for getting user info and emails""" + + _binary_blobs = ['thumbnailPhoto', 'msExchUMSpokenName', 'msExchBlockedSendersHash'] + _check_existing = [ + 'intelExportCountryGroup', + 'physicalDeliveryOfficeName', + 'intelSuperGroupShortName', + 'intelGroupShortName', + 'intelDivisionShortName', + ] + + null = '' + + def __init__(self): + self._cfg = Config() + self.server = Server(self._cfg.LDAP_SERVER, get_info=ALL) + self.connection = Connection(self.server, + user=self._cfg.LDAP_USER, + password=self._cfg.LDAP_PASSWORD, + auto_bind=True) + self.connection.bind() + + def get_user_emails(self, groups=None): + """Gets emails of LDAP groups and sub-groups""" + print('\nGet emails from LDAP groups:') + processed_ldap_members = {} + + def process_group_members(member, parent_group): + if member in processed_ldap_members: + processed_ldap_members[member]['parent_groups'].append(parent_group) + print('\nWARNING: Ignore LDAP member to avoid duplication and recursive cycling ' + f'of PDLs: {member}\n ' + f'email: {processed_ldap_members[member].get("email")}\n parent_groups:') + for group in processed_ldap_members[member].get('parent_groups', []): + print(7 * ' ', group) + + return + processed_ldap_members[member] = {'email': None, 'parent_groups': [parent_group]} + + # AD moves terminated users to the boneyard OU in case the user returns, + # so it can be reactivated with little effort. + # After 30 days it is removed and the unix personality becomes unlinked. + if 'OU=Boneyard' in member: + return + self.connection.search(member, r'(objectClass=*)', SUBTREE, + attributes=['cn', 'member', 'mail']) + + #print(self.connection.entries) + if not self.connection.response: + raise LdapApiException(f'ERROR: empty response. LDAP member: {member}') + + # Check that the member is worker. + # The response can contain several items, but the first item is valid only + if 'OU=Workers' in member: + if self.connection.response[0]['attributes']['mail']: + processed_ldap_members[member]['email'] = \ + self.connection.response[0]['attributes']['mail'].lower() + return + raise LdapApiException(f'ERROR: no mail. LDAP worker: {member}\n' + f'{self.connection.entries}') + + if len(self.connection.response) > 1: + raise LdapApiException(f'ERROR: multiple responses for {member}: ' + f'{len(self.connection.response)}\n' + f'{self.connection.entries}') + + if self.connection.response[0]['attributes']['member']: + for group_member in self.connection.response[0]['attributes']['member']: + process_group_members(group_member, member) + else: + print(f'\nERROR: no members in LDAP group: {member}\n{self.connection.entries}') + + for group in groups or self._cfg.LDAP_PDLs: + print('\nProcess ROOT LDAP group:', group) + process_group_members(group, 'ROOT') + return { + member.get('email') for member in processed_ldap_members.values() if member.get('email') + } + + + def _get_user_info(self, query): + """Gets user info from LDAP as dict matching key and values pairs from query""" + query_filter = ''.join(f'({key}={value})' for key, value in query.items()) + + for domain in self._cfg.LDAP_DOMAINS: + search_base = f'OU=Workers,DC={domain},DC=corp,DC=intel,DC=com' + self.connection.search( + search_base, + f'(&(objectcategory=person)(objectclass=user)(intelflags=1){query_filter})', + SUBTREE, + attributes=['*']) + + if self.connection.response: + if len(self.connection.response) > 1: + raise LdapApiException(f'ERROR: multiple responses for {query_filter}: ' + f'{len(self.connection.response)}\n' + f'{self.connection.entries}') + info = self.connection.response[0]['attributes'] + + # remove long binary blobs + for blob in LdapApi._binary_blobs: + info[blob] = b'' + for key in LdapApi._check_existing: + if not info.get(key): + info[key] = LdapApi.null + return info + return {} + + + def get_user_info_by_idsid(self, idsid): + """Gets user info from LDAP as dict using account name for searching""" + return self._get_user_info({'sAMAccountName': idsid}) + + + def get_user_info_by_name(self, name): + """Gets user info from LDAP as dict using common name for searching""" + return self._get_user_info({'cn': name}) + + + def get_user_info_by_email(self, email): + """Gets user info from LDAP as dict using emails for searching""" + return self._get_user_info({'mail': email}) + + + def get_absent_emails(self, emails): + """Checks users by email in LDAP and returns absent emails""" + absent_emails = set() + for email in emails: + if not self.get_user_info_by_email(email): + absent_emails.add(email) + return absent_emails + + +def _test(): + """Test and debug""" + ldap = LdapApi() + + emails = ldap.get_user_emails() + print(f'\nLDAP emails count: {len(emails)}\n{"; ".join(emails)}') + + emails = ['foo@intel.com'] + + for email in emails: + info = ldap.get_user_info_by_email(email) + if info: + print_user_info(info, InfoLevel.PDL) + else: + print(f'\n{email} - not found') + + +if __name__ == '__main__': + _test() diff --git a/.github/org_control/requirements-dev.txt b/.github/org_control/requirements-dev.txt new file mode 100644 index 00000000000000..36d944e3a509a6 --- /dev/null +++ b/.github/org_control/requirements-dev.txt @@ -0,0 +1 @@ +pylint==2.5.3 diff --git a/.github/org_control/requirements.txt b/.github/org_control/requirements.txt index 625c5c263e190f..57c70e74bd3c97 100644 --- a/.github/org_control/requirements.txt +++ b/.github/org_control/requirements.txt @@ -1 +1,2 @@ PyGithub==1.51 +ldap3==2.7 diff --git a/.github/org_control/requirements_dev.txt b/.github/org_control/requirements_dev.txt deleted file mode 100644 index cbc9f5a6d0c30f..00000000000000 --- a/.github/org_control/requirements_dev.txt +++ /dev/null @@ -1 +0,0 @@ -pylint==2.3.0