From 5f3073d21e0748a9414fbd516c3e032d0456ab35 Mon Sep 17 00:00:00 2001 From: Mart Raudsepp Date: Wed, 7 Dec 2016 06:41:46 +0200 Subject: sync: Always handle e-mails in lower case to not end up with duplicates Suggested-by: Doug Freed --- backend/lib/models.py | 1 + backend/lib/sync.py | 24 ++++++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/backend/lib/models.py b/backend/lib/models.py index ba20622..2eb9e8c 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -50,6 +50,7 @@ maintainer_project_membership_rel_table = db.Table('maintainer_project_membershi class Maintainer(db.Model): id = db.Column(db.Integer, primary_key=True) + # TODO: This has to be unique case insensitive. Currently we have to always force lower() to guarantee this and find the proper maintainer entry; later we might want to use some sort of NOCASE collate rules here to keep the capitalization as preferred per master data email = db.Column(db.Unicode(50), nullable=False, unique=True) is_project = db.Column(db.Boolean, nullable=False, server_default='f', default=False) name = db.Column(db.Unicode(128)) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 7ba583d..744811b 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -28,7 +28,9 @@ def get_project_data(): proj = {} for elem in proj_elem: tag = elem.tag.lower() - if tag in ['email', 'name', 'url', 'description']: + if tag in ['email']: + proj[tag] = elem.text.lower() + if tag in ['name', 'url', 'description']: proj[tag] = elem.text elif tag == 'member': member = {} @@ -36,19 +38,20 @@ def get_project_data(): member['is_lead'] = True for member_elem in elem: member_tag = member_elem.tag.lower() - if member_tag in ['email', 'name', 'role']: + if member_tag in ['email']: + member[member_tag] = member_elem.text.lower() + if member_tag in ['name', 'role']: member[member_tag] = member_elem.text if 'email' in member: if 'members' not in proj: proj['members'] = [] proj['members'].append(member) - pass elif tag == 'subproject': if 'ref' in elem.attrib: if 'subprojects' not in proj: proj['subprojects'] = [] # subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is True or False. TODO: Might change if sync code will want it differently - proj['subprojects'].append((elem.attrib['ref'], True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') else False)) + proj['subprojects'].append((elem.attrib['ref'].lower(), True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') else False)) else: print("Invalid tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "") else: @@ -77,7 +80,7 @@ def sync_projects(): existing_maintainers[email].url = data['url'] else: print ("Adding project %s" % email) - new_maintainer = Maintainer(email=data['email'], is_project=True, description=data['description'], name=data['name'], url=data['url']) + new_maintainer = Maintainer(email=email, is_project=True, description=data['description'], name=data['name'], url=data['url']) db.session.add(new_maintainer) existing_maintainers[email] = new_maintainer members = [] @@ -176,16 +179,17 @@ def sync_versions(): if 'email' not in maint: print("WARNING: Package %s was told to have a maintainer without an e-mail identifier" % package.full_name) continue - if maint['email'] in existing_maintainers: # FIXME: Some proxy-maintainers are using mixed case e-mail address, right now we'd be creating duplicates right now if the case is different across different packages - maintainers.append(existing_maintainers[maint['email']]) + email = maint['email'].lower() + if email in existing_maintainers: + maintainers.append(existing_maintainers[email]) else: is_project = False if 'type' in maint and maint['type'] == 'project': is_project = True - print("Adding %s maintainer %s" % ("project" if is_project else "individual", maint['email'])) - new_maintainer = Maintainer(email=maint['email'], is_project=is_project, name=maint['name'] if 'name' in maint else None) + print("Adding %s maintainer %s" % ("project" if is_project else "individual", email)) + new_maintainer = Maintainer(email=email, is_project=is_project, name=maint['name'] if 'name' in maint else None) db.session.add(new_maintainer) - existing_maintainers[maint['email']] = new_maintainer + existing_maintainers[email] = new_maintainer maintainers.append(new_maintainer) # Intentionally outside if 'maintainers' in pkg, because if there are no maintainers in JSON, it's falled to maintainer-needed and we need to clean out old maintainer entries -- cgit v1.2.3-65-gdbad