summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matt Jolly <kangie@gentoo.org> 2024-09-27 10:46:24 +1000
committer: Matt Jolly <kangie@gentoo.org> 2024-09-27 10:52:11 +1000
commit: e0adc1721f392c89c8262c4f864f6b1edf796edc (patch)
tree: d1dd7e9a760622d789488440825f50c8fcc23414
parent: flake8: Add config and do some trivial style changes (diff)
download: chromium-tools-e0adc1721f392c89c8262c4f864f6b1edf796edc.tar.gz
chromium-tools-e0adc1721f392c89c8262c4f864f6b1edf796edc.tar.bz2
chromium-tools-e0adc1721f392c89c8262c4f864f6b1edf796edc.zip
get-opera-version-mapping: major refactor

- Rework the logic to get a better result when remediating
- Also store the version mapping in a dataclass (why not).
- Use packaging.version.Version to make sorting versions trivial
- Accept positional arguments for the max and min versions.

Signed-off-by: Matt Jolly <kangie@gentoo.org>

-rwxr-xr-x get-opera-version-mapping.py 118
1 files changed, 86 insertions, 32 deletions
diff --git a/get-opera-version-mapping.py b/get-opera-version-mapping.py
index 6d6f3de..015fd21 100755
--- a/get-opera-version-mapping.py
+++ b/get-opera-version-mapping.py
@@ -1,6 +1,32 @@
#!/usr/bin/env python
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+# This script is used to extract Opera and Chromium versions from the Opera changelog (blog)
+# This is incomplete data, so we need to fill in the gaps with the Chromium version from the previous known version
+# The intent here is to have _some_ sort of datasource to identify a potentially-fixed version of Opera based on
+# the Chromium version it includes.
+# High level logic:
+# We can fetch the opera blog posts that relate to a major version of Opera as long as they don't change their URIs.
+# We iterate over H4 elements to get the Opera version (and date, though we throw that away)
+# We then iterate over child elements until we find an "Update Chromium" entry, which we can use to get the
+# Chromium version (in which case we bail early) Or we exhaust the children and give up.
+# Lather, rinse, repeat.
+
+import argparse, dataclasses
+
import requests
from bs4 import BeautifulSoup
+from packaging.version import Version
+
+
+@dataclasses.dataclass
+class OperaChromiumVersion:
+ opera_version: Version
+ chromium_version: Version
+
+ def __str__(self):
+ chromium_version_str = 'unknown' if self.chromium_version == Version('0.0.0.0') else str(self.chromium_version)
+ return f"Opera Version: {self.opera_version}, Chromium Version: {chromium_version_str}"
def get_opera_chromium_versions(base_url, start_version, end_version):
@@ -15,16 +41,11 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
end_version: The ending version to extract information for (inclusive).
Returns:
- A dictionary mapping Opera version to Chromium version.
- If no update is mentioned, the previous Chromium version is used.
- For missing data or errors, "unknown" is used.
+ A list of OperaChromiumVersion objects containing the extracted version information.
"""
- versions = {}
- chromium_version = None
+ versions: list[OperaChromiumVersion] = []
for version in range(start_version, end_version + 1):
- # Fix formatting issue:
- # OR url = base_url.format(version)
url = base_url.format(version)
print(f"Processing version {version}")
@@ -38,8 +59,8 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
# Iterate through each section starting with an H4 element
for section in content.find_all('h4'):
+ chromium_version = None
version_str, date_str = section.text.strip().split(' – ')
- versions[version_str] = chromium_version
# Process all content elements (including nested ones) until the next H4
next_sibling = section.find_next_sibling(
@@ -63,7 +84,12 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
# Handle missing Chromium version
if not chromium_version:
- chromium_version = "unknown"
+ chromium_version = '0.0.0.0'
+
+ versions.append(OperaChromiumVersion(
+ Version(version_str),
+ Version(chromium_version)
+ ))
except requests.exceptions.RequestException as e:
if e.args and e.args[0] == 404:
@@ -76,41 +102,69 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
print(f"Unexpected error: {e}")
chromium_version = None # Reset chromium_version for next iteration
- return versions
+ # We're broadly sorted by major version, but within each major version we get newer entries first
+ # Sort by Opera version to get the correct order
+ sorted_versions = sorted(versions, key=lambda x: x.opera_version)
+ return sorted_versions
def remediate_unknown_versions(versions):
"""
- Remediates entries with "unknown" values in the versions dictionary by
+ Remediates entries with '0.0.0.0' values in the versions dictionary by
assuming no change from the previous known version.
Args:
- versions: A dictionary mapping Opera version to Chromium version.
+ versions: A list of OperaChromiumVersion objects containing the extracted version information.
Returns:
- The modified versions dictionary with "unknown" values replaced based on previous entries.
+ A list of OperaChromiumVersion objects with '0.0.0.0' values replaced
+ by the previous known version if available.
"""
- previous_version = None
- for version, chromium_version in versions.items():
- if chromium_version == "unknown":
- if previous_version is not None:
- # Update with previous version
- versions[version] = previous_version
+ previous_version: Version = Version('0.0.0.0')
+ fixed_versions: list[OperaChromiumVersion] = []
+
+ for mapping in versions:
+ if mapping.chromium_version == Version('0.0.0.0') and previous_version is not Version('0.0.0.0'):
+ # Update with previous version
+ fixed_versions.append(OperaChromiumVersion(mapping.opera_version, previous_version))
else:
- previous_version = chromium_version # Update known version for future references
- return versions
+ # This should be fine, we're always parsing from oldest to newest
+ if previous_version < mapping.chromium_version:
+ previous_version = mapping.chromium_version
+ fixed_versions.append(mapping)
+
+ return fixed_versions
+
+
+def parse_arguments():
+ """
+ Parses the command line arguments and returns the parsed values.
+
+ Returns:
+ The parsed command line arguments.
+ """
+ parser = argparse.ArgumentParser(description='Get Opera and Chromium versions.')
+ parser.add_argument('start_ver', type=int, help='starting version', default=110)
+ parser.add_argument('end_ver', type=int, help='ending version', default=115)
+ return parser.parse_args()
+
+
+def main():
+ args = parse_arguments()
+
+ # Base URL with version placeholder
+ base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
+ opera_chromium_versions = get_opera_chromium_versions(base_url, args.start_ver, args.end_ver)
+ fixed_versions = remediate_unknown_versions(opera_chromium_versions)
-# Example usage
-# Base URL with version placeholder
-base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
-opera_chromium_versions = get_opera_chromium_versions(base_url, 110, 115)
+ # Print the versions
+ if fixed_versions:
+ for mapping in fixed_versions:
+ print(mapping)
+ else:
+ print("Failed to extract any versions.")
-opera_chromium_versions = remediate_unknown_versions(opera_chromium_versions)
-if opera_chromium_versions:
- for opera_version, chromium_version in opera_chromium_versions.items():
- print(
- f"Opera Version: {opera_version}, Chromium Version: {chromium_version}")
-else:
- print("Failed to extract any versions.")
+if __name__ == "__main__":
+ main()