diff options
Diffstat (limited to 'ebuild-indexer.py')
-rwxr-xr-x | ebuild-indexer.py | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/ebuild-indexer.py b/ebuild-indexer.py new file mode 100755 index 0000000..3a8e2dc --- /dev/null +++ b/ebuild-indexer.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python2.4 + +import os +import re +import portage +import portage_dep, portage_util, portage_versions + +use_reduce = None + +try: use_reduce = portage_dep.use_reduce +except AttributeError: + use_reduce = portage.use_reduce + +paren_reduce = None +try: paren_reduce = portage_dep.paren_reduce +except AttributeError: + paren_reduce = portage.paren_reduce + +flatten = None +try: flatten = portage_util.flatten +except AttributeError: + flatten = portage.flatten + +import md5 +import sha +md5_cons = md5.new +sha1_cons = sha.new + +porttree = "/usr/portage" + +outdir = "ebuildindex" +if not os.path.exists(outdir): + os.mkdir(outdir) +elif not os.path.isdir(outdir): + import sys + print "%s exists, but is not a directory." % (outdir) + sys.exit(2) + + +def write_ebuild_stats(fd_out, cpv, dbapi): + try: + # find out the actual path to the ebuild + path = dbapi.findname(cpv) + filesize = os.path.getsize(path) + mtime = os.path.getmtime(path) + + # hash it + ebuild = open(path) + (md5, sha1) = hash_file(ebuild, md5_cons(), sha1_cons()) + ebuild.seek(0) + + # Find $Header$ or $Id$ line + matcher = re.compile("(\$(Header|Id):.*\$)") + header = "" + for line in ebuild: + m = matcher.search(line) + if m: + header = m.group(1) + break + ebuild.close() + except: + filesize = 0 + mtime = 0 + md5 = 0 + sha1 = 0 + header = "" + pass + fd_out.write("Ebuild-PF: %s\n" % (cpv)) + fd_out.write("Ebuild-mtime: %s\n" % (mtime)) + fd_out.write("Ebuild-size: %s\n" % (filesize)) + fd_out.write("Ebuild-md5: %s\n" % (md5)) + fd_out.write("Ebuild-sha1: %s\n" % (sha1)) + fd_out.write("Ebuild-header: %s\n" % (header)) + + # Reconstruct portage's CPV-related variables + cpvlist = portage_versions.catpkgsplit(cpv) + if len(cpvlist) == 4: + cat = cpvlist[0] + pac = cpvlist[1] + ver = cpvlist[2] + rev = cpvlist[3] + fd_out.write("Ebuild-PN: %s\n" % (pac)) + fd_out.write("Ebuild-PV: %s\n" % (ver)) + fd_out.write("Ebuild-PR: %s\n" % (rev)) + fd_out.write("Ebuild-CATEGORY: %s\n" % (cat)) + fd_out.write("Ebuild-P: %s-%s\n" % (pac, ver)) + fd_out.write("Ebuild-PVR: %s-%s-%s\n" % (pac, ver, rev)) + + +def hash_file(fileobj, *hashobjects): + """ RAM efficient hashing implementation for stream-based file objects. """ + data = fileobj.read(1024*1024) + while data: + for ho in hashobjects: + ho.update(data) + data = fileobj.read(1024*1024) + + return (ho.hexdigest() for ho in hashobjects) + + + +config = portage.settings +config.unlock() +config["PORTDIR_OVERLAY"] = '' +config["PORTDIR"] = porttree + +dbapi = portage.portdbapi(porttree, mysettings=config) + + +for cp in dbapi.cp_all(): + for cpv in dbapi.cp_list(cp): + uris = "" + try: + uris, = dbapi.aux_get(cpv, ("SRC_URI",)) + uris = use_reduce(paren_reduce(uris), matchall=1) + uris = flatten(uris) + except Exception, e: + print "Error with %s: %s" % (cpv, str(e)) + continue + + indexfile = "%s/%s.DIST" % (outdir, cpv.replace("/", "+")) + fd_out = open(indexfile, "w") + + write_ebuild_stats(fd_out, cpv, dbapi) + + oldfiles = {} + num = 0 + for uri in uris: + filename = os.path.basename(uri) + filenum = num + if filename in oldfiles: + # in case we have multiple uri's for one filename, + # use the old filenum + filenum = oldfiles[filename] + else: + oldfiles[filename] = num + fd_out.write("Distfile-%05d-name: %s\n" % (num, filename)) + num += 1 + fd_out.write("Distfile-%05d-uri: %s\n" % (filenum, uri)) + + fd_out.close() |