# blob: 702fe203e7d36e0e06eb44e9645fc6e4d65a05b8 (plain)
# written 2007 by Markus Ullmann <jokey@gentoo.org>
# License: GPL-2

"""core module"""

from dbgenerator.backend import domain_repo_interface
import sys


def init_front_back():
    """Create the repository backend and the package database handle.

    The backend is a domain_repo_interface pointed at the domain named
    "livefs domain" and the work repo with id "gentoo".  The database
    class is picked from DatabaseConfig.mode ('sqlite' or 'mysql') and
    constructed with the matching entry of DatabaseConfig.settings.

    Returns a (backend, database) tuple.  When the configured mode is
    neither 'sqlite' nor 'mysql', a message is printed and the process
    exits with status 1.
    """

    repo = domain_repo_interface()
    repo.set_domain_by_name("livefs domain")
    repo.set_work_repo_by_id("gentoo")

    # Imported lazily so that only the driver module for the selected
    # database flavour has to be importable.
    from etc.database_config import DatabaseConfig
    mode = DatabaseConfig.mode
    package_db = None
    if mode == 'sqlite':
        from dbgenerator.database import SQLitePackageDB
        package_db = SQLitePackageDB(DatabaseConfig.settings['sqlite'])
    elif mode == 'mysql':
        from dbgenerator.database import MySQLPackageDB
        package_db = MySQLPackageDB(DatabaseConfig.settings['mysql_rw'])

    if package_db is None:
        print("choose database in core.py first")
        sys.exit(1)

    return (repo, package_db)


def latest_changelog(full_changelog):
    """return latest message from changelog file

    full_changelog is the complete ChangeLog text, or the literal
    string "No ChangeLog file there" when the package has none.

    The text is split into paragraphs on blank lines.  Leading
    paragraphs that start with "#" are skipped.  The first remaining
    paragraph is the result; if it starts with "*" the paragraph that
    follows it (when there is one) is appended on a new line.  Every
    pair of consecutive spaces is then removed from the result.

    Returns "" when there is no ChangeLog or when it consists solely
    of "#" paragraphs.
    """

    if full_changelog == "No ChangeLog file there":
        return ""

    paragraphs = full_changelog.split("\n\n")
    total = len(paragraphs)

    # Some changelogs have extra comments at the top
    # We ignore those
    idx = 0
    while idx < total and paragraphs[idx].startswith("#"):
        idx += 1
    if idx == total:
        # Nothing but comment paragraphs: there is no entry to report.
        return ""

    entry = paragraphs[idx]
    # A "*" paragraph is only a header line; the message itself is in
    # the next paragraph, which may be absent in a truncated file.
    if entry.startswith("*") and idx + 1 < total:
        entry += "\n" + paragraphs[idx + 1]

    return entry.replace("  ", "")

def process_metadata(backend, database, cat, pn):
    """Read one package's metadata from the backend and store it.

    Queries the backend for the (cat, pn) pair's description, homepage
    and license, its ChangeLog text and the ChangeLog's (mtime, sha1),
    then hands everything, with the ChangeLog reduced to its latest
    entry, to database.add_metadata().
    """
    key = (cat, pn)

    # general metadata
    app_meta = backend.get_app_metadata(key)
    description, homepage, pkglicense = app_meta

    # only the newest ChangeLog entry is kept
    newest_entry = latest_changelog(backend.get_changelog(key))
    mtime, sha1 = backend.get_changelog_meta(key)

    # store metadata
    database.add_metadata(
        cat, pn,
        description, homepage, pkglicense,
        newest_entry,
        mtime, sha1)

def cleanup_database(database, old_cps, old_cpvs):
    """Delete leftover package and version entries, then commit.

    For each id in old_cps, the ids returned by database.child_cpv()
    are merged into old_cpvs (the caller's set is modified in place)
    before del_metadata() and del_packages() are called for that id.
    Afterwards del_keywords(), del_verbumps() and del_versions() are
    called for every id in old_cpvs, and the database is committed.
    """
    for stale_cp in old_cps:
        # Collect the children first, while the package is still there.
        children = database.child_cpv(stale_cp)
        old_cpvs.update(children)
        database.del_metadata(stale_cp)
        database.del_packages(stale_cp)

    for stale_cpv in old_cpvs:
        database.del_keywords(stale_cpv)
        database.del_verbumps(stale_cpv)
        database.del_versions(stale_cpv)

    database.commit()

def main():
    """build new / refresh current package database

    Walks backend.all_packages_list.  A package whose ChangeLog
    (mtime, sha1) equals what the database has stored is skipped.
    Otherwise each of its versions is (re)added together with its
    keywords -- unless the database already holds that version with the
    same sha1 -- and, if any version was written, the package metadata
    is refreshed and committed.  Package and version ids that were in
    the database at start but were not seen during the walk are handed
    to cleanup_database() at the end.
    """

    backend, database = init_front_back()

    # Per-version sha1 comparison and the package.mask pass only run
    # when database.initdb is falsy.
    # NOTE(review): presumably initdb marks a freshly created database;
    # confirm against the database module.
    incremental = not database.initdb

    # Everything currently stored counts as stale until seen again.
    stale_cps = set(database.all_cp())
    stale_cpvs = set(database.all_cpv())
    # Ids that were not stored before this run; kept for debugging only.
    fresh_cps = set()
    fresh_cpvs = set()
    print('Total CP=%d CPV=%d' % (len(stale_cps), len(stale_cpvs)))

    # iter over all packages
    for cat, pn in backend.all_packages_list:
        key = (cat, pn)
        # Were more detailed changes actually present?
        dirty = False

        cpi = database.find_cp(cat, pn)

        # Check for the changelog changing
        log_mtime, log_sha1 = backend.get_changelog_meta(key)
        _, known_mtime, known_sha1 = database.get_changelog(cat, pn)
        if log_mtime == known_mtime and log_sha1 == known_sha1:
            # Unchanged package: it and all its versions are still valid.
            if cpi and cpi in stale_cps:
                stale_cps.discard(cpi)
                stale_cpvs.difference_update(database.child_cpv(cpi))
            continue

        # iter over ebuilds for keyword data
        versions = backend.get_package_keywords_dict(key)
        for pv, keywords_dict, mtime, sha1 in versions:
            if incremental:
                found = database.find_cpv(cat, pn, pv)
                if found:
                    cpvi, _, stored_sha1 = found
                else:
                    cpvi, _, stored_sha1 = None, None, None
                if stored_sha1 == sha1:
                    # Same content as stored: keep it, nothing to write.
                    stale_cpvs.discard(cpvi)
                    continue

            dirty = True
            cpi, cpvi, _, _ = database.add_version(cat, pn, pv, mtime, sha1)
            database.add_keywords(cat, pn, pv, keywords_dict)
            if cpvi in stale_cpvs:
                stale_cpvs.discard(cpvi)
            else:
                fresh_cpvs.add(cpvi)

        if cpi:
            if cpi in stale_cps:
                stale_cps.discard(cpi)
            else:
                fresh_cps.add(cpi)

        if dirty:
            print("Working on %s/%s" % (cat, pn))
            process_metadata(backend, database, cat, pn)
            database.commit()

    # We need to assume that package.mask has always changed
    # and re-process keywords for every atom mentioned therein
    # Costs 14 seconds here at the moment
    # TODO: detect if it has actually changed
    if incremental:
        for cat, pn in backend.pmask_packages_list:
            masked = backend.get_package_keywords_dict((cat, pn))
            for pv, keywords_dict, _, _ in masked:
                database.add_keywords(cat, pn, pv, keywords_dict)

    # Clean up old stuff
    cleanup_database(database, stale_cps, stale_cpvs)

# Build / refresh the package database only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()

# vim:ts=4 et ft=python: