Avoid creating small child blocks; keep data in the parent where possible.

This reduces the number of blocks from 151 to 61.
This commit is contained in:
Oliver Jowett 2015-02-24 23:04:26 +00:00
parent da2fff8531
commit 8209267301
2 changed files with 34 additions and 14 deletions

View file

@ -47,20 +47,19 @@ function request_from_db(icao, level, defer) {
req.done(function(data) { req.done(function(data) {
var subkey; var subkey;
if ("children" in data) {
subkey = bkey + dkey.substring(0,1);
if (data.children.indexOf(subkey) == -1) {
defer.reject();
} else {
request_from_db(icao, level+1, defer);
}
} else {
if (dkey in data) { if (dkey in data) {
defer.resolve(data[dkey]); defer.resolve(data[dkey]);
} else { return;
}
if ("children" in data) {
subkey = bkey + dkey.substring(0,1);
if (data.children.indexOf(subkey) != -1) {
request_from_db(icao, level+1, defer);
return;
}
}
defer.reject(); defer.reject();
}
}
}); });
req.fail(function(jqXHR,textStatus,errorThrown) { req.fail(function(jqXHR,textStatus,errorThrown) {

View file

@ -36,17 +36,38 @@ def extract(dbfile, todir, blocklimit):
blockdata = blocks[bkey] blockdata = blocks[bkey]
if len(blockdata) > blocklimit: if len(blockdata) > blocklimit:
print 'Splitting block', bkey, 'with', len(blockdata), 'entries..', print 'Splitting block', bkey, 'with', len(blockdata), 'entries..',
# split all children out
children = {} children = {}
for dkey in blockdata.keys(): for dkey in blockdata.keys():
new_bkey = bkey + dkey[0] new_bkey = bkey + dkey[0]
new_dkey = dkey[1:] new_dkey = dkey[1:]
if new_bkey not in children: blocks[new_bkey] = children[new_bkey] = {} if new_bkey not in children: children[new_bkey] = {}
children[new_bkey][new_dkey] = blockdata[dkey] children[new_bkey][new_dkey] = blockdata[dkey]
print len(children), 'children' # look for small children we can retain in the parent, to
queue.extend(children.keys()) # reduce the total number of files needed. This reduces the
blockdata = blocks[bkey] = { 'children' : sorted(children.keys()) } # number of blocks needed from 150 to 61
blockdata = {}
children = list(children.items())
children.sort(lambda x,y: cmp(len(x[1]), len(y[1])))
retained = 1
while len(children[0][1]) + retained < blocklimit:
# move this child back to the parent
c_bkey, c_entries = children[0]
for c_dkey, entry in c_entries.items():
blockdata[c_bkey[-1] + c_dkey] = entry
retained += 1
del children[0]
print len(children), 'children created,', len(blockdata), 'entries retained in parent'
blockdata['children'] = sorted([x[0] for x in children])
blocks[bkey] = blockdata
for c_bkey, c_entries in children:
blocks[c_bkey] = c_entries
queue.append(c_bkey)
path = todir + '/' + bkey + '.json' path = todir + '/' + bkey + '.json'
print 'Writing', len(blockdata), 'entries to', path print 'Writing', len(blockdata), 'entries to', path