From 35299c47babc177cf939f59b09d3c5a8ed950f9d Mon Sep 17 00:00:00 2001 From: Oliver Jowett Date: Fri, 16 Aug 2019 15:16:55 +0800 Subject: [PATCH] Have filter-regs emit a special value "-COMPUTED-" for filtered data values, rather than blanking them out entirely. This lets csv-to-json.py handle the case where an earlier input file has (e.g.) a registration value which does not match the computed value, and then a later input file has a value that does match. In this case we want to override the older value with the newer value, then notice that the registration can be omitted when writing the database. Previously in this case the older (incorrect) value would be used. --- tools/csv-to-json.py | 11 +++++++++++ tools/filter-regs.js | 20 +++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/tools/csv-to-json.py b/tools/csv-to-json.py index a9a3209..6f5d54e 100755 --- a/tools/csv-to-json.py +++ b/tools/csv-to-json.py @@ -37,6 +37,16 @@ def readcsv(name, infile, blocks): print >>sys.stderr, 'Read', ac_count, 'aircraft from', name +def cleandb(blocks): + for blockdata in blocks.values(): + for dkey in list(blockdata.keys()): + block = blockdata[dkey] + for key in list(block.keys()): + if block[key] == '-COMPUTED-': + del block[key] + if len(block) == 0: + del blockdata[dkey] + def writedb(blocks, todir, blocklimit, debug): block_count = 0 @@ -110,5 +120,6 @@ if __name__ == '__main__': with closing(open(filename, 'r')) as infile: readcsv(filename, infile, blocks) + cleandb(blocks) writedb(blocks, sys.argv[-1], 2500, False) sys.exit(0) diff --git a/tools/filter-regs.js b/tools/filter-regs.js index fa8fa1b..ce0456d 100644 --- a/tools/filter-regs.js +++ b/tools/filter-regs.js @@ -3,10 +3,16 @@ // ICAO addresses (column 'icao24') and registrations // (column 'r') // -// It removes all registration entries that exactly match -// what dump1090 would have computed from the hexid anyway, -// reducing the size of the CSV in the cases where the -// two 
approaches match. +// It replaces all registration entries that exactly match +// what dump1090 would have computed from the hexid anyway +// with the special value "-COMPUTED-"; these values are +// removed from the final data when csv-to-json.py writes +// the json database, reducing the size of the database +// in the cases where the two approaches match. +// +// It does a similar transformation on 'desc' (aircraft type +// description) and 'wtc' (wake turbulence category) based +// on the aircraft type designator in 't', if provided. // // Any additional columns are passed through unchanged. // @@ -28,7 +34,7 @@ var transformer = csv.transform(function (record, callback) { if ('r' in record && record.r != '') { var computed = reglookup(record.icao24); if (computed === record.r) { - record.r = ''; + record.r = '-COMPUTED-'; } else if (computed !== null) { console.warn(record.icao24 + " computed registration " + computed + " but CSV data had " + record.r); } @@ -38,7 +44,7 @@ var transformer = csv.transform(function (record, callback) { if ('desc' in record && record.desc != '') { var computed_desc = actypes[record.t].desc; if (computed_desc === record.desc) { - record.desc = ''; + record.desc = '-COMPUTED-'; } else if (computed_desc !== undefined) { // too noisy, the icao descriptors are very coarse and reality often disagrees //console.warn(record.icao24 + " (" + record.t + "): computed type description " + computed_desc + " but CSV data had " + record.desc); @@ -48,7 +54,7 @@ var transformer = csv.transform(function (record, callback) { if ('wtc' in record && record.wtc != '') { var computed_wtc = actypes[record.t].wtc; if (computed_wtc === record.wtc) { - record.wtc = ''; + record.wtc = '-COMPUTED-'; } else if (computed_desc !== undefined) { //console.warn(record.icao24 + " (" + record.t + "): computed type WTC " + computed_wtc + " but CSV data had " + record.wtc); }