Support reading multiple CSVs in csv-to-json.py

This commit is contained in:
Oliver Jowett 2016-09-10 17:26:55 +01:00
parent f45430e3b5
commit 2a3e4a0a0d

View file

@@ -8,30 +8,38 @@
import sqlite3, json, sys, csv import sqlite3, json, sys, csv
from contextlib import closing from contextlib import closing
def readcsv(name, infile, blocks):
    """Merge the aircraft records of one CSV stream into ``blocks``.

    ``blocks`` maps the first hex digit of an ICAO 24-bit address (uppercase)
    to a dict keyed by the remaining digits (uppercase); each value is a dict
    of the non-empty CSV columns for that aircraft. The dict is updated in
    place so that several CSV files can be layered: values read later
    overwrite values read earlier for the same aircraft and column.

    Args:
        name: Label for the input, used only in progress messages on stderr.
        infile: Iterable of CSV lines (an open file, ``sys.stdin``, ...).
            The first line must be a header containing an ``icao24`` column.
        blocks: Dict to merge into. The 16 hex-digit buckets are created
            if they are missing.

    Raises:
        RuntimeError: If the header has no ``icao24`` column (this includes
            completely empty input).
    """
    sys.stderr.write('Reading from %s\n' % (name,))

    # Create any missing top-level bucket; existing buckets are left alone so
    # data from previously read files is preserved.
    block_keys = ['%01X' % i for i in range(16)]
    for key in block_keys:
        blocks.setdefault(key, {})

    ac_count = 0
    reader = csv.DictReader(infile)
    # fieldnames is None when the input has no header row at all.
    if 'icao24' not in (reader.fieldnames or ()):
        raise RuntimeError('CSV should have at least an "icao24" column')

    for row in reader:
        entry = {}
        for k, v in row.items():
            # DictReader stores surplus cells under the key None and pads
            # short rows with None values; neither is real data.
            if k is None or k == 'icao24' or v is None or v == '':
                continue
            entry[k] = v

        # Rows that carry nothing besides the address add no information.
        if not entry:
            continue

        icao24 = (row['icao24'] or '').upper()
        bkey = icao24[0:1]
        dkey = icao24[1:]
        if bkey not in block_keys:
            # Empty or non-hex address: report it instead of dying with a
            # bare KeyError in the middle of the file.
            sys.stderr.write('Skipping row with invalid icao24 %r in %s\n'
                             % (row['icao24'], name))
            continue

        ac_count += 1
        blocks[bkey].setdefault(dkey, {}).update(entry)

    sys.stderr.write('Read %s aircraft from %s\n' % (ac_count, name))
def writedb(blocks, todir, blocklimit, debug):
block_count = 0
print >>sys.stderr, 'Writing blocks:', print >>sys.stderr, 'Writing blocks:',
queue = sorted(blocks.keys()) queue = sorted(blocks.keys())
@@ -88,13 +96,19 @@ def extract(infile, todir, blocklimit, debug):
# Command-line entry point: read every CSV named on the command line (in
# order, so later files win on conflicts) and write the merged database to
# the directory given as the last argument.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        usage = (
            'Reads a CSV file with aircraft information and produces a directory of JSON files',
            'Syntax: %s <path to CSV> [... additional CSV files ...] <path to DB dir>' % sys.argv[0],
            'Use "-" as the CSV path to read from stdin',
            'If multiple CSV files are specified and they provide conflicting data',
            'then the data from the last-listed CSV file is used',
        )
        # sys.stderr.write behaves the same on Python 2 and 3, unlike the
        # "print >>stream" statement.
        sys.stderr.write('\n'.join(usage) + '\n')
        sys.exit(1)

    blocks = {}
    # Everything between the program name and the final argument is an input.
    for filename in sys.argv[1:-1]:
        if filename == '-':
            readcsv('stdin', sys.stdin, blocks)
        else:
            # File objects are context managers themselves; no closing() needed.
            with open(filename, 'r') as infile:
                readcsv(filename, infile, blocks)

    # 1000 and False are the blocklimit and debug arguments of writedb.
    writedb(blocks, sys.argv[-1], 1000, False)
    sys.exit(0)