From 5b40377b98d7a84f4b8cefca0966975651eb41c7 Mon Sep 17 00:00:00 2001 From: Oliver Jowett Date: Fri, 23 Jan 2015 01:22:22 +0000 Subject: [PATCH] Measure CPU for demodulator, read thread, and background tasks individually. --- dump1090.c | 63 +++++++++++++++++++++++++++++++++++++++++------------- dump1090.h | 1 + net_io.c | 10 +++++---- stats.c | 37 ++++++++++++++++++++++---------- stats.h | 9 ++++++-- 5 files changed, 88 insertions(+), 32 deletions(-) diff --git a/dump1090.c b/dump1090.c index 09834e3..f13a9ee 100644 --- a/dump1090.c +++ b/dump1090.c @@ -80,6 +80,22 @@ void sigWinchCallback() { #else int getTermRows() { return MODES_INTERACTIVE_ROWS;} #endif + +static void start_cpu_timing(struct timespec *start_time) +{ + clock_gettime(CLOCK_THREAD_CPUTIME_ID, start_time); +} + +static void end_cpu_timing(const struct timespec *start_time, struct timespec *add_to) +{ + struct timespec end_time; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end_time); + add_to->tv_sec += (end_time.tv_sec - start_time->tv_sec - 1); + add_to->tv_nsec += (1000000000L + end_time.tv_nsec - start_time->tv_nsec); + add_to->tv_sec += add_to->tv_nsec / 1000000000L; + add_to->tv_nsec = add_to->tv_nsec % 1000000000L; +} + // // =============================== Initialization =========================== // @@ -326,6 +342,9 @@ int modesInitRTLSDR(void) { // // A Mutex is used to avoid races with the decoding thread. // + +static struct timespec reader_thread_start; + void rtlsdrCallback(unsigned char *buf, uint32_t len, void *ctx) { MODES_NOTUSED(ctx); @@ -358,6 +377,10 @@ void rtlsdrCallback(unsigned char *buf, uint32_t len, void *ctx) { Modes.iDataReady = (MODES_ASYNC_BUF_NUMBER-1); Modes.iDataLost++; } + + // accumulate CPU while holding the mutex, and restart measurement + end_cpu_timing(&reader_thread_start, &Modes.reader_cpu_accumulator); + start_cpu_timing(&reader_thread_start); // Signal to the other thread that new data is ready, and unlock pthread_cond_signal(&Modes.data_cond); @@ -415,6 +438,10 @@ void readDataFromFile(void) { Modes.iDataIn = (MODES_ASYNC_BUF_NUMBER-1) & (Modes.iDataIn + 1); Modes.iDataReady = (MODES_ASYNC_BUF_NUMBER-1) & (Modes.iDataIn - Modes.iDataOut); + // accumulate CPU while holding the mutex, and restart measurement + end_cpu_timing(&reader_thread_start, &Modes.reader_cpu_accumulator); + start_cpu_timing(&reader_thread_start); + // Signal to the other thread that new data is ready pthread_cond_signal(&Modes.data_cond); } @@ -432,6 +459,8 @@ void readDataFromFile(void) { void *readerThreadEntryPoint(void *arg) { MODES_NOTUSED(arg); + start_cpu_timing(&reader_thread_start); // we accumulate in rtlsdrCallback() or readDataFromFile() + if (Modes.filename == NULL) { while (!Modes.exit) { rtlsdr_read_async(Modes.dev, rtlsdrCallback, NULL, @@ -967,11 +996,17 @@ int main(int argc, char **argv) { // If the user specifies --net-only, just run in order to serve network // clients without reading data from the RTL device while (Modes.net_only) { + struct timespec start_time; + if (Modes.exit) { display_total_stats(); exit(0); // If we exit net_only nothing further in main() } + + start_cpu_timing(&start_time); backgroundTasks(); + end_cpu_timing(&start_time, &Modes.stats_current.background_cpu); + usleep(100000); } @@ -980,16 +1015,22 @@ int main(int argc, char **argv) { pthread_mutex_lock(&Modes.data_mutex); while (Modes.exit == 0) { - struct timespec cpu_start_time, cpu_end_time; + struct timespec start_time; if (Modes.iDataReady == 0) { pthread_cond_wait(&Modes.data_cond,&Modes.data_mutex); // This unlocks Modes.data_mutex, and waits for Modes.data_cond continue; // Once (Modes.data_cond) occurs, it locks Modes.data_mutex } + // copy out reader CPU time and reset it + add_timespecs(&Modes.reader_cpu_accumulator, &Modes.stats_current.reader_cpu, &Modes.stats_current.reader_cpu); + Modes.reader_cpu_accumulator.tv_sec = 0; + Modes.reader_cpu_accumulator.tv_nsec = 0; + // Modes.data_mutex is Locked, and (Modes.iDataReady != 0) if (Modes.iDataReady) { // Check we have new data, just in case!! - + start_cpu_timing(&start_time); + Modes.iDataOut &= (MODES_ASYNC_BUF_NUMBER-1); // Just incase // Translate the next lot of I/Q samples into Modes.magnitude @@ -1016,36 +1057,28 @@ int main(int argc, char **argv) { // thread can read data while we perform computationally expensive // stuff at the same time. - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cpu_start_time); - if (Modes.oversample) demodulate2400(Modes.magnitude, MODES_ASYNC_BUF_SAMPLES); else demodulate2000(Modes.magnitude, MODES_ASYNC_BUF_SAMPLES); - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &cpu_end_time); - Modes.stats_current.cputime.tv_sec += (cpu_end_time.tv_sec - cpu_start_time.tv_sec); - Modes.stats_current.cputime.tv_nsec += (cpu_end_time.tv_nsec - cpu_start_time.tv_nsec); - if (Modes.stats_current.cputime.tv_nsec < 0) { - Modes.stats_current.cputime.tv_nsec += 1000000000L; - Modes.stats_current.cputime.tv_sec--; - } else if (Modes.stats_current.cputime.tv_nsec > 1000000000L) { - Modes.stats_current.cputime.tv_nsec -= 1000000000L; - Modes.stats_current.cputime.tv_sec++; - } - // Update the timestamp ready for the next block if (Modes.oversample) Modes.timestampBlk += (MODES_ASYNC_BUF_SAMPLES*5); else Modes.timestampBlk += (MODES_ASYNC_BUF_SAMPLES*6); Modes.stats_current.blocks_processed++; + + end_cpu_timing(&start_time, &Modes.stats_current.demod_cpu); } else { pthread_cond_signal (&Modes.data_cond); pthread_mutex_unlock(&Modes.data_mutex); } + start_cpu_timing(&start_time); backgroundTasks(); + end_cpu_timing(&start_time, &Modes.stats_current.background_cpu); + pthread_mutex_lock(&Modes.data_mutex); } diff --git a/dump1090.h b/dump1090.h index 55fcf8e..ebe3959 100644 --- a/dump1090.h +++ b/dump1090.h @@ -289,6 +289,7 @@ struct { // Internal state int iDataOut; // Fifo output pointer int iDataReady; // Fifo content count int iDataLost; // Count of missed buffers + struct timespec reader_cpu_accumulator; // CPU time used by the reader thread, copied out and reset by the main thread under the mutex int trailing_samples;// extra trailing samples in magnitude buffer diff --git a/net_io.c b/net_io.c index 16354e7..9fc434c 100644 --- a/net_io.c +++ b/net_io.c @@ -827,14 +827,12 @@ static char * appendStatsJson(char *p, p += snprintf(p, end-p, ",\"local\":{\"blocks_processed\":%u" ",\"blocks_dropped\":%u" - ",\"cputime\":%d.%09ld" ",\"modeac\":%u" ",\"modes\":%u" ",\"bad\":%u" ",\"unknown_icao\":%u", st->blocks_processed, st->blocks_dropped, - (int)st->cputime.tv_sec, (long)st->cputime.tv_nsec, st->demod_modeac, st->demod_preambles, st->demod_rejected_bad, @@ -884,14 +882,18 @@ static char * appendStatsJson(char *p, ",\"global_skipped\":%u" ",\"local_ok\":%u" ",\"local_skipped\":%u" - ",\"filtered\":%u" - "},\"messages\":%u}", + ",\"filtered\":%u}" + ",\"cpu\":{\"demod\":%d.%09ld,\"reader\":%d.%09ld,\"background\":%d.%09ld}" + ",\"messages\":%u}", st->cpr_global_ok, st->cpr_global_bad, st->cpr_global_skipped, st->cpr_local_ok, st->cpr_local_skipped, st->cpr_filtered, + (int)st->demod_cpu.tv_sec, (long)st->demod_cpu.tv_nsec, + (int)st->reader_cpu.tv_sec, (long)st->reader_cpu.tv_nsec, + (int)st->background_cpu.tv_sec, (long)st->background_cpu.tv_nsec, st->messages_total); return p; diff --git a/stats.c b/stats.c index 717dd76..fff52ed 100644 --- a/stats.c +++ b/stats.c @@ -49,6 +49,14 @@ #include "dump1090.h" +void add_timespecs(const struct timespec *x, const struct timespec *y, struct timespec *z) +{ + z->tv_sec = x->tv_sec + y->tv_sec; + z->tv_nsec = x->tv_nsec + y->tv_nsec; + z->tv_sec += z->tv_nsec / 1000000000L; + z->tv_nsec = z->tv_nsec % 1000000000L; +} + void display_stats(struct stats *st) { int j; struct tm tm_start, tm_end; @@ -70,13 +78,6 @@ void display_stats(struct stats *st) { printf(" %u sample blocks processed\n", st->blocks_processed); printf(" %u sample blocks dropped\n", st->blocks_dropped); - if (st->blocks_processed > 0) { - long cpu_millis = (long)st->cputime.tv_sec*1000L + st->cputime.tv_nsec/1000000L; - long sample_millis = (long) ((uint64_t)st->blocks_processed * MODES_ASYNC_BUF_SAMPLES / (Modes.oversample ? 2400 : 2000)); - printf(" %ld ms CPU time used to process %ld ms samples, %.1f%% load\n", - cpu_millis, sample_millis, 100.0 * cpu_millis / sample_millis); - } - printf(" %u Mode A/C messages received\n", st->demod_modeac); printf(" %u Mode-S message preambles received\n", st->demod_preambles); printf(" %u with bad message format or invalid CRC\n", st->demod_rejected_bad); @@ -132,6 +133,21 @@ void display_stats(struct stats *st) { if (Modes.net && Modes.net_http_port) printf("%d HTTP requests\n", st->http_requests); + { + uint64_t demod_cpu_millis = (uint64_t)st->demod_cpu.tv_sec*1000UL + st->demod_cpu.tv_nsec/1000000UL; + uint64_t reader_cpu_millis = (uint64_t)st->reader_cpu.tv_sec*1000UL + st->reader_cpu.tv_nsec/1000000UL; + uint64_t background_cpu_millis = (uint64_t)st->background_cpu.tv_sec*1000UL + st->background_cpu.tv_nsec/1000000UL; + + printf("CPU load: %.1f%%\n" + " %lu ms for demodulation\n" + " %lu ms for reading from USB\n" + " %lu ms for network input and background tasks\n", + 0.1 * (demod_cpu_millis + reader_cpu_millis + background_cpu_millis) / (st->end - st->start + 1), + demod_cpu_millis, + reader_cpu_millis, + background_cpu_millis); + } + fflush(stdout); } @@ -164,10 +180,9 @@ void add_stats(const struct stats *st1, const struct stats *st2, struct stats *t target->blocks_processed = st1->blocks_processed + st2->blocks_processed; target->blocks_dropped = st1->blocks_dropped + st2->blocks_dropped; - target->cputime.tv_sec = st1->cputime.tv_sec + st2->cputime.tv_sec; - target->cputime.tv_nsec = st1->cputime.tv_nsec + st2->cputime.tv_nsec; - target->cputime.tv_sec += target->cputime.tv_nsec / 1000000000L; - target->cputime.tv_nsec %= 1000000000L; + add_timespecs(&st1->demod_cpu, &st2->demod_cpu, &target->demod_cpu); + add_timespecs(&st1->reader_cpu, &st2->reader_cpu, &target->reader_cpu); + add_timespecs(&st1->background_cpu, &st2->background_cpu, &target->background_cpu); // noise floor: target->noise_power_sum = st1->noise_power_sum + st2->noise_power_sum; diff --git a/stats.h b/stats.h index 3cd1061..c878594 100644 --- a/stats.h +++ b/stats.h @@ -63,10 +63,13 @@ struct stats { // Mode A/C demodulator counts: uint32_t demod_modeac; - // timing: uint32_t blocks_processed; uint32_t blocks_dropped; - struct timespec cputime; + + // timing: + struct timespec demod_cpu; + struct timespec reader_cpu; + struct timespec background_cpu; // noise floor: double noise_power_sum; @@ -108,4 +111,6 @@ void add_stats(const struct stats *st1, const struct stats *st2, struct stats *t void display_stats(struct stats *st); void reset_stats(struct stats *st); +void add_timespecs(const struct timespec *x, const struct timespec *y, struct timespec *z); + #endif