From f64b3f986f0d68067dec7267df4e8c6a42c22b3a Mon Sep 17 00:00:00 2001 From: Alessandro Toppi Date: Mon, 7 Dec 2020 11:15:25 +0100 Subject: [PATCH] [janus-pp-rec] Drop audio RTP silence suppression packets. (#2467) --- postprocessing/janus-pp-rec.1 | 2 ++ postprocessing/janus-pp-rec.c | 47 ++++++++++++++++++++++++++++++++- postprocessing/janus-pp-rec.ggo | 1 + postprocessing/pp-g711.c | 11 ++++---- postprocessing/pp-g722.c | 13 +++++---- postprocessing/pp-opus.c | 1 - 6 files changed, 60 insertions(+), 15 deletions(-) diff --git a/postprocessing/janus-pp-rec.1 b/postprocessing/janus-pp-rec.1 index 45efa9684f..caa367a54a 100644 --- a/postprocessing/janus-pp-rec.1 +++ b/postprocessing/janus-pp-rec.1 @@ -59,6 +59,8 @@ For mp4 files write the MOOV atom at the head of the file (default=off) .TP .BR \-S ", " \-\-audioskew=milliseconds Time threshold to trigger an audio skew compensation, disabled if 0 (default=0) +.BR \-C ", " \-\-silence-distance=count +RTP packets distance used to detect RTP silence suppression, disabled if 0 (default=100) .SH EXAMPLES \fBjanus-pp-rec \-\-header rec1234.mjr\fR \- Parse the recordings header (shows metadata info) .TP diff --git a/postprocessing/janus-pp-rec.c b/postprocessing/janus-pp-rec.c index 3c7ff56eb0..6d7761f4a7 100644 --- a/postprocessing/janus-pp-rec.c +++ b/postprocessing/janus-pp-rec.c @@ -73,6 +73,8 @@ Usage: janus-pp-rec [OPTIONS] source.mjr [destination.[opus|wav|webm|mp4|srt]] of the file (default=off) -S, --audioskew=milliseconds Time threshold to trigger an audio skew compensation, disabled if 0 (default=0) + -C, --silence-distance=count RTP packets distance used to detect RTP silence + suppression, disabled if 0 (default=100) \endverbatim * * \note This utility does not do any form of transcoding. 
It just @@ -132,6 +134,8 @@ static int ignore_first_packets = 0; #define DEFAULT_AUDIO_SKEW_TH 0 static int audioskew_th = DEFAULT_AUDIO_SKEW_TH; +#define DEFAULT_SILENCE_DISTANCE 100 +static int silence_distance = DEFAULT_SILENCE_DISTANCE; /* Signal handler */ static void janus_pp_handle_signal(int signum) { @@ -223,6 +227,11 @@ int main(int argc, char *argv[]) if(val >= 0) audioskew_th = val; } + if(args_info.silence_distance_given || (g_getenv("JANUS_PPREC_SILENCE_DISTANCE") != NULL)) { + int val = args_info.silence_distance_given ? args_info.silence_distance_arg : atoi(g_getenv("JANUS_PPREC_SILENCE_DISTANCE")); + if(val >= 0) + silence_distance = val; + } /* Evaluate arguments to find source and target */ char *source = NULL, *destination = NULL, *setting = NULL; @@ -244,7 +253,8 @@ int main(int argc, char *argv[]) (strcmp(setting, "-v")) && (strcmp(setting, "--videoorient-ext")) && (strcmp(setting, "-d")) && (strcmp(setting, "--debug-level")) && (strcmp(setting, "-f")) && (strcmp(setting, "--format")) && - (strcmp(setting, "-S")) && (strcmp(setting, "--audioskew")) + (strcmp(setting, "-S")) && (strcmp(setting, "--audioskew")) && + (strcmp(setting, "-C")) && (strcmp(setting, "--silence-distance")) )) { if(source == NULL) source = argv[i]; @@ -274,6 +284,8 @@ int main(int argc, char *argv[]) JANUS_LOG(LOG_INFO, "Audio level extension ID: %d\n", audio_level_extmap_id); if(video_orient_extmap_id > 0) JANUS_LOG(LOG_INFO, "Video orientation extension ID: %d\n", video_orient_extmap_id); + if(silence_distance > 0) + JANUS_LOG(LOG_INFO, "RTP silence suppression distance: %d\n", silence_distance); JANUS_LOG(LOG_INFO, "\n"); if(source != NULL) JANUS_LOG(LOG_INFO, "Source file: %s\n", source); @@ -616,12 +628,15 @@ int main(int argc, char *argv[]) } /* Now let's parse the frames and order them */ uint32_t pkt_ts = 0, highest_rtp_ts = 0; + uint16_t highest_seq = 0; /* Start from 1 to take into account late packets */ int times_resetted = 1; uint64_t max32 = UINT32_MAX; int 
ignored = 0; offset = 0; gboolean started = FALSE; + /* DTX stuff */ + gboolean dtx_on = FALSE; /* Extensions, if any */ int audiolevel = 0, rotation = 0, last_rotation = -1, rotated = -1; uint16_t rtp_header_len, rtp_read_n; @@ -801,8 +816,37 @@ int main(int argc, char *argv[]) /* Simple enough... */ started = TRUE; highest_rtp_ts = rtp_ts; + highest_seq = p->seq; p->ts = (times_resetted*max32)+rtp_ts; } else { + if(!video && !data) { + if(dtx_on) { + /* Leaving DTX mode (RTP started flowing again) */ + dtx_on = FALSE; + JANUS_LOG(LOG_WARN, "Leaving RTP silence suppression (seq=%"SCNu16", rtp_ts=%"SCNu32")\n", ntohs(rtp->seq_number), rtp_ts); + } else if(rtp->markerbit == 1) { + /* Try to detect RTP silence suppression */ + int32_t seq_distance = abs((int16_t)(p->seq - highest_seq)); + if(seq_distance < silence_distance) { + /* Consider 20 ms audio packets */ + int32_t inter_rtp_ts = opus ? 960 : 160; + int32_t expected_rtp_distance = inter_rtp_ts * seq_distance; + int32_t rtp_distance = abs((int32_t)(rtp_ts - highest_rtp_ts)); + if(rtp_distance > 10 * expected_rtp_distance) { + /* Entering DTX mode (RTP will stop) */ + dtx_on = TRUE; + /* This is a close packet with not coherent RTP ts -> silence suppression */ + JANUS_LOG(LOG_WARN, "Dropping audio RTP silence suppression (seq_distance=%d, rtp_distance=%d)\n", seq_distance, rtp_distance); + /* Skip data */ + offset += len; + count++; + g_free(p); + continue; + } + } + } + } + /* Is the new timestamp smaller than the next one, and if so, is it a timestamp reset or simply out of order? 
*/ gboolean pre_reset_pkt = FALSE; @@ -814,6 +858,7 @@ int main(int argc, char *argv[]) times_resetted++; } highest_rtp_ts = rtp_ts; + highest_seq = p->seq; } /* Out-of-order packet */ diff --git a/postprocessing/janus-pp-rec.ggo b/postprocessing/janus-pp-rec.ggo index 4a3dcd946e..22a81dd58e 100644 --- a/postprocessing/janus-pp-rec.ggo +++ b/postprocessing/janus-pp-rec.ggo @@ -14,3 +14,4 @@ option "disable-colors" o "Disable color in the logging" flag off option "format" f "Specifies the output format (overrides the format from the destination)" string values="opus", "wav", "webm", "mp4", "srt" optional option "faststart" t "For mp4 files write the MOOV atom at the head of the file" flag off option "audioskew" S "Time threshold to trigger an audio skew compensation, disabled if 0 (default=0)" int typestr="milliseconds" optional +option "silence-distance" C "RTP packets distance used to detect RTP silence suppression, disabled if 0 (default=100)" int typestr="count" optional diff --git a/postprocessing/pp-g711.c b/postprocessing/pp-g711.c index 781198ed18..d6d78aee99 100644 --- a/postprocessing/pp-g711.c +++ b/postprocessing/pp-g711.c @@ -162,13 +162,12 @@ int janus_pp_g711_process(FILE *file, janus_pp_frame_packet *list, int *working) memset(samples, 0, sizeof(samples)); size_t num_samples = 160; while(*working && tmp != NULL) { - if(tmp->prev != NULL && (tmp->seq - tmp->prev->seq > 1)) { + if(tmp->prev != NULL && ((tmp->ts - tmp->prev->ts)/8/20 > 1)) { JANUS_LOG(LOG_WARN, "Lost a packet here? 
(got seq %"SCNu16" after %"SCNu16", time ~%"SCNu64"s)\n", - tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/48000); - /* FIXME Write the silence packet N times to fill in the gaps */ + tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/8000); + int silence_count = (tmp->ts - tmp->prev->ts)/8/20 - 1; int i=0; - for(i=0; i<(tmp->seq-tmp->prev->seq-1); i++) { - /* FIXME We should actually also look at the timestamp differences */ + for(i=0; i<silence_count; i++) { JANUS_LOG(LOG_WARN, "[FILL] Writing silence (seq=%d, index=%d)\n", tmp->prev->seq+i+1, i+1); /* Add silence */ @@ -183,7 +182,7 @@ int janus_pp_g711_process(FILE *file, janus_pp_frame_packet *list, int *working) } if(tmp->drop) { /* We marked this packet as one to drop, before */ - JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/48000); + JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/8000); tmp = tmp->next; continue; } diff --git a/postprocessing/pp-g722.c b/postprocessing/pp-g722.c index ac6a7309eb..b03670d697 100644 --- a/postprocessing/pp-g722.c +++ b/postprocessing/pp-g722.c @@ -138,18 +138,17 @@ int janus_pp_g722_process(FILE *file, janus_pp_frame_packet *list, int *working) uint8_t *buffer = g_malloc0(1500); int16_t samples[1500]; memset(samples, 0, sizeof(samples)); + uint num_samples = 320; while(*working && tmp != NULL) { - if(tmp->prev != NULL && (tmp->seq - tmp->prev->seq > 1)) { + if(tmp->prev != NULL && ((tmp->ts - tmp->prev->ts)/8/20 > 1)) { JANUS_LOG(LOG_WARN, "Lost a packet here? 
(got seq %"SCNu16" after %"SCNu16", time ~%"SCNu64"s)\n", - tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/48000); - /* FIXME Write the silence packet N times to fill in the gaps */ + tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/8000); + int silence_count = (tmp->ts - tmp->prev->ts)/8/20 - 1; int i=0; - for(i=0; i<(tmp->seq-tmp->prev->seq-1); i++) { - /* FIXME We should actually also look at the timestamp differences */ + for(i=0; i<silence_count; i++) { JANUS_LOG(LOG_WARN, "[FILL] Writing silence (seq=%d, index=%d)\n", tmp->prev->seq+i+1, i+1); /* Add silence */ - uint num_samples = 320; memset(samples, 0, num_samples*2); if(wav_file != NULL) { if(fwrite(samples, sizeof(uint16_t), num_samples, wav_file) != num_samples) { @@ -161,7 +160,7 @@ int janus_pp_g722_process(FILE *file, janus_pp_frame_packet *list, int *working) } if(tmp->drop) { /* We marked this packet as one to drop, before */ - JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/48000); + JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/8000); tmp = tmp->next; continue; } diff --git a/postprocessing/pp-opus.c b/postprocessing/pp-opus.c index 459d51b0f1..5dede3ef42 100644 --- a/postprocessing/pp-opus.c +++ b/postprocessing/pp-opus.c @@ -76,7 +76,6 @@ int janus_pp_opus_process(FILE *file, janus_pp_frame_packet *list, int *working) if(tmp->prev != NULL && ((tmp->ts - tmp->prev->ts)/48/20 > 1)) { JANUS_LOG(LOG_WARN, "Lost a packet here? (got seq %"SCNu16" after %"SCNu16", time ~%"SCNu64"s)\n", tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/48000); - /* FIXME Write the silence packet N times to fill in the gaps */ ogg_packet *op = op_from_pkt((const unsigned char *)opus_silence, sizeof(opus_silence)); /* use ts differ to insert silence packet */ int silence_count = (tmp->ts - tmp->prev->ts)/48/20 - 1;