From f64b3f986f0d68067dec7267df4e8c6a42c22b3a Mon Sep 17 00:00:00 2001
From: Alessandro Toppi <atoppi@meetecho.com>
Date: Mon, 7 Dec 2020 11:15:25 +0100
Subject: [PATCH] [janus-pp-rec] Drop audio RTP silence suppression packets.
 (#2467)

---
 postprocessing/janus-pp-rec.1   |  3 +++
 postprocessing/janus-pp-rec.c   | 47 ++++++++++++++++++++++++++++++++-
 postprocessing/janus-pp-rec.ggo |  1 +
 postprocessing/pp-g711.c        | 11 ++++----
 postprocessing/pp-g722.c        | 13 +++++----
 postprocessing/pp-opus.c        |  1 -
 6 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/postprocessing/janus-pp-rec.1 b/postprocessing/janus-pp-rec.1
index 45efa9684f..caa367a54a 100644
--- a/postprocessing/janus-pp-rec.1
+++ b/postprocessing/janus-pp-rec.1
@@ -59,6 +59,9 @@ For mp4 files write the MOOV atom at the head of the file  (default=off)
 .TP
 .BR \-S ", " \-\-audioskew=milliseconds
 Time threshold to trigger an audio skew compensation, disabled if 0 (default=0)
+.TP
+.BR \-C ", " \-\-silence-distance=count
+RTP packets distance used to detect RTP silence suppression, disabled if 0 (default=100)
 .SH EXAMPLES
 \fBjanus-pp-rec \-\-header rec1234.mjr\fR \- Parse the recordings header (shows metadata info)
 .TP
diff --git a/postprocessing/janus-pp-rec.c b/postprocessing/janus-pp-rec.c
index 3c7ff56eb0..6d7761f4a7 100644
--- a/postprocessing/janus-pp-rec.c
+++ b/postprocessing/janus-pp-rec.c
@@ -73,6 +73,8 @@ Usage: janus-pp-rec [OPTIONS] source.mjr [destination.[opus|wav|webm|mp4|srt]]
                                   of the file  (default=off)
   -S, --audioskew=milliseconds  Time threshold to trigger an audio skew
                                   compensation, disabled if 0 (default=0)
+  -C, --silence-distance=count  RTP packets distance used to detect RTP silence
+                                  suppression, disabled if 0 (default=100)
 \endverbatim
  *
  * \note This utility does not do any form of transcoding. It just
@@ -132,6 +134,8 @@ static int ignore_first_packets = 0;
 #define DEFAULT_AUDIO_SKEW_TH 0
 static int audioskew_th = DEFAULT_AUDIO_SKEW_TH;
 
+#define DEFAULT_SILENCE_DISTANCE 100
+static int silence_distance = DEFAULT_SILENCE_DISTANCE;
 
 /* Signal handler */
 static void janus_pp_handle_signal(int signum) {
@@ -223,6 +227,11 @@ int main(int argc, char *argv[])
 		if(val >= 0)
 			audioskew_th = val;
 	}
+	if(args_info.silence_distance_given || (g_getenv("JANUS_PPREC_SILENCE_DISTANCE") != NULL)) {
+		int val = args_info.silence_distance_given ? args_info.silence_distance_arg : atoi(g_getenv("JANUS_PPREC_SILENCE_DISTANCE"));
+		if(val >= 0)
+			silence_distance = val;
+	}
 
 	/* Evaluate arguments to find source and target */
 	char *source = NULL, *destination = NULL, *setting = NULL;
@@ -244,7 +253,8 @@ int main(int argc, char *argv[])
 				(strcmp(setting, "-v")) && (strcmp(setting, "--videoorient-ext")) &&
 				(strcmp(setting, "-d")) && (strcmp(setting, "--debug-level")) &&
 				(strcmp(setting, "-f")) && (strcmp(setting, "--format")) &&
-				(strcmp(setting, "-S")) && (strcmp(setting, "--audioskew"))
+				(strcmp(setting, "-S")) && (strcmp(setting, "--audioskew")) &&
+				(strcmp(setting, "-C")) && (strcmp(setting, "--silence-distance"))
 		)) {
 			if(source == NULL)
 				source = argv[i];
@@ -274,6 +284,8 @@ int main(int argc, char *argv[])
 			JANUS_LOG(LOG_INFO, "Audio level extension ID: %d\n", audio_level_extmap_id);
 		if(video_orient_extmap_id > 0)
 			JANUS_LOG(LOG_INFO, "Video orientation extension ID: %d\n", video_orient_extmap_id);
+		if(silence_distance > 0)
+			JANUS_LOG(LOG_INFO, "RTP silence suppression distance: %d\n", silence_distance);
 		JANUS_LOG(LOG_INFO, "\n");
 		if(source != NULL)
 			JANUS_LOG(LOG_INFO, "Source file: %s\n", source);
@@ -616,12 +628,15 @@ int main(int argc, char *argv[])
 	}
 	/* Now let's parse the frames and order them */
 	uint32_t pkt_ts = 0, highest_rtp_ts = 0;
+	uint16_t highest_seq = 0;
 	/* Start from 1 to take into account late packets */
 	int times_resetted = 1;
 	uint64_t max32 = UINT32_MAX;
 	int ignored = 0;
 	offset = 0;
 	gboolean started = FALSE;
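+	/* RTP silence suppression (DTX) state: set when a suppression packet is dropped, cleared by the next audio packet */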
+	gboolean dtx_on = FALSE;
 	/* Extensions, if any */
 	int audiolevel = 0, rotation = 0, last_rotation = -1, rotated = -1;
 	uint16_t rtp_header_len, rtp_read_n;
@@ -801,8 +816,37 @@ int main(int argc, char *argv[])
 			/* Simple enough... */
 			started = TRUE;
 			highest_rtp_ts = rtp_ts;
+			highest_seq = p->seq;
 			p->ts = (times_resetted*max32)+rtp_ts;
 		} else {
+			if(!video && !data) {
+				if(dtx_on) {
+					/* Leaving DTX mode (RTP started flowing again) */
+					dtx_on = FALSE;
+					JANUS_LOG(LOG_WARN, "Leaving RTP silence suppression (seq=%"SCNu16", rtp_ts=%"SCNu32")\n", ntohs(rtp->seq_number), rtp_ts);
+				} else if(rtp->markerbit == 1) {
+					/* Try to detect RTP silence suppression */
+					int32_t seq_distance = abs((int16_t)(p->seq - highest_seq));
+					if(seq_distance < silence_distance) {
+						/* Consider 20 ms audio packets */
+						int32_t inter_rtp_ts = opus ? 960 : 160;
+						int32_t expected_rtp_distance = inter_rtp_ts * seq_distance;
+						int32_t rtp_distance = abs((int32_t)(rtp_ts - highest_rtp_ts));
+						if(rtp_distance > 10 * expected_rtp_distance) {
+							/* Entering DTX mode (RTP will stop) */
+							dtx_on = TRUE;
+							/* This packet is close in sequence number but its RTP timestamp is not coherent -> silence suppression */
+							JANUS_LOG(LOG_WARN, "Dropping audio RTP silence suppression (seq_distance=%d, rtp_distance=%d)\n", seq_distance, rtp_distance);
+							/* Skip data */
+							offset += len;
+							count++;
+							g_free(p);
+							continue;
+						}
+					}
+				}
+			}
+
 			/* Is the new timestamp smaller than the next one, and if so, is it a timestamp reset or simply out of order? */
 			gboolean pre_reset_pkt = FALSE;
 
@@ -814,6 +858,7 @@ int main(int argc, char *argv[])
 					times_resetted++;
 				}
 				highest_rtp_ts = rtp_ts;
+				highest_seq = p->seq;
 			}
 
 			/* Out-of-order packet */
diff --git a/postprocessing/janus-pp-rec.ggo b/postprocessing/janus-pp-rec.ggo
index 4a3dcd946e..22a81dd58e 100644
--- a/postprocessing/janus-pp-rec.ggo
+++ b/postprocessing/janus-pp-rec.ggo
@@ -14,3 +14,4 @@ option "disable-colors" o "Disable color in the logging" flag off
 option "format" f "Specifies the output format (overrides the format from the destination)" string values="opus", "wav", "webm", "mp4", "srt" optional
 option "faststart" t "For mp4 files write the MOOV atom at the head of the file" flag off
 option "audioskew" S "Time threshold to trigger an audio skew compensation, disabled if 0 (default=0)" int typestr="milliseconds" optional
+option "silence-distance" C "RTP packets distance used to detect RTP silence suppression, disabled if 0 (default=100)" int typestr="count" optional
diff --git a/postprocessing/pp-g711.c b/postprocessing/pp-g711.c
index 781198ed18..d6d78aee99 100644
--- a/postprocessing/pp-g711.c
+++ b/postprocessing/pp-g711.c
@@ -162,13 +162,12 @@ int janus_pp_g711_process(FILE *file, janus_pp_frame_packet *list, int *working)
 	memset(samples, 0, sizeof(samples));
 	size_t num_samples = 160;
 	while(*working && tmp != NULL) {
-		if(tmp->prev != NULL && (tmp->seq - tmp->prev->seq > 1)) {
+		if(tmp->prev != NULL && ((tmp->ts - tmp->prev->ts)/8/20 > 1)) {
 			JANUS_LOG(LOG_WARN, "Lost a packet here? (got seq %"SCNu16" after %"SCNu16", time ~%"SCNu64"s)\n",
-				tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/48000);
-			/* FIXME Write the silence packet N times to fill in the gaps */
+				tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/8000);
+			int silence_count = (tmp->ts - tmp->prev->ts)/8/20 - 1;
 			int i=0;
-			for(i=0; i<(tmp->seq-tmp->prev->seq-1); i++) {
-				/* FIXME We should actually also look at the timestamp differences */
+			for(i=0; i<silence_count; i++) {
 				JANUS_LOG(LOG_WARN, "[FILL] Writing silence (seq=%d, index=%d)\n",
 					tmp->prev->seq+i+1, i+1);
 				/* Add silence */
@@ -183,7 +182,7 @@ int janus_pp_g711_process(FILE *file, janus_pp_frame_packet *list, int *working)
 		}
 		if(tmp->drop) {
 			/* We marked this packet as one to drop, before */
-			JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/48000);
+			JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/8000);
 			tmp = tmp->next;
 			continue;
 		}
diff --git a/postprocessing/pp-g722.c b/postprocessing/pp-g722.c
index ac6a7309eb..b03670d697 100644
--- a/postprocessing/pp-g722.c
+++ b/postprocessing/pp-g722.c
@@ -138,18 +138,17 @@ int janus_pp_g722_process(FILE *file, janus_pp_frame_packet *list, int *working)
 	uint8_t *buffer = g_malloc0(1500);
 	int16_t samples[1500];
 	memset(samples, 0, sizeof(samples));
+	uint num_samples = 320;
 	while(*working && tmp != NULL) {
-		if(tmp->prev != NULL && (tmp->seq - tmp->prev->seq > 1)) {
+		if(tmp->prev != NULL && ((tmp->ts - tmp->prev->ts)/8/20 > 1)) {
 			JANUS_LOG(LOG_WARN, "Lost a packet here? (got seq %"SCNu16" after %"SCNu16", time ~%"SCNu64"s)\n",
-				tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/48000);
-			/* FIXME Write the silence packet N times to fill in the gaps */
+				tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/8000);
+			int silence_count = (tmp->ts - tmp->prev->ts)/8/20 - 1;
 			int i=0;
-			for(i=0; i<(tmp->seq-tmp->prev->seq-1); i++) {
-				/* FIXME We should actually also look at the timestamp differences */
+			for(i=0; i<silence_count; i++) {
 				JANUS_LOG(LOG_WARN, "[FILL] Writing silence (seq=%d, index=%d)\n",
 					tmp->prev->seq+i+1, i+1);
 				/* Add silence */
-				uint num_samples = 320;
 				memset(samples, 0, num_samples*2);
 				if(wav_file != NULL) {
 					if(fwrite(samples, sizeof(uint16_t), num_samples, wav_file) != num_samples) {
@@ -161,7 +160,7 @@ int janus_pp_g722_process(FILE *file, janus_pp_frame_packet *list, int *working)
 		}
 		if(tmp->drop) {
 			/* We marked this packet as one to drop, before */
-			JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/48000);
+			JANUS_LOG(LOG_WARN, "Dropping previously marked audio packet (time ~%"SCNu64"s)\n", (tmp->ts-list->ts)/8000);
 			tmp = tmp->next;
 			continue;
 		}
diff --git a/postprocessing/pp-opus.c b/postprocessing/pp-opus.c
index 459d51b0f1..5dede3ef42 100644
--- a/postprocessing/pp-opus.c
+++ b/postprocessing/pp-opus.c
@@ -76,7 +76,6 @@ int janus_pp_opus_process(FILE *file, janus_pp_frame_packet *list, int *working)
 		if(tmp->prev != NULL && ((tmp->ts - tmp->prev->ts)/48/20 > 1)) {
 			JANUS_LOG(LOG_WARN, "Lost a packet here? (got seq %"SCNu16" after %"SCNu16", time ~%"SCNu64"s)\n",
 				tmp->seq, tmp->prev->seq, (tmp->ts-list->ts)/48000);
-			/* FIXME Write the silence packet N times to fill in the gaps */
 			ogg_packet *op = op_from_pkt((const unsigned char *)opus_silence, sizeof(opus_silence));
 			/* use ts differ to insert silence packet */
 			int silence_count = (tmp->ts - tmp->prev->ts)/48/20 - 1;