# Alerts_Package_Comparisons.R
# jsonlite-ndjson Package Comparison
library(data.table)
library(dplyr)
library(janitor)
library(tidyr)
library(purrr)
library(tidyverse)
library(stringr)
library(waldo)
# For the purposes of this work, assume we have pasted all
# JSON rows into a single larger JSON file.
# Prompt for that file once, so that both parsers are guaranteed to read the
# same input (two separate prompts allowed two different files to be picked).
alertsPath <- file.choose()
# Stream in the entire dataset as a large nested list.
# The connection is opened here, so it is also closed here: jsonlite only
# closes connections that it opened itself. `finally` closes it even when
# parsing fails.
alertsCon <- file(alertsPath, open = "r")
alertsData <- tryCatch(
  jsonlite::stream_in(alertsCon),
  finally = close(alertsCon)
)
rm(alertsCon)
# We know that ndjson::stream_in() will automatically unnest everything
# We want to know if it does so correctly
aD <- ndjson::stream_in(alertsPath, cls = "dt")
# jsonlite Cleanup --------------------------------------------------------
# "correlationIds" Feature ------------------------------------------------
# Pad every alert's correlation-id vector with NA up to the length of the
# longest one (indexing past the end of a vector yields NA), so that the
# result is a rectangular table with one row per alert.
correlationIds <- alertsData$`_source`$correlationIds
maxIds <- max(lengths(correlationIds))
correlationIds <- t(sapply(correlationIds, `[`, seq_len(maxIds)))
correlationIds <- as.data.table(correlationIds)
# Collect the matching columns from the ndjson version.
# The first column in subAD is the empty header column from the original
# nesting, so we drop it. The two tables then appear to have matching sizes.
subAD <- aD %>% dplyr::select(grep("correlationIds", names(aD)))
subAD <- subAD[, -1]
# The ndjson column names are not in order. Reduce each cleaned name to its
# numeric suffix, zero-pad it to four characters so that alphabetical order
# equals numeric order, and sort the columns by name.
subAD <- clean_names(subAD)
nom <- names(subAD)
nom <- sub("source_correlation_", "", nom)
nom <- sub("ids_", "", nom)
nom <- str_pad(nom, width = 4, side = "left", pad = "0")
nom <- paste0("correlation_id_", nom)
names(subAD) <- nom
subAD <- subAD %>% select(sort(names(.)))
# Name the columns of the jsonlite version to match
names(correlationIds) <- names(subAD)
# Spot-check columns for matching. The first 10 entries of column 6 matched by
# visual inspection in the original run.
nCols <- ncol(subAD)
identical(subAD[, 6], correlationIds[, 6])
# Compare one randomly drawn column, then ten more. The drawn indices are
# used directly (via data.table's `..` prefix) instead of being pasted in as
# literals. In the original run (8735 columns) this seed drew 5744 and then
# c(1626, 4454, 7593, 6380, 2227, 1313, 2122, 8197, 6892, 2609); other R
# versions may draw different columns because the default sampling algorithm
# changed in R 3.6.0.
set.seed(3859)
rndInd <- sample(seq_len(nCols), 1)
identical(subAD[, ..rndInd], correlationIds[, ..rndInd])
rndInds <- sample(seq_len(nCols), 10)
identical(subAD[, ..rndInds], correlationIds[, ..rndInds])
# Finally compare the first and the last column.
identical(subAD[, 1], correlationIds[, 1])
identical(subAD[, ..nCols], correlationIds[, ..nCols])
# We have matches on all! Success!
rm(correlationIds, subAD, nom, rndInd, rndInds, maxIds, nCols)
# Convert jsonlite version to wider data.table ----------------------------
# This immediately unnests the "_source" column-list into 76 columns
str(alertsData, max.level = 1)
alertsData <- as.data.table(alertsData)
# "structuredMessage" Feature ---------------------------------------------
# This is empty
structuredMessage <- alertsData[, "X_source.data.rank_alert.structuredMessage"]
# keep_empty = TRUE keeps a row for alerts whose list entry is empty
structMessCheck <- unnest(
  structuredMessage,
  cols = c(X_source.data.rank_alert.structuredMessage),
  keep_empty = TRUE
)
table(structMessCheck$X_source.data.rank_alert.structuredMessage)
structMessCheck <- unlist(structMessCheck$X_source.data.rank_alert.structuredMessage)
# Inspect the same feature in the ndjson version
View(aD$`_source.data.rank_alert.structuredMessage`)
table(aD$`_source.data.rank_alert.structuredMessage`)
# Only remove objects that were actually created in this section
# (`structMessage` never existed, so rm() used to raise a warning).
rm(structMessCheck, structuredMessage)
# "justification" Feature -------------------------------------------------
# This is empty
justification <- alertsData[, "X_source.data.rank_alert.justification"]
# Flatten the feature into a one-column table.
# NOTE(review): this used to read `srcData$data.rank_alert.justification`, but
# `srcData` is not created anywhere in this script, so the line failed with
# "object 'srcData' not found". Presumably `srcData` was the `_source` data
# frame from an earlier session; the flattened column of `alertsData` is used
# here instead - confirm that this is the intended source.
justifData <- as.data.table(unlist(justification[[1]]))
justifCheck <- justification[[1]]
justifCheck <- unlist(justifCheck)
str(justifCheck)
# Inspect the same feature in the ndjson version
View(aD$`_source.data.rank_alert.justification`)
table(aD$`_source.data.rank_alert.justification`)
rm(justifCheck, justification, justifData)
# "source.Reputation" Feature ---------------------------------------------
# Unnesting yields two columns of key-value pairs, populated in 8,115 rows
sourceReputation <- alertsData[, "X_source.source.reputation"]
srcRepCheck <- sourceReputation %>% as_tibble()
srcRepCheck[["ID"]] <- alertsData$X_id
srcRepCheck <- unnest_wider(srcRepCheck, col = "X_source.source.reputation")
srcRepCheck[["reputation"]] <- unlist(srcRepCheck$reputation)
# Copy both unnested columns back onto the alerts table. The rows are picked
# with an element-wise comparison of the two id vectors, which is computed
# once and reused for both columns.
sameId <- srcRepCheck$ID == alertsData$X_id
alertsData$source <- srcRepCheck$source[sameId]
alertsData$reputation <- srcRepCheck$reputation[sameId]
# Inspect the corresponding ndjson columns
View(aD$`_source.source.reputation.0.reputation.0`)
View(aD$`_source.source.reputation.0.source`)
table(aD$`_source.source.reputation.0.source`)
rm(srcRepCheck, sourceReputation, sameId)
# "destination.Reputation" Feature ---------------------------------------------
# Only two observations (alien-vault / Malicious Host) exist, which is not
# enough to do anything with, so this feature is inspected and then ignored
destinationReputation <- alertsData[, "X_source.destination.reputation"]
dRep <- unlist(destinationReputation)
destRepCheck <- destinationReputation %>% as_tibble()
destRepCheck[["ID"]] <- alertsData$X_id
destRepCheck <- unnest_wider(
  destRepCheck,
  col = "X_source.destination.reputation"
)
rm(destinationReputation, dRep, destRepCheck)
# "feedbackHistoryClosed" Feature -----------------------------------------
# Unnesting gives 12 character columns, populated for 4 of 37082 observations.
# Those 4 are false positives from the IDS - too few to do anything with.
feedbackHistoryClosed <- alertsData[, "X_source.feedbackData.history.closed"]
fHistClosedCheck <- feedbackHistoryClosed %>% as_tibble()
fHistClosedCheck[["ID"]] <- alertsData$X_id
fHistClosedCheck <- unnest_wider(
  fHistClosedCheck,
  col = "X_source.feedbackData.history.closed"
)
# Count the alerts with / without an entry, then look at the populated rows
table(is.na(fHistClosedCheck$name))
View(fHistClosedCheck[c(29745, 30383, 31047, 31794), ])
rm(feedbackHistoryClosed, fHistClosedCheck)
# "feedbackHistoryComment" Feature ----------------------------------------
# Unnesting gives 12 list-structured columns with 4 observations.
# They are test comments added by the security team and are not relevant to
# the planned analysis, so they are not retained in the dataset.
feedbackHistoryComment <- alertsData[, "X_source.feedbackData.history.comment"]
fHistCommentCheck <- feedbackHistoryComment %>% as_tibble()
fHistCommentCheck[["ID"]] <- alertsData$X_id
fHistCommentCheck <- unnest_wider(
  fHistCommentCheck,
  col = "X_source.feedbackData.history.comment"
)
# Flatten each of the first 12 columns
fHistCommentCheckExpand <- apply(
  X = fHistCommentCheck[, 1:12],
  MARGIN = 2,
  FUN = unlist
)
View(!is.na(fHistCommentCheckExpand))
View(fHistCommentCheckExpand[c(30263, 35883, 36639, 36640), ])
rm(feedbackHistoryComment, fHistCommentCheck, fHistCommentCheckExpand)
# "feedbackHistoryDefault" Feature ----------------------------------------
# The three resulting columns hold 132 "suppressed" messages and timestamps
# from a security analyst. They do not help this analysis and are eliminated.
# NOTE(review): the original header also said "12 columns ... 4/37082
# entries", which looks copied from the "closed" feature - confirm.
feedbackHistoryDefault <- alertsData[, "X_source.feedbackData.history.default"]
fHistDefaultCheck <- feedbackHistoryDefault %>% as_tibble()
fHistDefaultCheck[["ID"]] <- alertsData$X_id
# Coerce the column to a plain list before unnesting it
fHistDefaultCheck[["X_source.feedbackData.history.default"]] <-
  as.list(fHistDefaultCheck$X_source.feedbackData.history.default)
fHistDefaultCheck <- unnest_wider(
  fHistDefaultCheck,
  col = "X_source.feedbackData.history.default"
)
fHistDefaultCheckExpand <- apply(
  X = fHistDefaultCheck[, 1:3],
  MARGIN = 2,
  FUN = unlist
)
rm(fHistDefaultCheck, fHistDefaultCheckExpand, feedbackHistoryDefault)
# "feedbackHistoryTag" Feature --------------------------------------------
# The 12 resulting columns are messages about added and removed tags.
# They are not helpful here, so they are eliminated.
feedbackHistoryTag <- alertsData[, "X_source.feedbackData.history.tag"]
fHistTagCheck <- feedbackHistoryTag %>% as_tibble()
fHistTagCheck[["ID"]] <- alertsData$X_id
# Coerce the column to a plain list before unnesting it
fHistTagCheck[["X_source.feedbackData.history.tag"]] <-
  as.list(fHistTagCheck$X_source.feedbackData.history.tag)
fHistTagCheck <- unnest_wider(
  fHistTagCheck,
  col = "X_source.feedbackData.history.tag"
)
fHistTagCheckExpand <- apply(
  X = fHistTagCheck[, 1:12],
  MARGIN = 2,
  FUN = unlist
)
rm(fHistTagCheck, fHistTagCheckExpand, feedbackHistoryTag)
# "feedbackHistoryOpen" Feature -------------------------------------------
# The per-column unlisting used for the other history features does not work
# properly here; unlisting the whole original column is more informative.
# The content is a set of notes on whether an alert is or should be re-opened,
# with analyst information, timestamps, and so on. None of it is particularly
# useful for this work, so these values are eliminated as well.
feedbackHistoryOpen <- alertsData[, "X_source.feedbackData.history.open"]
fHistOpenCheck <- feedbackHistoryOpen %>% as_tibble()
fHistOpenCheck[["ID"]] <- alertsData$X_id
# Coerce the column to a plain list before unnesting it
fHistOpenCheck[["X_source.feedbackData.history.open"]] <-
  as.list(fHistOpenCheck$X_source.feedbackData.history.open)
fHistOpenCheck <- unnest_wider(
  fHistOpenCheck,
  col = "X_source.feedbackData.history.open"
)
fHistOpenCheckExpand <- apply(
  X = fHistOpenCheck[, 1:12],
  MARGIN = 2,
  FUN = unlist
)
# Alternative view: flatten the whole original column at once
fHistUnlist <- unlist(feedbackHistoryOpen)
rm(fHistOpenCheck, fHistOpenCheckExpand, feedbackHistoryOpen, fHistUnlist)
# "feedbackTags" Feature --------------------------------------------------
# Flattening gives 10 "test tag" values, so this feature is eliminated too
feedbackTags <- alertsData[, "X_source.feedbackData.tags"]
fTagsCheck <- feedbackTags %>% as_tibble()
fTagsCheck[["ID"]] <- alertsData$X_id
# Coerce the column to a plain list, then flatten it into a single vector
fTagsCheck[["X_source.feedbackData.tags"]] <-
  as.list(fTagsCheck$X_source.feedbackData.tags)
fTagsUnlist <- unlist(fTagsCheck$X_source.feedbackData.tags)
rm(fTagsCheck, fTagsUnlist, feedbackTags)
# "anomalyAssets" Feature -------------------------------------------------
# Stack the per-alert asset tables into one long table; the id column added
# by rbindlist() records which alert each row came from.
anomalyAssets <- alertsData[, "X_source.data.rank_alert.additionalDetails.AnomalyReason.anomaly_assets"]
anomalyAssets <- anomalyAssets %>% data.table::rbindlist(fill = TRUE, idcol = TRUE)
# The resulting `@type` column is constant, so it is dropped
anomalyAssets[, `@type` := NULL]
names(anomalyAssets) <- c("id", "type", "value")
# 11688 unique assets
n_distinct(anomalyAssets$id)
# One row per alert and one column per asset type, resulting in 11688 rows
# (values are taken from the "value" column, pivot_wider()'s default)
anomalyAssets <- anomalyAssets %>%
  pivot_wider(id_cols = "id", names_from = "type", values_fill = NA)
# Give every alert a positional id and merge the assets back in.
# all.x = TRUE keeps alerts without assets and fills their new columns with
# NA, so no separate scaffold of all ids is needed (the former anomalyAssets2
# table was built but never used). The row count is taken from the table
# itself instead of the hard-coded 37082.
alertsData <- as.data.table(alertsData)
alertsData[, "id" := seq(from = 1, to = nrow(alertsData), by = 1)]
alertsData <- merge(x = alertsData, y = anomalyAssets, by = "id", all.x = TRUE)
rm(anomalyAssets)
# "interesting" Feature ---------------------------------------------------
# This feature has quite a bit of deeply nested data
# Not all of it may be relevant
interesting <- alertsData[, "X_source.data.rank_alert.interesting"]
# Build the working table from the one-column table *before* `interesting` is
# replaced by the bare list-column below: unnest_wider() needs the column to
# be present under its name, which is lost once only the list is kept.
# NOTE(review): the statement order was the other way round - confirm.
intCheck <- as.data.table(interesting)
interesting <- interesting$X_source.data.rank_alert.interesting
# interesting2 <- as.data.table(sapply(interesting[[1]], '[', (seq(max(sapply(interesting[[1]], length))))))
intCheck$ID <- alertsData$X_id
intCheck <- intCheck %>% unnest_wider(col = "X_source.data.rank_alert.interesting")
intCheck <- intCheck %>% unnest_wider(col = "additionalInfo")
# The ID column (not to be confused with the id column) is a single value per cell
# Hence, we'll retain this column and need to do nothing further with it
intID <- as.data.table(t(sapply(intCheck$ID, '[', seq(max(sapply(intCheck$ID, length))))))
rm(intID)
# There is a "type" feature. This contains possible values
# anomaly, domain, file, ip, machine, program signature, user
# 36,894/37082 alerts have at least one interesting feature associated with them (99.49%)
# One of them has 203 interesting features associated with it. This will be a recurring theme
# Each alert's vector is padded with NA to the longest length and spread over
# one column per position; the column count is taken from the result itself.
intType <- as.data.table(t(sapply(intCheck$type, '[', seq(max(sapply(intCheck$type, length))))))
names(intType) <- paste0("type_", seq_len(ncol(intType)))
intCheck <- bind_cols(intCheck, intType)
intCheck$type <- NULL
rm(intType)
# We can find matching "value" entries for each "interesting:type" entry
# Since this is one of the famous deeply nested JSON key-value pairs nested within pairs
# nested within pairs nested within pairs
# For example, an anomaly value could be "periodicity.conn-bytes:192.168.2.9:ip:1567296000:3.9358220718978E-4"
# and may be associated with the individual information found within that string
# as well as a large amount of other information
intValue <- as.data.table(t(sapply(intCheck$value, '[', seq(max(sapply(intCheck$value, length))))))
# Name the value columns before binding them, exactly as for the type columns.
# The former `names(intCheck[215:417]) <- valCols` set names on a temporary
# subset and assigned it back, which does not rename the columns of intCheck.
names(intValue) <- paste0("value_", seq_len(ncol(intValue)))
# We can find matching values in the ndjson data columns by visually scanning through the jsonlite columns
View(interesting[32161])
View((interesting[32161])[[1]][[1]])
View(intCheck[32161, ])
View(aD[32161, ])
intCheck <- bind_cols(intCheck, intValue)
intCheck$value <- NULL
rm(intValue)
# The coordinates are latitude and longitude values
# There is a maximum of 203 in a single alert
# Stack the per-alert coordinate tables; the id column records the alert
intCoords <- rbindlist(intCheck$coordinates, idcol = TRUE)
names(intCoords)[1] <- "id"
# One row per alert, so that alerts without coordinates are kept as NA rows.
# The row count comes from intCheck instead of the hard-coded 37082.
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intCoords <- left_join(idCol, intCoords, by = "id")
# Number the coordinate pairs within each alert and spread them into wide
# columns named <coordinate>_<position>
intCoords[, CaseCount := rowid(id)]
intCoords <- data.table::dcast(setDT(intCoords), id ~ CaseCount, sep = "_", value.var = names(intCoords)[2:3])
# Bind everything except the id key (column 1) onto the working table
intCheck <- bind_cols(intCheck, intCoords[, -1])
rm(intCoords, idCol)
intCheck$coordinates <- NULL
# We should find that the asn, country, city, and region are similar
# The country feature is very similar
# Pad each alert's country vector to the longest length, keep only alerts that
# have an entry, and turn them into one row per alert.
intCountry <- sapply(intCheck$country, '[', seq(max(sapply(intCheck$country, length))))
intCountryInd <- lapply(intCheck$country, is.null)
intCountry2 <- intCountry[intCountryInd %in% FALSE]
intCountry2 <- as.data.table(intCountry2)
intCountry2 <- as.data.table(t(intCountry2))
# Name the position columns before the id key is appended
names(intCountry2) <- paste0("country_", seq_len(ncol(intCountry2)))
# Recover the row number of every alert that has an entry (is.null == 0)
intCountryRowNames <- unlist(intCountryInd)
intCountryRowNames <- as.data.table(intCountryRowNames)
intCountryRowNames$intCountryRowNames <- as.numeric(intCountryRowNames$intCountryRowNames)
intCountryRowNames[, "rowID" := seq(1, nrow(intCheck), 1)]
intCountryRowNames <- intCountryRowNames[intCountryRowNames %in% 0]
intCountry2$id <- intCountryRowNames$rowID
rm(intCountryRowNames, intCountryInd)
# Expand back to one row per alert
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intCountry2 <- left_join(idCol, intCountry2, by = "id")
# After the join the id key is column 1. Drop it and keep every country
# column; the former `[, 1:203]` kept id and lost the last country column.
intCheck <- bind_cols(intCheck, intCountry2[, -1])
intCheck$country <- NULL
rm(idCol, intCountry, intCountry2)
# The region feature matches with the country feature
intRegion <- as.data.table(intCheck$region)
intRegion <- as.data.table(t(intRegion))
# Name the position columns before the id key is appended
names(intRegion) <- paste0("region_", seq_len(ncol(intRegion)))
# Recover the row number of every alert that has an entry (is.null == 0)
intRegionRows <- lapply(intCheck$region, is.null)
intRegionRows <- as.data.table(unlist(intRegionRows))
intRegionRows$V1 <- as.numeric(intRegionRows$V1)
intRegionRows[, "id" := seq(1, nrow(intCheck), 1)]
intRegionRows <- intRegionRows[V1 %in% 0]
intRegion$id <- intRegionRows$id
# Expand back to one row per alert
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intRegion <- left_join(idCol, intRegion, by = "id")
# After the join the id key is column 1. Drop it and keep every region
# column; the former `[, 1:203]` kept id and lost the last region column.
intCheck <- bind_cols(intCheck, intRegion[, -1])
intCheck$region <- NULL
rm(idCol, intRegion, intRegionRows)
# The city feature matches with the country feature
intCity <- as.data.table(intCheck$city)
intCity <- as.data.table(t(intCity))
# Name the position columns before the id key is appended
names(intCity) <- paste0("city_", seq_len(ncol(intCity)))
# Recover the row number of every alert that has an entry (is.null == 0)
intCityRows <- lapply(intCheck$city, is.null)
intCityRows <- as.data.table(unlist(intCityRows))
intCityRows$V1 <- as.numeric(intCityRows$V1)
intCityRows[, "id" := seq(1, nrow(intCheck), 1)]
intCityRows <- intCityRows[V1 %in% 0]
intCity$id <- intCityRows$id
# Expand back to one row per alert
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intCity <- left_join(idCol, intCity, by = "id")
# After the join the id key is column 1. Drop it and keep every city
# column; the former `[, 1:203]` kept id and lost the last city column.
intCheck <- bind_cols(intCheck, intCity[, -1])
intCheck$city <- NULL
rm(idCol, intCity, intCityRows)
# The owner feature matches the country feature
intOwner <- as.data.table(intCheck$owner)
intOwner <- as.data.table(t(intOwner))
# Name the position columns before the id key is appended
names(intOwner) <- paste0("owner_", seq_len(ncol(intOwner)))
# Recover the row number of every alert that has an entry (is.null == 0)
intOwnerRows <- lapply(intCheck$owner, is.null)
intOwnerRows <- as.data.table(unlist(intOwnerRows))
intOwnerRows$V1 <- as.numeric(intOwnerRows$V1)
intOwnerRows[, "id" := seq(1, nrow(intCheck), 1)]
intOwnerRows <- intOwnerRows[V1 %in% 0]
intOwner$id <- intOwnerRows$id
# Expand back to one row per alert
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intOwner <- left_join(idCol, intOwner, by = "id")
# After the join the id key is column 1. Drop it and keep every owner
# column; the former `[, 1:203]` kept id and lost the last owner column.
intCheck <- bind_cols(intCheck, intOwner[, -1])
intCheck$owner <- NULL
rm(idCol, intOwner, intOwnerRows)
# The asn feature matches the country feature
intAsn <- as.data.table(intCheck$asn)
intAsn <- as.data.table(t(intAsn))
# Name the position columns before the id key is appended
names(intAsn) <- paste0("asn_", seq_len(ncol(intAsn)))
# Recover the row number of every alert that has an entry (is.null == 0)
intAsnRows <- lapply(intCheck$asn, is.null)
intAsnRows <- as.data.table(unlist(intAsnRows))
intAsnRows$V1 <- as.numeric(intAsnRows$V1)
intAsnRows[, "id" := seq(1, nrow(intCheck), 1)]
intAsnRows <- intAsnRows[V1 %in% 0]
intAsn$id <- intAsnRows$id
# Expand back to one row per alert
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intAsn <- left_join(idCol, intAsn, by = "id")
# After the join the id key is column 1. Drop it and keep every asn
# column; the former `[, 1:203]` kept id and lost the last asn column.
intCheck <- bind_cols(intCheck, intAsn[, -1])
intCheck$asn <- NULL
rm(idCol, intAsn, intAsnRows)
# The "interesting:: Internal" feature
intInternal <- as.data.table(intCheck$internal)
intInternal <- as.data.table(t(intInternal))
# Name the position columns before the id key is appended
names(intInternal) <- paste0("internal_", seq_len(ncol(intInternal)))
# Recover the row number of every alert that has an entry (is.null == 0)
intInternalRows <- lapply(intCheck$internal, is.null)
intInternalRows <- as.data.table(unlist(intInternalRows))
intInternalRows$V1 <- as.numeric(intInternalRows$V1)
intInternalRows[, "id" := seq(1, nrow(intCheck), 1)]
intInternalRows <- intInternalRows[V1 %in% 0]
intInternal$id <- intInternalRows$id
# Expand back to one row per alert
idCol <- as.data.table(seq(1, nrow(intCheck), 1))
names(idCol) <- "id"
intInternal <- left_join(idCol, intInternal, by = "id")
# After the join the id key is column 1. Drop it and keep every internal
# column; the former `[, 1:203]` kept id and lost the last internal column.
intCheck <- bind_cols(intCheck, intInternal[, -1])
intCheck$internal <- NULL
rm(idCol, intInternal, intInternalRows)
# Compare the first alert's relatedInterestingObjects (jsonlite) with ndjson
# columns 21:34 of aD
compare(alertsData$X_source.data.rank_alert.interesting[[1]]$relatedInterestingObjects, list(aD[, 21:34]))
# Reputation
# Exploratory, unfinished attempts at flattening the nested "reputation"
# entries of intCheck. Several statements repeat or overwrite earlier ones.
# NOTE(review): this section reads like scratch work; confirm which of the
# approaches below is the intended one before relying on any result.
intReputation <- intCheck$reputation
# Per-entry element lengths of the reputation column
elemsRows <- sapply(intReputation, lengths)
intReputation <- as.data.table(intReputation)
intReputation <- as.data.table(t(intReputation))
rownames(intReputation) <- elemsRows[which(elemsRows != 0)]
intRep <- rbindlist(intReputation)
# intRep <- rlist::list.clean(intReputation[, 1] ,recursive = T)
# tempCol1 <- rlist::list.clean(intReputation[, 1] ,recursive = T)
# elems captures the nonempty list elements within the list
# NOTE(review): `elems` is not assigned anywhere earlier in this script, so
# the next line fails unless it already exists in the session. Presumably
# `elemsRows` (above) or `elems2` (below) was meant - confirm.
elems <- as.data.table(elems)
elems[, "id" := seq(1, 37082, 1)]
# Three ways of stacking the first transposed column (V1); the author marks
# the second and third as giving the same result.
bigTemp <- intReputation$V1
bigTemp <- compact(bigTemp)
bigTemp <- rbindlist(bigTemp) # Same
tempCol2 <- as.data.table(rbindlist(intReputation$V1)) # Same
# Element lengths and emptiness flags for that first column
elems2 <- sapply(intReputation$V1, lengths)
empties <- lapply(intReputation$V1, sjmisc::is_empty, first.only = FALSE)
intRepCol <- as.data.table(intReputation[, 1])
intRepRows <- lapply(intReputation[, 1], sjmisc::is_empty, first.only = FALSE)
# Overwrites the tempCol2 assigned above
tempCol2 <- compact(intReputation[, 1])
# Start over: rebuild intReputation from intCheck, repeating the first steps
# of this section (without the rownames assignment)
intReputation <- intCheck$reputation
elemsRows <- sapply(intReputation, lengths)
intReputation <- as.data.table(intReputation)
intReputation <- as.data.table(t(intReputation))
# intReputation <- map_dfr(intReputation, rbindlist)
# Row-bind the entries of the first column, transpose, and add a running id
intRep <- map_dfr(intReputation[, 1], as.data.table)
intRep <- as.data.table(t(intRep))
intRep[, "id" := seq(1, nrow(intRep), 1)]
# NOTE(review): the loop body never uses `i` and selects with `c()` (i.e.
# NULL), so all five iterations run the same statements on an empty selection.
# Presumably column `i` was meant to be selected - confirm.
for(i in 1:5) {
tempIntRep <- map_dfc(intReputation[, c()], as.data.table)
tempIntRep <- as.data.table(t(tempIntRep))
tempIntRep[, "id" := seq(1, nrow(tempIntRep), 1)]
intRep <- intRep[tempIntRep, on = .(id = id)]
}
# The same four steps written out by hand for column 1 ...
tempIntRep <- map_dfc(intReputation[, c(1)], as.data.table)
tempIntRep <- as.data.table(t(tempIntRep))
tempIntRep[, "id" := seq(1, nrow(tempIntRep), 1)]
intRep <- intRep[tempIntRep, on = .(id = id)]
# ... and for column 2
tempIntRep <- map_dfc(intReputation[, c(2)], as.data.table)
tempIntRep <- as.data.table(t(tempIntRep))
tempIntRep[, "id" := seq(1, nrow(tempIntRep), 1)]
intRep <- intRep[tempIntRep, on = .(id = id)]
# Drop every nested column that was checked above.
# data.table's `:=` works by reference, so alertsData itself normally loses
# these columns as well; alertsData2 receives the table returned by `[`.
alertsData2 <- local({
  checkedCols <- c(
    "X_source.data.rank_alert.structuredMessage",
    "X_source.data.rank_alert.additionalDetails.AnomalyReason.anomaly_assets",
    "X_source.data.rank_alert.justification",
    "X_source.data.rank_alert.interesting",
    "X_source.correlationIds",
    "X_source.feedbackData.history.default",
    "X_source.feedbackData.history.open",
    "X_source.feedbackData.history.tag",
    "X_source.feedbackData.history.closed",
    "X_source.feedbackData.history.comment",
    "X_source.feedbackData.tags",
    "X_source.source.reputation",
    "X_source.destination.reputation"
  )
  alertsData[, (checkedCols) := NULL]
})
# Remove any remaining constant column (3 columns in the original run)
alertsData2 <- alertsData2 %>% janitor::remove_constant()