-
Notifications
You must be signed in to change notification settings - Fork 0
/
diff.sh
67 lines (67 loc) · 1.91 KB
/
diff.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
#
# JournalList.net diff shell script.
#
# Name - diff.sh
# Synopsis - diff.sh DIRNAME1 DIRNAME2
# DIRNAME1 - the first webcrawl directory
# DIRNAME2 - the second webcrawl directory
#
# Copyright (c) 2021 Brown Wolf Consulting LLC
# License: Creative Commons Attribution-NonCommercial-ShareAlike license. See: https://creativecommons.org/
#
#-------------------------------------
#
#
# usage - print the command synopsis on stderr.
#
usage() {
  printf 'Usage: %s DIRNAME1 DIRNAME2\n' "${0##*/}" >&2
}
#
# section TITLE - print a blank line followed by the section heading.
#
section() {
  echo ""
  echo "$1"
}
#
# report_diff OLD NEW - diff the two files and list the lines that exist only
# in OLD under "Removed:" and the lines that exist only in NEW under "Added:".
# The diff output is held in a variable, so no scratch file is written to the
# current directory.
#
report_diff() {
  local delta
  # diff exits non-zero when the inputs differ; that is the expected case here,
  # so its status is deliberately not checked.
  delta=$(diff "$1" "$2")
  echo "Removed:"
  # Anchor on the diff marker at the start of the line so that a line whose
  # own text contains "< " or "> " is not reported in the wrong list.
  grep '^< ' <<<"$delta"
  echo "Added:"
  grep '^> ' <<<"$delta"
}
#
# main DIRNAME1 DIRNAME2 - compare two webcrawl directories and print a report.
# Returns 2 when an argument is missing and 1 when an argument is not a
# directory; otherwise returns the status of the last report pipeline.
#
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi
  local old=$1 new=$2 dir
  for dir in "$old" "$new"; do
    if [[ ! -d "$dir" ]]; then
      printf '%s: %s: not a directory\n' "${0##*/}" "$dir" >&2
      return 1
    fi
  done

  section "Ecosystem Associations"
  report_diff "$old/${old}-associations.csv" "$new/${new}-associations.csv"

  section "Ecosystem Publishers"
  report_diff "$old/${old}-publishers.csv" "$new/${new}-publishers.csv"

  section "Ecosystem Vendors"
  report_diff "$old/${old}-vendors.csv" "$new/${new}-vendors.csv"

  section "JournalList Members"
  report_diff "$old/www.journallist.net-trust.txt" "$new/www.journallist.net-trust.txt"

  # Compare the distinct values of the first comma-separated field of each crawl CSV.
  section "trust.txt files"
  report_diff \
    <(awk -F "," '{ print $1 }' "$old/${old}.csv" | sort | uniq) \
    <(awk -F "," '{ print $1 }' "$new/${new}.csv" | sort | uniq)

  # Compare the second comma-separated field of each resources CSV.
  section "Well-known.dev resources"
  report_diff \
    <(sed "s/[^,]*,\([^,]*\).*/\1/" "$old/${old}-resources.csv" | sort) \
    <(sed "s/[^,]*,\([^,]*\).*/\1/" "$new/${new}-resources.csv" | sort)

  # Take the second word of each "Fetching" line that follows the marker in the
  # newer crawl log and strip the URL scheme, "www." prefix and "/trust.txt".
  # Only the 1000 log lines after the marker are scanned (grep -A 1000).
  section "Domains with trust.txt files found by well-known.dev"
  grep -A 1000 "BEGIN: processing well-known.dev resource list" "$new/${new}-log.txt" \
    | grep "Fetching" \
    | sed -e "s/[^ ]* \([^ ]*\).*/\1/" -e "s/https:\/\/www.//" -e "s/\/trust.txt//"
}

main "$@"