/
waybackarchive.go
executable file
·52 lines (42 loc) · 1.38 KB
/
waybackarchive.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
package waybackarchive
import (
"context"
"fmt"
"io/ioutil"
"strings"
"github.com/projectdiscovery/subfinder/pkg/subscraping"
)
// Source is the passive scraping agent for the Wayback Machine
// (web.archive.org) CDX search endpoint. It has no fields; all behaviour
// lives in its Run and Name methods.
type Source struct{}
// Run queries the web.archive.org CDX search endpoint for archived URLs under
// domain and streams every subdomain extracted from the response body on the
// returned channel.
//
// The work is done in a background goroutine. A failed request or a failed
// body read is reported as a single subscraping.Error result. The channel is
// always closed when the goroutine finishes. If ctx is cancelled while a
// result is waiting to be delivered, the goroutine stops and any undelivered
// results are dropped, rather than blocking forever on a receiver that may
// have gone away.
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
	results := make(chan subscraping.Result)

	go func() {
		// One cleanup point for the channel, whichever path returns.
		defer close(results)

		// send delivers a single result and reports false once ctx is
		// cancelled, so callers can stop producing.
		send := func(result subscraping.Result) bool {
			select {
			case results <- result:
				return true
			case <-ctx.Done():
				return false
			}
		}

		pagesResp, err := session.NormalGetWithContext(ctx, fmt.Sprintf("http://web.archive.org/cdx/search/cdx?url=*.%s/*&output=json&fl=original&collapse=urlkey", domain))
		if err != nil {
			send(subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err})
			return
		}

		// The body is consumed in one go, so it can be closed right after the
		// read regardless of whether the read succeeded. The Close error is
		// ignored, as before: nothing useful can be done with it here.
		body, err := ioutil.ReadAll(pagesResp.Body)
		pagesResp.Body.Close()
		if err != nil {
			send(subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err})
			return
		}

		for _, subdomain := range session.Extractor.FindAllString(string(body), -1) {
			// NOTE(review): presumably these strip the hex digits left over
			// from percent-encoded "%25" / "%2F" sequences that the extractor
			// matched as part of the host name — confirm against the
			// Extractor pattern.
			subdomain = strings.TrimPrefix(subdomain, "25")
			subdomain = strings.TrimPrefix(subdomain, "2F")

			if !send(subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}) {
				return
			}
		}
	}()

	return results
}
// Name reports the identifier of this source, the same value Run places in
// the Source field of every result it emits.
func (s *Source) Name() string {
	const sourceName = "waybackarchive"
	return sourceName
}