uploading functional project

anark1tt3n · Aug 21, 2018 · a630de9 · a630de9
1 parent 8fab458
commit a630de9
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 80 deletions.
diff --git a/src/crawler/crawlerS.java b/src/crawler/crawlerS.java
@@ -24,9 +24,8 @@ public class crawlerS {
 	static ArrayList<String> docExt = new ArrayList<String>(); //where we'll be placing the file extension of the documents
 	static ArrayList<String> docs = new ArrayList<String>(); //the <a> links that are actually documents
 	static ArrayList<String> docTitles = new ArrayList<String>(); //used for folder names
-	static ArrayList<String> docGroups = new ArrayList<String>(); //used folder grouping later
-	static String posExt[] = {"pdf" , "msw12", "excel12book" };//possible attachment extensions
-	static String realExt[] = {"pdf" , "docx" , "xlsx"};
+	static String posExt[] = {"pdf" , "msw12" , "excel12book" , "crtext"};//possible attachment extensions
+	static String realExt[] = {"pdf" , "docx" , "xlsx" , "txt"};
 	static boolean nPage = true; //bool to verify if there is a next page
 	static ChromeOptions options = new ChromeOptions();
 	static String[] urlSplit; //where we place the document id
@@ -92,14 +91,7 @@ public static void crawl(String url, WebDriver crawler) throws InterruptedExcept
 		main.print(docs);
 		main.print("Now we start looking for the attachments");
 		compileList(crawler, docs);
-		down.loadAll(attLinks,attTitles,docExt,docGroups,docTitles);
-		/*		int a=0;
-		for(String b : docLinks) { //for each doc in the documents string...
-			int downProg = 100-progress;
-			progress = progress+((downProg)*(a/docLinks.size()));
-			down.load(b, docTitles.get(a), docExt.get(a), docGroups.get(a));
-			a++;
-		}*/
+		down.loadAll(attLinks,attTitles,docExt,docTitles);
 		main.print("Should've downloaded all attachments listed.");
 		crawler.quit();
 		JOptionPane.showMessageDialog(draw.urlF, "We should've downloaded all attachments in the Docket Browser for this document!", "Done", JOptionPane.INFORMATION_MESSAGE);
@@ -138,8 +130,8 @@ public static void getLinks (WebDriver crawler) {
 		nextPage(crawler);
 	}
 
-	public static void percentArch(int a,int b) {
-		progress = (a/b)*80;
+	public static int percentArch(int a,int b) {
+		return (a/b)*80;
 	}
 
 	public static void compileList(WebDriver crawler, ArrayList<String> docs) {
@@ -154,28 +146,24 @@ public static void compileList(WebDriver crawler, ArrayList<String> docs) {
 				for(String ext : posExt) { //looking for files of every extension type
 					List<WebElement> loadAtt = wait.until(ExpectedConditions.visibilityOfAllElementsLocatedBy((By.cssSelector("a[href*='=" + ext + "']")))); //waiting for column to show
 					main.print("We've found files with the extension of: " + realExt[i]);
-					int attPer = 0; //attachments on this page for folder grouping later
 					for(WebElement att : loadAtt) {
 						try {
 							main.print("Found the attachment, adding to link collection");
-							attLinks.add(att.getAttribute("href"));
-							WebElement rTitle = wait.until(ExpectedConditions.presenceOfElementLocated((By.xpath("/html/body/div[3]/div[2]/div[2]/div[2]/div/div/div[1]/div[1]/h1")))); 
-							try {
+							attLinks.add(att.getAttribute("href")); //adding download link
+							WebElement rTitle = wait.until(ExpectedConditions.presenceOfElementLocated((By.xpath("/html/body/div[3]/div[2]/div[2]/div[2]/div/div/div[1]/div[1]/h1"))));
+							docTitles.add(rTitle.getText()); //document title, used later as the folder name
+							try { //titles are nested weirdly so we're ripping em
 								WebElement parentElement = att.findElement(By.xpath("./..")); //step up to the link's parent...
 								WebElement grandElement = parentElement.findElement((By.xpath("./.."))); //...then to its grandparent, which is where we look for the title
 								WebElement title = grandElement.findElement(By.xpath("//h3")); //if it's an attachment listed an alt way; NOTE(review): "//h3" searches the whole document, ".//h3" would limit it to grandElement - confirm intent
 								rTitle = title;
-								attPer++; //haha lets hope this works
 							} catch(NoSuchElementException e) {
 								main.print("Defaulting to document title for file name...");
-								attPer++; //we just don't change the title now lmao
 							}
 							attTitles.add(rTitle.getText()); //adding the text of the above element to our list of document titles
 							docExt.add(realExt[i]); //adding the extension, filtered because this website is weird with extensions
-							percentArch(i,docs.size()); 
-							docGroups.add(Integer.toString(attPer));
-							attPer=0;
-							main.print("We are " + progress + "% percent done with checking these links :)");
+//							progress = percentArch(i,docs.size()); 
+//							main.print("We are " + progress + "% percent done with checking these links :)");
 						} catch(InvalidSelectorException e) {
 							main.print("Couldn't find a document with an extension of \"" + ext + "\" on this page. \n Continuing...");
 						}
@@ -184,7 +172,11 @@ public static void compileList(WebDriver crawler, ArrayList<String> docs) {
 				}
 			} catch (TimeoutException e) {
 				main.print("Couldnt find document with current extension, continuing.");
-				attLinks.add("No Att");
+				attLinks.add(null);
+				docExt.add(null);
+				attTitles.add(null);
+				docTitles.add(null);
+
 			}
 		}
 	}

diff --git a/src/crawler/down.java b/src/crawler/down.java
@@ -15,48 +15,39 @@
 import java.util.zip.ZipFile;
 
 public class down{
+								//attachment links | attachment titles | extension of attachments | the folder they go in
+	public static void loadAll(ArrayList<String> links, ArrayList<String> titles, ArrayList<String> ext, ArrayList<String> folds) { 
+		int i = 0;
+        new File("Attachments").mkdir(); //make the attachments folder
+        for(String url : links) {    
+            if(!(url == null)) {
+            		main.print("Referencing index " + i + " out of " + links.size());
+                    load(url, titles.get(i), ext.get(i), folds.get(i));
+            }
+            i++;
+        }
+    }
 
-	public static void loadAll(ArrayList<String> links, ArrayList<String> titles, ArrayList<String> ext, ArrayList<String> groups, ArrayList<String> folds) {
-		String folder = groups.toString().replaceAll("[\\[\\]\\s]", ""); //what folder are we on?
-		int[] groupNumb = Arrays.stream(folder.split(",")).mapToInt(Integer::parseInt).toArray(); //grouping of attachments as ints
-		int document = 0; //what document in that directory are we on?
-		int groupCount = 0; //what docs-per-folder are we comparing this number to?
-		int count = 0; //what number are we in terms of docs-per-folder?
-		for(String url : links) {
-			if(!url.equals("No Att")) {
-				if(count < groupNumb[document]) { //if there's still more files to fit in this folder
-					try {
-						load(url, titles.get(count), ext.get(count), folds.get(groupNumb[groupCount])); //try to download the file
-						count++;
-					} catch (IOException e) {
-						main.print("failed to DL attachment, continuing...");
-						count++;
-						e.printStackTrace();
-					}
-				}
-			}
-			else {
-				document++;
-			}
-			if(count==groupNumb.length-1) {
-				groupCount++;	
-			}
-		}
-	}
-
 	public static void delete(File file) {
 		file.delete();
 	}
 
-	public static void load(String link, String title, String ext, String fold) throws IOException { //goes through list of pdfs, downloads
+	public static void load(String link, String title, String ext, String fold) { //downloads the single attachment found at link
 		main.print("Downloading file titled " + title);
-		String T = title.replaceAll("[\\\\/:*?\"<>|]", "_") + "." + ext; //we're sanitizing the title
-		new File("Attachments" + fold).mkdirs(); //making a folder for it
-		URL website = new URL(link); //makes a URL from the first string passed
-		ReadableByteChannel rbc = Channels.newChannel(website.openStream()); //opens the given url as a stream of bytes
-		FileOutputStream fos = new FileOutputStream(new File("Attachments/" + fold + "/" + T)); //create new fileoutput stream, file name as the stored PDF title"
-		fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); //writes the above bytes to the file output stream
-		fos.close(); //closes file output stream
+		try {
+			String T = title.replaceAll("[\\\\/:*?\"<>|]", "_"); //we're sanitizing the title
+			new File("Attachments/" + fold).mkdirs();
+			File attachment = File.createTempFile("Attachments/" + fold + "/" + T, ext); 
+			URL website = new URL(link); //makes a URL from the first string passed
+			ReadableByteChannel rbc = Channels.newChannel(website.openStream()); //opens the given url as a stream of bytes
+			FileOutputStream fos = new FileOutputStream(attachment); //create new file output stream that writes to the attachment file
+			main.print(new File("Attachments/" + fold + "/" + T).getAbsolutePath());
+			fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); //writes the above bytes to the file output stream
+			fos.close(); //closes file output stream
+		} catch (IOException e) {
+			main.print("failed to DL attachment, continuing...");
+            e.printStackTrace();
+		}
 	}
 
 	public static void unzip(String a) throws IOException { //utility for unzipping
@@ -86,6 +77,7 @@ public static void unzip(String a) throws IOException { //utility for unzipping
 				while ((length = is.read(bytes)) >= 0) {
 					fos.write(bytes, 0, length);
 				}
+
 				is.close();
 				fos.close();
 

diff --git a/src/crawler/draw.java b/src/crawler/draw.java
@@ -25,7 +25,7 @@
 public class draw extends JPanel implements ActionListener {
 
 	String userU;
-	JButton go = new JButton("Get PDFs"); 
+	JButton go = new JButton("Start"); 
 	static JTextField urlF = new JTextField(20);
 	static JProgressBar progressBar = new JProgressBar(0, 100);
 
@@ -57,22 +57,22 @@ public void actionPerformed(ActionEvent e) {
 		userU = urlF.getText();
 		main.print(userU);
 		try {
-			new Thread(new Runnable(){
-				public void run(){
-					int x = 0;
-					while(x<=100) {
-						x = crawlerS.progress;
-						progressBar.setValue(x);        // Setting incremental values
-						if (x == 100 ){
-							progressBar.setString("Done with the download!");   // End message
-							try{
-								Thread.sleep(200);
-							}catch(Exception ex){
-							}
-						}
-					}
-				}
-			}).start();
+//			new Thread(new Runnable(){
+//				public void run(){
+//					int x = 0;
+//					while(x<=100) {
+//						x = crawlerS.progress;
+//						progressBar.setValue(x);        // Setting incremental values
+//						if (x == 100 ){
+//							progressBar.setString("Done with the download!");   // End message
+//							try{
+//								Thread.sleep(200);
+//							}catch(Exception ex){
+//							}
+//						}
+//					}
+//				}
+//			}).start();
 			crawlerS.initC(userU); //passes the url to the crawlerS class so it can start searching
 		} catch (InterruptedException malf) {
 			main.print("Malformed URL");
@@ -102,12 +102,12 @@ public void focusLost(FocusEvent e) {
 		});
 		urlF.setText("Place document URL here");
 
-		progressBar.setValue(0);
-		progressBar.setStringPainted(true);
+//		progressBar.setValue(0);
+//		progressBar.setStringPainted(true);
 
 		add(urlF, BorderLayout.CENTER);
 		add(go, BorderLayout.SOUTH);
-		add(progressBar,BorderLayout.NORTH);
+//		add(progressBar,BorderLayout.NORTH);
 
 		JFrame test = new JFrame();