Fix Cloudflare blocking requests
samtay committed Aug 21, 2024
1 parent f5a398d commit aae87f3
Showing 3 changed files with 17 additions and 13 deletions.
src/stackexchange/api.rs (17 changes: 11 additions & 6 deletions)
@@ -9,8 +9,7 @@ use crate::error::Result;
 use crate::tui::markdown;
 
 /// StackExchange API v2.2 URL
-// TODO why not https?
-const SE_API_URL: &str = "http://api.stackexchange.com";
+const SE_API_URL: &str = "https://api.stackexchange.com";
 const SE_API_VERSION: &str = "2.2";
 
 /// Filter generated to include only the fields needed to populate
@@ -80,6 +79,10 @@ impl Api {
             header::ACCEPT,
             header::HeaderValue::from_static("application/json"),
         );
+        headers.insert(
+            header::USER_AGENT,
+            header::HeaderValue::from_static(super::USER_AGENT),
+        );
         let client = Client::builder().default_headers(headers).build().unwrap();
         Api { client, api_key }
     }
@@ -91,15 +94,17 @@ impl Api {
         let endpoint = format!("questions/{ids}", ids = ids.join(";"));
         let url = stackexchange_url(&endpoint);
         log::debug!("Fetching questions from: {url}");
-        let qs = self
+        let qs_rsp = self
             .client
             .get(url)
             .query(&self.get_default_se_opts())
             .query(&[("site", site), ("pagesize", &total)])
             .send()
-            .await?
-            .json::<ResponseWrapper<Question<String>>>()
-            .await?
+            .await?;
+        let status_code = qs_rsp.status();
+        let body = qs_rsp.text().await?;
+        log::debug!("Stack exchange returned status {status_code} and body {body}");
+        let qs = serde_json::from_str::<ResponseWrapper<Question<String>>>(&body)?
            .items
            .into_iter()
            .filter(|q| !q.answers.is_empty())
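
For context, the pattern these api.rs hunks follow can be sketched as a standalone example, assuming only the reqwest, serde, serde_json, and log crates; build_client, get_json, and the boxed error type are illustrative names, not this crate's actual Api type. The idea is a client whose default headers carry a browser-like User-Agent, plus a fetch path that logs the status and raw body before deserializing, so a Cloudflare challenge page shows up in the logs rather than as an opaque JSON parse error.

// Illustrative sketch only, not the crate's actual Api type.
use reqwest::{header, Client};
use serde::de::DeserializeOwned;

const USER_AGENT: &str =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0";

/// Build a client that sends a browser-like User-Agent on every request.
fn build_client() -> Client {
    let mut headers = header::HeaderMap::new();
    headers.insert(
        header::ACCEPT,
        header::HeaderValue::from_static("application/json"),
    );
    headers.insert(
        header::USER_AGENT,
        header::HeaderValue::from_static(USER_AGENT),
    );
    Client::builder()
        .default_headers(headers)
        .build()
        .expect("static default headers are valid")
}

/// Fetch a URL and deserialize the body, logging status and raw body first
/// so a non-JSON response (e.g. a Cloudflare challenge page) is visible.
async fn get_json<T: DeserializeOwned>(
    client: &Client,
    url: &str,
) -> Result<T, Box<dyn std::error::Error>> {
    let rsp = client.get(url).send().await?;
    let status = rsp.status();
    let body = rsp.text().await?;
    log::debug!("server returned status {status} and body {body}");
    Ok(serde_json::from_str::<T>(&body)?)
}

Putting the header in default_headers means every API call carries it without each call site having to remember to set it.
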
src/stackexchange/mod.rs (4 changes: 4 additions & 0 deletions)
@@ -7,3 +7,7 @@ pub mod scraper;
 pub use api::{Answer, Id, Question};
 pub use local_storage::{LocalStorage, SiteMap};
 pub use search::Search;
+
+/// Mock user agent
+const USER_AGENT: &str =
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0";
src/stackexchange/search.rs (9 changes: 2 additions & 7 deletions)
@@ -16,11 +16,6 @@ use super::scraper::{DuckDuckGo, Google, ScrapedData, Scraper};
 /// Limit on concurrent requests (gets passed to `buffer_unordered`)
 const CONCURRENT_REQUESTS_LIMIT: usize = 8;
 
-/// Mock user agent to get real DuckDuckGo results
-// TODO copy other user agents and use random one each time
-const USER_AGENT: &str =
-    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0";
-
 /// This structure provides methods to search queries and get StackExchange
 /// questions/answers in return.
 // TODO this really needs a better name...
@@ -105,13 +100,13 @@ impl Search {
         let url = scraper.get_url(&self.query, self.site_map.values());
         let html = Client::new()
             .get(url)
-            .header(header::USER_AGENT, USER_AGENT)
+            .header(header::USER_AGENT, super::USER_AGENT)
             .send()
             .await?
             .text()
             .await?;
         let data = scraper.parse(&html, self.site_map.as_ref(), self.config.limit)?;
-        log::trace!("Scraped question IDs: {:#?}", &data.question_ids);
+        log::debug!("Scraped question IDs: {:#?}", &data.question_ids);
         self.parallel_questions(data).await
     }
 
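In search.rs the header is instead attached per request; roughly, assuming only reqwest, that pattern looks like the following (fetch_html is an illustrative name, not the crate's Search implementation):

// Hypothetical per-request variant used by the scraper path: attach the
// User-Agent to a single GET instead of to the client's default headers.
use reqwest::{header, Client};

const USER_AGENT: &str =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0";

async fn fetch_html(url: &str) -> Result<String, reqwest::Error> {
    Client::new()
        .get(url)
        .header(header::USER_AGENT, USER_AGENT)
        .send()
        .await?
        .text()
        .await
}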
