import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.List;

public class WebScraping {

    public static void main(String[] args) throws Exception {
        // Zyte Smart Proxy Manager (formerly Crawlera) authenticates with the
        // API key as the username and an empty password, Base64-encoded.
        String authString = "<CRAWLERA_APIKEY>:";
        String encodedAuthString = Base64.getEncoder().encodeToString(authString.getBytes());

        // Read the target URLs, one per line, from urls.txt in the working directory.
        final List<String> urls = Files.readAllLines(Paths.get(".", "urls.txt"));

        // Fetch the URLs concurrently; every request is routed through the proxy.
        urls.parallelStream().forEach(url -> {
            try {
                final Document doc = Jsoup.connect(url)
                        .header("Proxy-Authorization", "Basic " + encodedAuthString)
                        .followRedirects(true)
                        .ignoreHttpErrors(true)   // keep going on 4xx/5xx responses
                        .ignoreContentType(true)  // accept non-HTML content types
                        .timeout(180000)          // 180 s, to allow for slow proxied requests
                        .proxy("proxy.zyte.com", 8010)
                        .get();

                final String title = doc.select("title").text();
                System.out.println(Thread.currentThread().getName() + ": " + title);
            } catch (IOException e) {
                e.printStackTrace();
            }
        });
    }
}
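For reference, a minimal sketch of how this example might be run; the file names and jsoup version below are illustrative, not prescribed by the article. The class expects a urls.txt file in the working directory, one URL per line. Note also that since Java 8u111, Basic authentication on HTTPS CONNECT tunnels is disabled by default, so for https:// targets you may need to clear the jdk.http.auth.tunneling.disabledSchemes system property so the Proxy-Authorization credentials are actually sent to the proxy.

    # urls.txt — one target URL per line (example values)
    https://toscrape.com/
    https://books.toscrape.com/

    # Compile and run with jsoup on the classpath
    # (jsoup version is an assumption; substitute the one you use)
    javac -cp jsoup-1.17.2.jar WebScraping.java
    java -cp .:jsoup-1.17.2.jar \
         -Djdk.http.auth.tunneling.disabledSchemes="" \
         WebScraping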