ARTICLE AD BOX
package org.degoogle.Networking;
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.Optional;
/**
 * Small HTTP helper that downloads the body of a URL as text.
 *
 * <p>A single {@link HttpClient} is built once and reused for every request. The client is
 * built with the JDK's default redirect policy, so redirects are not followed and a 3xx
 * response is reported as "no body".
 *
 * <p>Only the raw response body is returned; no JavaScript is executed. Pages that render
 * their content in the browser therefore yield only their static HTML shell.
 */
public class HTTPComms {

    private static final System.Logger LOGGER = System.getLogger(HTTPComms.class.getName());

    /** The only status code for which the response body is handed back to the caller. */
    private static final int HTTP_OK = 200;

    private static final HttpClient HTTP_CLIENT = HttpClient.newBuilder()
            .connectTimeout(Duration.of(10, ChronoUnit.SECONDS))
            .build();

    /**
     * Performs a GET request and returns the response body when the server answers with
     * status 200.
     *
     * <p>This method is best-effort and does not throw: every failure is logged and reported
     * as an empty {@link Optional}.
     *
     * @param url absolute {@code http} or {@code https} URL to fetch; {@code null}, malformed
     *            or non-HTTP URLs yield an empty result
     * @return the response body for a 200 response; {@link Optional#empty()} for any other
     *         status code, an invalid URL, an I/O failure (including timeouts) or when the
     *         calling thread is interrupted
     */
    public Optional<String> urlBodyGetter(String url) {
        if (url == null) {
            LOGGER.log(System.Logger.Level.WARNING, "Cannot fetch a null URL");
            return Optional.empty();
        }
        try {
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .timeout(Duration.of(10, ChronoUnit.SECONDS))
                    .GET()
                    .build();
            HttpResponse<String> response =
                    HTTP_CLIENT.send(request, HttpResponse.BodyHandlers.ofString());
            if (response.statusCode() == HTTP_OK) {
                return Optional.of(response.body());
            }
            LOGGER.log(System.Logger.Level.DEBUG,
                    "Ignoring response with status " + response.statusCode() + " for " + url);
            return Optional.empty();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can react to it.
            Thread.currentThread().interrupt();
            LOGGER.log(System.Logger.Level.WARNING, "Interrupted while fetching " + url, e);
        } catch (IllegalArgumentException e) {
            // Raised by URI.create for malformed input and by the request builder for
            // URIs that are not absolute http/https.
            LOGGER.log(System.Logger.Level.WARNING, "Invalid or unsupported URL: " + url, e);
        } catch (IOException e) {
            // Connection failures, timeouts and read errors.
            LOGGER.log(System.Logger.Level.WARNING, "I/O failure while fetching " + url, e);
        } catch (RuntimeException e) {
            // Boundary catch: keeps the original "never throws" contract for callers.
            LOGGER.log(System.Logger.Level.ERROR, "Unexpected failure while fetching " + url, e);
        }
        return Optional.empty();
    }
}
Output:
<!doctype html> <html lang="en"> <head> <meta charset="UTF-8" /> <link rel="icon" type="image/svg+xml" href="/knight.svg" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> <title>Knights Time</title> <script data-goatcounter="https://xyz.goatcounter.com/count" async src="//gc.zgo.at/count.js"> </script> <script type="module" crossorigin src="/knightsTime/assets/index-B0Z7CBXE.js"></script> <link rel="stylesheet" crossorigin href="/knightsTime/assets/index-CZBET73t.css"> </head> <body> <div id="root"></div> </body> </html>

The code works for some websites, but for others it just returns the `<div id="root">`. Is this a good practice, or should we go with another approach? If so, please link a resource for me to learn about it. BTW, the idea is to build a web crawler.
