From 5b35b675cbeeed3f9688412f9f805213850cd42d Mon Sep 17 00:00:00 2001 From: Vaxry Date: Sat, 12 Apr 2025 21:03:14 +0100 Subject: [PATCH] core: Implement basic git integration Missing streaming, so git's a bit confused, but works. Also easy to bypass. Although I doubt any mf at alibaba gon bypass it with his custom UA. --- README.md | 1 + src/config/Config.hpp | 1 + src/core/Handler.cpp | 166 +++++++++++++++--------------- src/headers/gitProtocolHeader.hpp | 24 +++++ src/main.cpp | 2 + subprojects/pistache | 2 +- 6 files changed, 114 insertions(+), 82 deletions(-) create mode 100644 src/headers/gitProtocolHeader.hpp diff --git a/README.md b/README.md index c99a25e..6329641 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ AI scrapers are everywhere. This will stop them. `robots.txt` won't. - Dynamic challenge amount (aka difficulty) - Detection of token overuse - Better wait screen +- Better git integration (it's quite rudimentary right now) ## Caveats If you are using this, it's almost certain search engines will stop indexing your site. Keep this in mind. 
diff --git a/src/config/Config.hpp b/src/config/Config.hpp index d6eb8ef..9a8b4b2 100644 --- a/src/config/Config.hpp +++ b/src/config/Config.hpp @@ -13,6 +13,7 @@ class CConfig { std::string data_dir = ""; std::string html_dir = ""; unsigned long int max_request_size = 10000000; // 10MB + bool git_host = false; } m_config; }; diff --git a/src/core/Handler.cpp b/src/core/Handler.cpp index b9408f9..039227c 100644 --- a/src/core/Handler.cpp +++ b/src/core/Handler.cpp @@ -2,6 +2,7 @@ #include "../headers/authorization.hpp" #include "../headers/cfHeader.hpp" #include "../headers/xforwardfor.hpp" +#include "../headers/gitProtocolHeader.hpp" #include "../debug/log.hpp" #include "../GlobalState.hpp" #include "../config/Config.hpp" @@ -12,7 +13,9 @@ #include #include +#define private public #include +#undef private #include #include #include @@ -89,9 +92,11 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt const auto HEADERS = req.headers(); std::shared_ptr hostHeader; std::shared_ptr contentTypeHeader; + std::shared_ptr userAgentHeader; std::shared_ptr cfHeader; std::shared_ptr xForwardedForHeader; std::shared_ptr authHeader; + std::shared_ptr gitProtocolHeader; try { hostHeader = Pistache::Http::Header::header_cast(HEADERS.get("Host")); @@ -125,6 +130,18 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt ; // silent ignore } + try { + userAgentHeader = Pistache::Http::Header::header_cast(HEADERS.get("User-Agent")); + } catch (std::exception& e) { + ; // silent ignore + } + + try { + gitProtocolHeader = Pistache::Http::Header::header_cast(HEADERS.get("Git-Protocol")); + } catch (std::exception& e) { + ; // silent ignore + } + Debug::log(LOG, "Got request for: {}:{}{}", hostHeader->host(), hostHeader->port().toString(), req.resource()); Debug::log(LOG, "Request author: IP {}", req.address().host()); if (cfHeader) @@ -132,6 +149,9 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, 
Pistache::Htt else Debug::log(WARN, "Connection does not come through CloudFlare"); + if (userAgentHeader) + Debug::log(LOG, "UA: {}", userAgentHeader->agent()); + if (req.resource() == "/checkpoint/challenge") { if (req.method() == Pistache::Http::Method::Post) challengeSubmitted(req, response); @@ -140,6 +160,21 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt return; } + if (g_pConfig->m_config.git_host) { + // TODO: ratelimit and check this. This can be faked! + if (gitProtocolHeader && userAgentHeader) { + Debug::log(LOG, "Request looks like it is coming from git (UA + GP). Accepting."); + + proxyPass(req, response); + return; + } else if (userAgentHeader && userAgentHeader->agent().starts_with("git/")) { + Debug::log(LOG, "Request looks like it is coming from git (UA git). Accepting."); + + proxyPass(req, response); + return; + } + } + if (req.cookies().has("CheckpointToken")) { // check the token const auto TOKEN = g_pDB->getToken(req.cookies().get("CheckpointToken").value); @@ -243,89 +278,58 @@ void CServerHandler::proxyPass(const Pistache::Http::Request& req, Pistache::Htt client.init(Pistache::Http::Experimental::Client::options().threads(1).maxConnectionsPerHost(8)); const std::string FORWARD_ADDR = g_pConfig->m_config.forward_address; - switch (req.method()) { - // there are some crazy semantics going on here, idk how to make this cleaner with less c+p + Debug::log(LOG, "Method ({}): Forwarding to {}", (uint32_t)req.method(), FORWARD_ADDR + req.resource()); - case Pistache::Http::Method::Get: { - Debug::log(LOG, "Get: Forwarding to {}", FORWARD_ADDR + req.resource()); - auto builder = client.get(FORWARD_ADDR + req.resource()).body(req.body()); - for (auto it = req.cookies().begin(); it != req.cookies().end(); ++it) { - builder.cookie(*it); - } - builder.timeout(std::chrono::milliseconds(10000)); - - auto resp = builder.send(); - resp.then([&](Pistache::Http::Response resp) { response.send(Pistache::Http::Code::Ok, resp.body()); }, - 
[&](std::exception_ptr e) { response.send(Pistache::Http::Code::Internal_Server_Error, "Internal Proxy Error"); }); - Pistache::Async::Barrier b(resp); - b.wait_for(std::chrono::seconds(10)); - break; - } - case Pistache::Http::Method::Post: { - Debug::log(LOG, "Post: Forwarding to {}", FORWARD_ADDR + req.resource()); - auto builder = client.post(FORWARD_ADDR + req.resource()).body(req.body()); - for (auto it = req.cookies().begin(); it != req.cookies().end(); ++it) { - builder.cookie(*it); - } - builder.timeout(std::chrono::milliseconds(10000)); - - auto resp = builder.send(); - resp.then([&](Pistache::Http::Response resp) { response.send(Pistache::Http::Code::Ok, resp.body()); }, - [&](std::exception_ptr e) { response.send(Pistache::Http::Code::Internal_Server_Error, "Internal Proxy Error"); }); - Pistache::Async::Barrier b(resp); - b.wait_for(std::chrono::seconds(10)); - break; - } - case Pistache::Http::Method::Put: { - Debug::log(LOG, "Put: Forwarding to {}", FORWARD_ADDR + req.resource()); - auto builder = client.put(FORWARD_ADDR + req.resource()).body(req.body()); - for (auto it = req.cookies().begin(); it != req.cookies().end(); ++it) { - builder.cookie(*it); - } - builder.timeout(std::chrono::milliseconds(10000)); - - auto resp = builder.send(); - resp.then([&](Pistache::Http::Response resp) { response.send(Pistache::Http::Code::Ok, resp.body()); }, - [&](std::exception_ptr e) { response.send(Pistache::Http::Code::Internal_Server_Error, "Internal Proxy Error"); }); - Pistache::Async::Barrier b(resp); - b.wait_for(std::chrono::seconds(10)); - break; - } - case Pistache::Http::Method::Delete: { - Debug::log(LOG, "Delete: Forwarding to {}", FORWARD_ADDR + req.resource()); - auto builder = client.del(FORWARD_ADDR + req.resource()).body(req.body()); - for (auto it = req.cookies().begin(); it != req.cookies().end(); ++it) { - builder.cookie(*it); - } - builder.timeout(std::chrono::milliseconds(10000)); - - auto resp = builder.send(); - 
resp.then([&](Pistache::Http::Response resp) { response.send(Pistache::Http::Code::Ok, resp.body()); }, - [&](std::exception_ptr e) { response.send(Pistache::Http::Code::Internal_Server_Error, "Internal Proxy Error"); }); - Pistache::Async::Barrier b(resp); - b.wait_for(std::chrono::seconds(10)); - break; - } - case Pistache::Http::Method::Patch: { - Debug::log(LOG, "Patch: Forwarding to {}", FORWARD_ADDR + req.resource()); - auto builder = client.patch(FORWARD_ADDR + req.resource()).body(req.body()); - for (auto it = req.cookies().begin(); it != req.cookies().end(); ++it) { - builder.cookie(*it); - } - builder.timeout(std::chrono::milliseconds(10000)); - - auto resp = builder.send(); - resp.then([&](Pistache::Http::Response resp) { response.send(Pistache::Http::Code::Ok, resp.body()); }, - [&](std::exception_ptr e) { response.send(Pistache::Http::Code::Internal_Server_Error, "Internal Proxy Error"); }); - Pistache::Async::Barrier b(resp); - b.wait_for(std::chrono::seconds(10)); - break; - } - - default: { - response.send(Pistache::Http::Code::Internal_Server_Error, "Invalid request type for proxy"); - } + auto builder = client.prepareRequest(FORWARD_ADDR + req.resource(), req.method()); + builder.body(req.body()); + for (auto it = req.cookies().begin(); it != req.cookies().end(); ++it) { + builder.cookie(*it); } + const auto HEADERS = req.headers().list(); + for (auto& h : HEADERS) { + // FIXME: why does this break e.g. gitea if we include it? + if (std::string_view{h->name()} == "Host") { + Debug::log(LOG, "Header in: {}: {} (DROPPED)", h->name(), req.headers().getRaw(h->name()).value()); + continue; + } + + Debug::log(LOG, "Header in: {}: {}", h->name(), req.headers().getRaw(h->name()).value()); + builder.header(h); + } + builder.timeout(std::chrono::milliseconds(10000)); + + // TODO: implement streaming for git's large objects? 
+ + auto resp = builder.send(); + resp.then( + [&](Pistache::Http::Response resp) { + const auto HEADERSRESP = resp.headers().list(); + + for (auto& h : HEADERSRESP) { + if (std::string_view{h->name()} == "Transfer-Encoding") { + Debug::log(LOG, "Header out: {}: {} (DROPPED)", h->name(), resp.headers().getRaw(h->name()).value()); + continue; + } + + Debug::log(LOG, "Header out: {}: {}", h->name(), resp.headers().getRaw(h->name()).value()); + response.headers().add(h); + } + + response.send(resp.code(), resp.body()); + }, + [&](std::exception_ptr e) { + try { + std::rethrow_exception(e); + } catch (std::exception& e) { Debug::log(ERR, "Proxy failed: {}", e.what()); } catch (const std::string& e) { + Debug::log(ERR, "Proxy failed: {}", e); + } catch (const char* e) { Debug::log(ERR, "Proxy failed: {}", e); } catch (...) { + Debug::log(ERR, "Proxy failed: God knows why."); + } + + response.send(Pistache::Http::Code::Internal_Server_Error, "Internal Proxy Error"); + }); + Pistache::Async::Barrier b(resp); + b.wait_for(std::chrono::seconds(10)); client.shutdown(); } \ No newline at end of file diff --git a/src/headers/gitProtocolHeader.hpp b/src/headers/gitProtocolHeader.hpp new file mode 100644 index 0000000..d837f1b --- /dev/null +++ b/src/headers/gitProtocolHeader.hpp @@ -0,0 +1,24 @@ +#include +#include + +class GitProtocolHeader : public Pistache::Http::Header::Header { + public: + NAME("Git-Protocol"); + + GitProtocolHeader() = default; + + void parse(const std::string& str) override { + m_text = str; + } + + void write(std::ostream& os) const override { + os << m_text; + } + + std::string text() const { + return m_text; + } + + private: + std::string m_text = ""; +}; \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 32bfc09..4ca03ac 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,6 +11,7 @@ #include "headers/authorization.hpp" #include "headers/xforwardfor.hpp" #include "headers/cfHeader.hpp" +#include 
"headers/gitProtocolHeader.hpp" #include "debug/log.hpp" @@ -72,6 +73,7 @@ int main(int argc, char** argv, char** envp) { Pistache::Http::Header::Registry::instance().registerHeader(); Pistache::Http::Header::Registry::instance().registerHeader(); + Pistache::Http::Header::Registry::instance().registerHeader(); g_pDB = std::make_unique(); diff --git a/subprojects/pistache b/subprojects/pistache index 31ef837..67ca233 160000 --- a/subprojects/pistache +++ b/subprojects/pistache @@ -1 +1 @@ -Subproject commit 31ef83778e075939d13f48ba7d2de805cec5d246 +Subproject commit 67ca2337767dd12f4961fd4a4fc5aae68b4bfd9d