traffic: Add a new traffic logger

This commit is contained in:
Vaxry
2025-04-28 17:27:21 +01:00
parent 119da9c973
commit fd9baeeab8
10 changed files with 298 additions and 71 deletions

View File

@@ -15,6 +15,7 @@ AI scrapers are everywhere. This will stop them. `robots.txt` won't.
- Support for IP-Range based rules (both ipv4 and ipv6) - Support for IP-Range based rules (both ipv4 and ipv6)
- Support for async (multithreaded) request handling - Support for async (multithreaded) request handling
- Minimal. The waiting page is tiny and light on network usage. - Minimal. The waiting page is tiny and light on network usage.
- Support for verbose traffic logging for later inspection or statistics
### Planned features ### Planned features
- Dynamic challenge amount (aka difficulty) based on traffic - Dynamic challenge amount (aka difficulty) based on traffic

View File

@@ -69,5 +69,17 @@
"async_proxy": true, "async_proxy": true,
// If enabled, specific requests that look like git HTTP(s) clones will be let through. // If enabled, specific requests that look like git HTTP(s) clones will be let through.
"git_host": false "git_host": false,
// Traffic logging to a .csv file
"logging": {
"log_traffic": false,
// This is a sample schema with all supported fields
// Please keep in mind your local legal regulations, as IPs under GDPR are considered personal data.
"traffic_log_schema": "epoch,ip,domain,resource,useragent,action",
// Where to save the logfile. Each run will continue appending to this file. It may grow HUGE! No automatic pruning / compression is done.
"traffic_log_file": "./traffic.csv"
}
} }

View File

@@ -37,6 +37,12 @@ class CConfig {
int default_challenge_difficulty = 4; int default_challenge_difficulty = 4;
bool async_proxy = true; bool async_proxy = true;
std::vector<SProxyRule> proxy_rules; std::vector<SProxyRule> proxy_rules;
// Traffic logging settings consumed by CTrafficLogger.
struct {
// Master switch: when false the traffic logger neither parses the schema nor opens a file.
bool log_traffic = false;
// Comma-separated list of field names, in output order (e.g. "epoch,ip,domain,resource,useragent,action").
std::string traffic_log_schema;
// Path of the log file; it is opened in append mode.
std::string traffic_log_file;
} logging;
} m_config; } m_config;
struct { struct {

View File

@@ -14,6 +14,8 @@
#include "../GlobalState.hpp" #include "../GlobalState.hpp"
#include "../config/Config.hpp" #include "../config/Config.hpp"
#include "../helpers/FsUtils.hpp" #include "../helpers/FsUtils.hpp"
#include "../helpers/RequestUtils.hpp"
#include "../logging/TrafficLogger.hpp"
#include <filesystem> #include <filesystem>
#include <random> #include <random>
@@ -56,73 +58,10 @@ static std::string generateToken() {
return ss.str(); return ss.str();
} }
// Builds the fingerprint for a request: the sha256 (via g_pCrypto) of
// "checkpoint-" + the client IP + the User-Agent value when that header could be read.
std::string CServerHandler::fingerprintForRequest(const Pistache::Http::Request& req) {
const auto HEADERS = req.headers();
std::shared_ptr<const Pistache::Http::Header::AcceptEncoding> acceptEncodingHeader;
std::shared_ptr<const Pistache::Http::Header::UserAgent> userAgentHeader;
std::shared_ptr<const AcceptLanguageHeader> languageHeader;
std::string input = "checkpoint-";
// Each lookup is wrapped on its own: if it throws, the matching pointer simply stays null.
try {
acceptEncodingHeader = Pistache::Http::Header::header_cast<Pistache::Http::Header::AcceptEncoding>(HEADERS.get("Accept-Encoding"));
} catch (std::exception& e) {
; // silent ignore
}
try {
languageHeader = Pistache::Http::Header::header_cast<AcceptLanguageHeader>(HEADERS.get("Accept-Language"));
} catch (std::exception& e) {
; // silent ignore
}
try {
userAgentHeader = Pistache::Http::Header::header_cast<Pistache::Http::Header::UserAgent>(HEADERS.get("User-Agent"));
} catch (std::exception& e) {
; // silent ignore
}
input += ipForRequest(req);
// acceptEncodingHeader / languageHeader are fetched above but only referenced by the disabled code below.
// TODO: those seem to change. Find better things to hash.
// if (acceptEncodingHeader)
// input += HEADERS.getRaw("Accept-Encoding").value();
// if (languageHeader)
// input += languageHeader->language();
if (userAgentHeader)
input += userAgentHeader->agent();
return g_pCrypto->sha256(input);
}
bool CServerHandler::isResourceCheckpoint(const std::string_view& res) { bool CServerHandler::isResourceCheckpoint(const std::string_view& res) {
return res.starts_with("/checkpoint/"); return res.starts_with("/checkpoint/");
} }
// Resolves the client IP for a request. Precedence: the "cf-connecting-ip" header,
// then "X-Real-IP", and finally the address of the direct peer.
std::string CServerHandler::ipForRequest(const Pistache::Http::Request& req) {
std::shared_ptr<const CFConnectingIPHeader> cfHeader;
std::shared_ptr<const XRealIPHeader> xRealIPHeader;
// A lookup that throws leaves its pointer null, which selects the next fallback below.
try {
cfHeader = Pistache::Http::Header::header_cast<CFConnectingIPHeader>(req.headers().get("cf-connecting-ip"));
} catch (std::exception& e) {
; // silent ignore
}
try {
xRealIPHeader = Pistache::Http::Header::header_cast<XRealIPHeader>(req.headers().get("X-Real-IP"));
} catch (std::exception& e) {
; // silent ignore
}
if (cfHeader)
return cfHeader->ip();
if (xRealIPHeader)
return xRealIPHeader->ip();
return req.address().host();
}
void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter response) { void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter response) {
const auto HEADERS = req.headers(); const auto HEADERS = req.headers();
std::shared_ptr<const Pistache::Http::Header::Host> hostHeader; std::shared_ptr<const Pistache::Http::Header::Host> hostHeader;
@@ -186,7 +125,7 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
Debug::log(LOG, "New request: {}:{}{}", hostHeader->host(), hostHeader->port().toString(), req.resource()); Debug::log(LOG, "New request: {}:{}{}", hostHeader->host(), hostHeader->port().toString(), req.resource());
const auto REQUEST_IP = ipForRequest(req); const auto REQUEST_IP = NRequestUtils::ipForRequest(req);
Debug::log(LOG, " | Request author: IP {}, direct: {}", REQUEST_IP, req.address().host()); Debug::log(LOG, " | Request author: IP {}, direct: {}", REQUEST_IP, req.address().host());
@@ -228,12 +167,14 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
Debug::log(TRACE, "Request looks like it is coming from git (UA + GP). Accepting."); Debug::log(TRACE, "Request looks like it is coming from git (UA + GP). Accepting.");
proxyPass(req, response); proxyPass(req, response);
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
return; return;
} else if (userAgentHeader->agent().starts_with("git/")) { } else if (userAgentHeader->agent().starts_with("git/")) {
Debug::log(LOG, " | Action: PASS (git)"); Debug::log(LOG, " | Action: PASS (git)");
Debug::log(TRACE, "Request looks like it is coming from git (UA git). Accepting."); Debug::log(TRACE, "Request looks like it is coming from git (UA git). Accepting.");
proxyPass(req, response); proxyPass(req, response);
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
return; return;
} }
} }
@@ -249,10 +190,12 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
case IP_ACTION_DENY: case IP_ACTION_DENY:
Debug::log(LOG, " | Action: DENY (rule)"); Debug::log(LOG, " | Action: DENY (rule)");
response.send(Pistache::Http::Code::Forbidden, "Blocked by checkpoint"); response.send(Pistache::Http::Code::Forbidden, "Blocked by checkpoint");
g_pTrafficLogger->logTraffic(req, IP_ACTION_DENY);
return; return;
case IP_ACTION_ALLOW: case IP_ACTION_ALLOW:
Debug::log(LOG, " | Action: PASS (rule)"); Debug::log(LOG, " | Action: PASS (rule)");
proxyPass(req, response); proxyPass(req, response);
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
return; return;
case IP_ACTION_CHALLENGE: case IP_ACTION_CHALLENGE:
Debug::log(LOG, " | Action: CHALLENGE (rule)"); Debug::log(LOG, " | Action: CHALLENGE (rule)");
@@ -273,8 +216,9 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
if (TOKEN.valid()) { if (TOKEN.valid()) {
const auto AGE = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count() - const auto AGE = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count() -
std::chrono::duration_cast<std::chrono::milliseconds>(TOKEN.issued().time_since_epoch()).count(); std::chrono::duration_cast<std::chrono::milliseconds>(TOKEN.issued().time_since_epoch()).count();
if (AGE <= TOKEN_MAX_AGE_MS && TOKEN.fingerprint() == fingerprintForRequest(req)) { if (AGE <= TOKEN_MAX_AGE_MS && TOKEN.fingerprint() == NRequestUtils::fingerprintForRequest(req)) {
Debug::log(LOG, " | Action: PASS (token)"); Debug::log(LOG, " | Action: PASS (token)");
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
proxyPass(req, response); proxyPass(req, response);
return; return;
} else { // token has been used from a different IP or is expired. Nuke it. } else { // token has been used from a different IP or is expired. Nuke it.
@@ -329,6 +273,8 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
return; return;
} }
g_pTrafficLogger->logTraffic(req, IP_ACTION_CHALLENGE);
serveStop(req, response, challengeDifficulty); serveStop(req, response, challengeDifficulty);
} }
@@ -338,7 +284,7 @@ void CServerHandler::onTimeout(const Pistache::Http::Request& request, Pistache:
void CServerHandler::challengeSubmitted(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool js) { void CServerHandler::challengeSubmitted(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool js) {
const auto JSON = req.body(); const auto JSON = req.body();
const auto FINGERPRINT = fingerprintForRequest(req); const auto FINGERPRINT = NRequestUtils::fingerprintForRequest(req);
CChallenge CHALLENGE; CChallenge CHALLENGE;
if (!js) if (!js)
@@ -385,7 +331,7 @@ void CServerHandler::serveStop(const Pistache::Http::Request& req, Pistache::Htt
page.setTemplateRoot(PAGE_ROOT); page.setTemplateRoot(PAGE_ROOT);
const auto NONCE = generateNonce(); const auto NONCE = generateNonce();
const auto CHALLENGE = CChallenge(fingerprintForRequest(req), NONCE, difficulty); const auto CHALLENGE = CChallenge(NRequestUtils::fingerprintForRequest(req), NONCE, difficulty);
auto hostDomain = req.headers().getRaw("Host").value(); auto hostDomain = req.headers().getRaw("Host").value();
if (hostDomain.contains(":")) if (hostDomain.contains(":"))

View File

@@ -22,8 +22,6 @@ class CServerHandler : public Pistache::Http::Handler {
void proxyPassInternal(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool async = false); void proxyPassInternal(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool async = false);
void proxyPassAsync(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response); void proxyPassAsync(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response);
void challengeSubmitted(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool js); void challengeSubmitted(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool js);
std::string fingerprintForRequest(const Pistache::Http::Request& req);
std::string ipForRequest(const Pistache::Http::Request& req);
bool isResourceCheckpoint(const std::string_view& res); bool isResourceCheckpoint(const std::string_view& res);

View File

@@ -0,0 +1,75 @@
#include "RequestUtils.hpp"
#include "../core/Crypto.hpp"
#include "../headers/authorization.hpp"
#include "../headers/cfHeader.hpp"
#include "../headers/xforwardfor.hpp"
#include "../headers/gitProtocolHeader.hpp"
#include "../headers/wwwAuthenticateHeader.hpp"
#include "../headers/acceptLanguageHeader.hpp"
#include "../headers/setCookieHeader.hpp"
#include "../headers/xrealip.hpp"
// Computes the fingerprint a request is identified by:
// sha256("checkpoint-" + client IP + User-Agent), where the User-Agent part is
// only appended when that header could be read.
std::string NRequestUtils::fingerprintForRequest(const Pistache::Http::Request& req) {
    namespace Header = Pistache::Http::Header;

    const auto                                    HDRS = req.headers();

    std::shared_ptr<const Header::AcceptEncoding> encodingHdr;
    std::shared_ptr<const AcceptLanguageHeader>   langHdr;
    std::shared_ptr<const Header::UserAgent>      agentHdr;

    // Every lookup is isolated so that one failing header does not affect the others;
    // a failed lookup just leaves its pointer null.
    try {
        encodingHdr = Header::header_cast<Header::AcceptEncoding>(HDRS.get("Accept-Encoding"));
    } catch (const std::exception&) {
        // silent ignore
    }

    try {
        langHdr = Header::header_cast<AcceptLanguageHeader>(HDRS.get("Accept-Language"));
    } catch (const std::exception&) {
        // silent ignore
    }

    try {
        agentHdr = Header::header_cast<Header::UserAgent>(HDRS.get("User-Agent"));
    } catch (const std::exception&) {
        // silent ignore
    }

    std::string hashInput = "checkpoint-";
    hashInput += ipForRequest(req);

    // TODO: those seem to change. Find better things to hash.
    // if (encodingHdr)
    //     hashInput += HDRS.getRaw("Accept-Encoding").value();
    // if (langHdr)
    //     hashInput += langHdr->language();

    if (agentHdr)
        hashInput += agentHdr->agent();

    return g_pCrypto->sha256(hashInput);
}
// Determines which IP a request should be attributed to.
// Order of preference: "cf-connecting-ip" header, "X-Real-IP" header, then the
// address of the socket peer.
std::string NRequestUtils::ipForRequest(const Pistache::Http::Request& req) {
    namespace Header = Pistache::Http::Header;

    std::shared_ptr<const CFConnectingIPHeader> cloudflareIP;
    std::shared_ptr<const XRealIPHeader>        realIP;

    // Both lookups are attempted up front; one that throws leaves its pointer null.
    try {
        cloudflareIP = Header::header_cast<CFConnectingIPHeader>(req.headers().get("cf-connecting-ip"));
    } catch (const std::exception&) {
        // silent ignore
    }

    try {
        realIP = Header::header_cast<XRealIPHeader>(req.headers().get("X-Real-IP"));
    } catch (const std::exception&) {
        // silent ignore
    }

    if (cloudflareIP)
        return cloudflareIP->ip();

    if (realIP)
        return realIP->ip();

    // no forwarding header available: use the direct peer
    return req.address().host();
}

View File

@@ -0,0 +1,10 @@
#pragma once
#include <string>
#include <pistache/http.h>
// Free helpers that derive client identity data from an incoming request.
namespace NRequestUtils {
// Returns the sha256 (as produced by g_pCrypto) of "checkpoint-" + ipForRequest(req)
// + the User-Agent value, the latter only when that header could be read.
std::string fingerprintForRequest(const Pistache::Http::Request& req);
// Returns the client IP: the "cf-connecting-ip" header if available, otherwise
// "X-Real-IP", otherwise the host of the direct peer address.
std::string ipForRequest(const Pistache::Http::Request& req);
};

View File

@@ -0,0 +1,143 @@
#include "TrafficLogger.hpp"
#include <algorithm>
#include <chrono>
#include <mutex>
#include <sstream>
#include <string_view>
#include <fmt/format.h>
#include "../config/Config.hpp"
#include "../debug/log.hpp"
#include "../helpers/RequestUtils.hpp"
// Sets up traffic logging from the global config.
//
// When logging.log_traffic is false this does nothing: no schema is parsed and
// no file is opened. Otherwise logging.traffic_log_schema is split on ',' into
// m_logSchema (order preserved) and logging.traffic_log_file is opened for
// appending. Failing to open the file is fatal (Debug::die).
//
// Schema parsing rules:
//  - spaces/tabs around a field name are ignored ("epoch, ip" == "epoch,ip")
//  - empty fields (e.g. a trailing comma) are skipped
//  - unknown field names are reported in the log and skipped
CTrafficLogger::CTrafficLogger() {
    const auto& LOGCFG = g_pConfig->m_config.logging;

    if (!LOGCFG.log_traffic)
        return;

    const auto trim = [](std::string_view sv) -> std::string_view {
        const auto FIRST = sv.find_first_not_of(" \t");
        if (FIRST == std::string_view::npos)
            return {};
        return sv.substr(FIRST, sv.find_last_not_of(" \t") - FIRST + 1);
    };

    const std::string_view SCHEMA = LOGCFG.traffic_log_schema;

    // Walk the schema one comma-delimited field at a time. `begin` moves past the
    // end of the string after the last field, which terminates the loop.
    for (size_t begin = 0; begin <= SCHEMA.size();) {
        const size_t COMMA = SCHEMA.find(',', begin);
        const size_t END   = COMMA == std::string_view::npos ? SCHEMA.size() : COMMA;
        const auto   FIELD = trim(SCHEMA.substr(begin, END - begin));
        begin              = END + 1;

        if (FIELD.empty())
            continue;

        if (FIELD == "ip")
            m_logSchema.emplace_back(TRAFFIC_IP);
        else if (FIELD == "epoch")
            m_logSchema.emplace_back(TRAFFIC_EPOCH);
        else if (FIELD == "domain")
            m_logSchema.emplace_back(TRAFFIC_DOMAIN);
        else if (FIELD == "resource")
            m_logSchema.emplace_back(TRAFFIC_RESOURCE);
        else if (FIELD == "useragent")
            m_logSchema.emplace_back(TRAFFIC_USERAGENT);
        else if (FIELD == "action")
            m_logSchema.emplace_back(TRAFFIC_ACTION);
        else
            Debug::log(LOG, "TrafficLogger: ignoring unknown schema field \"{}\"", std::string{FIELD});
    }

    // Append so that consecutive runs keep extending the same file.
    m_file.open(LOGCFG.traffic_log_file, std::ios::app);

    if (!m_file.good())
        Debug::die("TrafficLogger: bad file {}", LOGCFG.traffic_log_file);
}
// Closes the log file if one was opened by the constructor.
CTrafficLogger::~CTrafficLogger() {
    if (!m_file.is_open())
        return;

    m_file.close();
}
// Escapes a value so it can be placed between double quotes in a CSV record.
//
// CSV (RFC 4180) has no backslash escapes: a double quote inside a quoted field
// is represented by two double quotes. Every '"' in `s` is therefore doubled;
// all other characters are copied unchanged.
//
// s: raw field value (may be empty)
// returns: the escaped value, without the surrounding quotes
static std::string sanitize(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size() + 2);

    for (const char C : s) {
        if (C == '"')
            escaped += '"';
        escaped += C;
    }

    return escaped;
}
// Maps an action to the label written into the "action" column.
// Values that match none of the known actions yield "ERROR".
static const char* actionToString(eConfigIPAction a) {
    if (a == IP_ACTION_CHALLENGE)
        return "CHALLENGE";

    if (a == IP_ACTION_ALLOW)
        return "ALLOW";

    if (a == IP_ACTION_DENY)
        return "DENY";

    if (a == IP_ACTION_NONE)
        return "NONE";

    return "ERROR";
}
// Appends one record describing `req` and the action taken for it to the log file.
//
// Fields are written in the order stored in m_logSchema, separated by commas and
// terminated by a newline. Domain, resource and user agent are wrapped in double
// quotes and passed through sanitize(); epoch, IP and action are written bare.
// A missing Host or User-Agent header is logged as "<no data>".
// Does nothing when traffic logging is disabled or the schema is empty.
//
// req:         the request being handled
// actionTaken: the action that was applied to the request
void CTrafficLogger::logTraffic(const Pistache::Http::Request& req, eConfigIPAction actionTaken) {
    if (!g_pConfig->m_config.logging.log_traffic)
        return;

    std::string line;

    for (const auto& FIELD : m_logSchema) {
        switch (FIELD) {
            case TRAFFIC_EPOCH: {
                const auto NOW = std::chrono::duration_cast<std::chrono::seconds>(std::chrono::system_clock::now().time_since_epoch()).count();
                line += fmt::format("{},", NOW);
                break;
            }
            case TRAFFIC_DOMAIN: {
                // Guarded like User-Agent below so a request without a Host header
                // cannot make the logger throw.
                // NOTE(review): presumably Collection::get throws on a missing header - confirm.
                if (!req.headers().has("Host")) {
                    line += "\"<no data>\",";
                    break;
                }

                const auto HOST = Pistache::Http::Header::header_cast<Pistache::Http::Header::Host>(req.headers().get("Host"));
                line += fmt::format("\"{}\",", sanitize(HOST->host()));
                break;
            }
            case TRAFFIC_IP: {
                // NOTE(review): the IP can come from a request header and is written unquoted;
                // confirm the header parsers reject values containing ',' or quotes.
                line += fmt::format("{},", NRequestUtils::ipForRequest(req));
                break;
            }
            case TRAFFIC_RESOURCE: {
                line += fmt::format("\"{}\",", sanitize(req.resource()));
                break;
            }
            case TRAFFIC_USERAGENT: {
                if (!req.headers().has("User-Agent")) {
                    line += "\"<no data>\",";
                    break;
                }

                const auto UA = Pistache::Http::Header::header_cast<Pistache::Http::Header::UserAgent>(req.headers().get("User-Agent"));
                line += fmt::format("\"{}\",", sanitize(UA->agent()));
                break;
            }
            case TRAFFIC_ACTION: {
                line += fmt::format("{},", actionToString(actionTaken));
                break;
            }
        }
    }

    if (line.empty())
        return;

    // every field appended a trailing ','; turn the last one into the record terminator
    line.back() = '\n';

    // The server can run with several worker threads that all share the global logger,
    // and std::ofstream is not safe for concurrent writes. The record is fully built
    // above so the lock is only held for the actual write.
    static std::mutex                 writeMutex;
    const std::lock_guard<std::mutex> LOCK(writeMutex);

    m_file << line;
    m_file.flush();
}

View File

@@ -0,0 +1,33 @@
#pragma once

#include <string>
#include <cstdint>
#include <memory>
#include <fstream>
#include <vector>
#include "../config/ConfigTypes.hpp"
#include <pistache/http.h>

// Writes one line per handled request to the configured traffic log file.
// Which fields are written, and in which order, comes from the configured schema.
class CTrafficLogger {
  public:
    // Parses the schema and opens the log file when traffic logging is enabled;
    // does nothing otherwise.
    CTrafficLogger();
    // Closes the log file if it is open.
    ~CTrafficLogger();

    // Appends a record for `req` with the action that was taken for it.
    // No-op when traffic logging is disabled.
    void logTraffic(const Pistache::Http::Request& req, eConfigIPAction actionTaken);

  private:
    // Fields that can appear in a log record.
    enum eTrafficLoggerProps : uint8_t {
        TRAFFIC_EPOCH = 0,
        TRAFFIC_IP,
        TRAFFIC_DOMAIN,
        TRAFFIC_RESOURCE,
        TRAFFIC_USERAGENT,
        TRAFFIC_ACTION,
    };

    // Fields to write, in output order.
    std::vector<eTrafficLoggerProps> m_logSchema;
    // Destination file, opened in append mode by the constructor.
    std::ofstream                    m_file;
};

// Global logger instance; null until it is created at startup.
inline std::unique_ptr<CTrafficLogger> g_pTrafficLogger;

View File

@@ -25,6 +25,8 @@
#include "config/Config.hpp" #include "config/Config.hpp"
#include "logging/TrafficLogger.hpp"
#include "GlobalState.hpp" #include "GlobalState.hpp"
#include <signal.h> #include <signal.h>
@@ -88,6 +90,7 @@ int main(int argc, char** argv, char** envp) {
Pistache::Http::Header::Registry::instance().registerHeader<XRealIPHeader>(); Pistache::Http::Header::Registry::instance().registerHeader<XRealIPHeader>();
g_pCrypto = std::make_unique<CCrypto>(); g_pCrypto = std::make_unique<CCrypto>();
g_pTrafficLogger = std::make_unique<CTrafficLogger>();
auto endpoint = std::make_unique<Pistache::Http::Endpoint>(address); auto endpoint = std::make_unique<Pistache::Http::Endpoint>(address);
auto opts = Pistache::Http::Endpoint::options().threads(threads).flags(Pistache::Tcp::Options::ReuseAddr | Pistache::Tcp::Options::ReusePort); auto opts = Pistache::Http::Endpoint::options().threads(threads).flags(Pistache::Tcp::Options::ReuseAddr | Pistache::Tcp::Options::ReusePort);