traffic: Add a new traffic logger
This commit is contained in:
@@ -15,6 +15,7 @@ AI scrapers are everywhere. This will stop them. `robots.txt` won't.
|
||||
- Support for IP-Range based rules (both ipv4 and ipv6)
|
||||
- Support for async (multithreaded) request handling
|
||||
- Minimal. The waiting page is tiny and light on network usage.
|
||||
- Support for verbose traffic logging for later inspection or statistics
|
||||
|
||||
### Planned features
|
||||
- Dynamic challenge amount (aka difficulty) based on traffic
|
||||
|
||||
@@ -69,5 +69,17 @@
|
||||
"async_proxy": true,
|
||||
|
||||
// If enabled, specific requests that look like git HTTP(s) clones will be let through.
|
||||
"git_host": false
|
||||
"git_host": false,
|
||||
|
||||
// Traffic logging to a .csv file
|
||||
"logging": {
|
||||
"log_traffic": false,
|
||||
|
||||
// This is a sample schema with all supported fields
|
||||
// Please keep in mind your local legal regulations, as IPs under GDPR are considered personal data.
|
||||
"traffic_log_schema": "epoch,ip,domain,resource,useragent,action",
|
||||
|
||||
// Where to save the logfile. Each run will continue appending to this file. It may grow HUGE! No automatic pruning / compression is done.
|
||||
"traffic_log_file": "./traffic.csv"
|
||||
}
|
||||
}
|
||||
@@ -37,6 +37,12 @@ class CConfig {
|
||||
int default_challenge_difficulty = 4;
|
||||
bool async_proxy = true;
|
||||
std::vector<SProxyRule> proxy_rules;
|
||||
|
||||
// Traffic logging options.
struct {
    // Master switch: nothing is logged while this is false.
    bool log_traffic{false};
    // Comma-separated list of the fields written for every request.
    std::string traffic_log_schema{};
    // Path of the file the traffic log is appended to.
    std::string traffic_log_file{};
} logging;
|
||||
} m_config;
|
||||
|
||||
struct {
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
#include "../GlobalState.hpp"
|
||||
#include "../config/Config.hpp"
|
||||
#include "../helpers/FsUtils.hpp"
|
||||
#include "../helpers/RequestUtils.hpp"
|
||||
#include "../logging/TrafficLogger.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
#include <random>
|
||||
@@ -56,73 +58,10 @@ static std::string generateToken() {
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Builds the fingerprint a request is identified by: the hash returned by
// g_pCrypto->sha256 over "checkpoint-", the client IP (see ipForRequest) and,
// when the request carries one, the User-Agent string.
std::string CServerHandler::fingerprintForRequest(const Pistache::Http::Request& req) {
    // bind by reference, copying the whole header collection per request is wasted work
    const auto& HEADERS = req.headers();

    std::string input = "checkpoint-";
    input += ipForRequest(req);

    // TODO: Accept-Encoding and Accept-Language seem to change between requests,
    // so they are not part of the hash. Find better things to hash.

    // check for presence first instead of throwing and catching on every request without the header
    if (HEADERS.has("User-Agent")) {
        const auto userAgentHeader = Pistache::Http::Header::header_cast<Pistache::Http::Header::UserAgent>(HEADERS.get("User-Agent"));
        if (userAgentHeader)
            input += userAgentHeader->agent();
    }

    return g_pCrypto->sha256(input);
}
|
||||
|
||||
// Tells whether a requested resource lives under the /checkpoint/ path.
bool CServerHandler::isResourceCheckpoint(const std::string_view& res) {
    constexpr std::string_view CHECKPOINT_PREFIX = "/checkpoint/";

    // substr clamps the length, so inputs shorter than the prefix simply compare unequal
    return res.substr(0, CHECKPOINT_PREFIX.size()) == CHECKPOINT_PREFIX;
}
|
||||
|
||||
// Resolves the client IP of a request. The cf-connecting-ip header takes
// precedence, then X-Real-IP; when neither is present the address of the
// direct peer is returned.
std::string CServerHandler::ipForRequest(const Pistache::Http::Request& req) {
    std::shared_ptr<const CFConnectingIPHeader> cloudflareIP;

    try {
        cloudflareIP = Pistache::Http::Header::header_cast<CFConnectingIPHeader>(req.headers().get("cf-connecting-ip"));
    } catch (std::exception&) {
        ; // header not present, try the next source
    }

    if (cloudflareIP)
        return cloudflareIP->ip();

    std::shared_ptr<const XRealIPHeader> realIP;

    try {
        realIP = Pistache::Http::Header::header_cast<XRealIPHeader>(req.headers().get("X-Real-IP"));
    } catch (std::exception&) {
        ; // header not present, fall back to the peer address
    }

    if (realIP)
        return realIP->ip();

    return req.address().host();
}
|
||||
|
||||
void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter response) {
|
||||
const auto HEADERS = req.headers();
|
||||
std::shared_ptr<const Pistache::Http::Header::Host> hostHeader;
|
||||
@@ -186,7 +125,7 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
|
||||
|
||||
Debug::log(LOG, "New request: {}:{}{}", hostHeader->host(), hostHeader->port().toString(), req.resource());
|
||||
|
||||
const auto REQUEST_IP = ipForRequest(req);
|
||||
const auto REQUEST_IP = NRequestUtils::ipForRequest(req);
|
||||
|
||||
Debug::log(LOG, " | Request author: IP {}, direct: {}", REQUEST_IP, req.address().host());
|
||||
|
||||
@@ -228,12 +167,14 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
|
||||
Debug::log(TRACE, "Request looks like it is coming from git (UA + GP). Accepting.");
|
||||
|
||||
proxyPass(req, response);
|
||||
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
|
||||
return;
|
||||
} else if (userAgentHeader->agent().starts_with("git/")) {
|
||||
Debug::log(LOG, " | Action: PASS (git)");
|
||||
Debug::log(TRACE, "Request looks like it is coming from git (UA git). Accepting.");
|
||||
|
||||
proxyPass(req, response);
|
||||
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -249,10 +190,12 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
|
||||
case IP_ACTION_DENY:
|
||||
Debug::log(LOG, " | Action: DENY (rule)");
|
||||
response.send(Pistache::Http::Code::Forbidden, "Blocked by checkpoint");
|
||||
g_pTrafficLogger->logTraffic(req, IP_ACTION_DENY);
|
||||
return;
|
||||
case IP_ACTION_ALLOW:
|
||||
Debug::log(LOG, " | Action: PASS (rule)");
|
||||
proxyPass(req, response);
|
||||
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
|
||||
return;
|
||||
case IP_ACTION_CHALLENGE:
|
||||
Debug::log(LOG, " | Action: CHALLENGE (rule)");
|
||||
@@ -273,8 +216,9 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
|
||||
if (TOKEN.valid()) {
|
||||
const auto AGE = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now().time_since_epoch()).count() -
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(TOKEN.issued().time_since_epoch()).count();
|
||||
if (AGE <= TOKEN_MAX_AGE_MS && TOKEN.fingerprint() == fingerprintForRequest(req)) {
|
||||
if (AGE <= TOKEN_MAX_AGE_MS && TOKEN.fingerprint() == NRequestUtils::fingerprintForRequest(req)) {
|
||||
Debug::log(LOG, " | Action: PASS (token)");
|
||||
g_pTrafficLogger->logTraffic(req, IP_ACTION_ALLOW);
|
||||
proxyPass(req, response);
|
||||
return;
|
||||
} else { // token has been used from a different IP or is expired. Nuke it.
|
||||
@@ -329,6 +273,8 @@ void CServerHandler::onRequest(const Pistache::Http::Request& req, Pistache::Htt
|
||||
return;
|
||||
}
|
||||
|
||||
g_pTrafficLogger->logTraffic(req, IP_ACTION_CHALLENGE);
|
||||
|
||||
serveStop(req, response, challengeDifficulty);
|
||||
}
|
||||
|
||||
@@ -338,7 +284,7 @@ void CServerHandler::onTimeout(const Pistache::Http::Request& request, Pistache:
|
||||
|
||||
void CServerHandler::challengeSubmitted(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool js) {
|
||||
const auto JSON = req.body();
|
||||
const auto FINGERPRINT = fingerprintForRequest(req);
|
||||
const auto FINGERPRINT = NRequestUtils::fingerprintForRequest(req);
|
||||
|
||||
CChallenge CHALLENGE;
|
||||
if (!js)
|
||||
@@ -385,7 +331,7 @@ void CServerHandler::serveStop(const Pistache::Http::Request& req, Pistache::Htt
|
||||
page.setTemplateRoot(PAGE_ROOT);
|
||||
|
||||
const auto NONCE = generateNonce();
|
||||
const auto CHALLENGE = CChallenge(fingerprintForRequest(req), NONCE, difficulty);
|
||||
const auto CHALLENGE = CChallenge(NRequestUtils::fingerprintForRequest(req), NONCE, difficulty);
|
||||
|
||||
auto hostDomain = req.headers().getRaw("Host").value();
|
||||
if (hostDomain.contains(":"))
|
||||
|
||||
@@ -22,8 +22,6 @@ class CServerHandler : public Pistache::Http::Handler {
|
||||
void proxyPassInternal(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool async = false);
|
||||
void proxyPassAsync(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response);
|
||||
void challengeSubmitted(const Pistache::Http::Request& req, Pistache::Http::ResponseWriter& response, bool js);
|
||||
std::string fingerprintForRequest(const Pistache::Http::Request& req);
|
||||
std::string ipForRequest(const Pistache::Http::Request& req);
|
||||
|
||||
bool isResourceCheckpoint(const std::string_view& res);
|
||||
|
||||
|
||||
75
src/helpers/RequestUtils.cpp
Normal file
75
src/helpers/RequestUtils.cpp
Normal file
@@ -0,0 +1,75 @@
|
||||
#include "RequestUtils.hpp"
|
||||
|
||||
#include "../core/Crypto.hpp"
|
||||
|
||||
#include "../headers/authorization.hpp"
|
||||
#include "../headers/cfHeader.hpp"
|
||||
#include "../headers/xforwardfor.hpp"
|
||||
#include "../headers/gitProtocolHeader.hpp"
|
||||
#include "../headers/wwwAuthenticateHeader.hpp"
|
||||
#include "../headers/acceptLanguageHeader.hpp"
|
||||
#include "../headers/setCookieHeader.hpp"
|
||||
#include "../headers/xrealip.hpp"
|
||||
|
||||
// Builds the fingerprint a request is identified by: the hash returned by
// g_pCrypto->sha256 over "checkpoint-", the client IP (see ipForRequest) and,
// when the request carries one, the User-Agent string.
std::string NRequestUtils::fingerprintForRequest(const Pistache::Http::Request& req) {
    // bind by reference, copying the whole header collection per request is wasted work
    const auto& HEADERS = req.headers();

    std::string input = "checkpoint-";
    input += ipForRequest(req);

    // TODO: Accept-Encoding and Accept-Language seem to change between requests,
    // so they are not part of the hash. Find better things to hash.

    // check for presence first instead of throwing and catching on every request without the header
    if (HEADERS.has("User-Agent")) {
        const auto userAgentHeader = Pistache::Http::Header::header_cast<Pistache::Http::Header::UserAgent>(HEADERS.get("User-Agent"));
        if (userAgentHeader)
            input += userAgentHeader->agent();
    }

    return g_pCrypto->sha256(input);
}
|
||||
|
||||
// Resolves the client IP of a request. The cf-connecting-ip header takes
// precedence, then X-Real-IP; when neither is present the address of the
// direct peer is returned.
std::string NRequestUtils::ipForRequest(const Pistache::Http::Request& req) {
    std::shared_ptr<const CFConnectingIPHeader> cloudflareIP;

    try {
        cloudflareIP = Pistache::Http::Header::header_cast<CFConnectingIPHeader>(req.headers().get("cf-connecting-ip"));
    } catch (std::exception&) {
        ; // header not present, try the next source
    }

    if (cloudflareIP)
        return cloudflareIP->ip();

    std::shared_ptr<const XRealIPHeader> realIP;

    try {
        realIP = Pistache::Http::Header::header_cast<XRealIPHeader>(req.headers().get("X-Real-IP"));
    } catch (std::exception&) {
        ; // header not present, fall back to the peer address
    }

    if (realIP)
        return realIP->ip();

    return req.address().host();
}
|
||||
10
src/helpers/RequestUtils.hpp
Normal file
10
src/helpers/RequestUtils.hpp
Normal file
@@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <pistache/http.h>
|
||||
|
||||
namespace NRequestUtils {
|
||||
std::string fingerprintForRequest(const Pistache::Http::Request& req);
|
||||
std::string ipForRequest(const Pistache::Http::Request& req);
|
||||
};
|
||||
143
src/logging/TrafficLogger.cpp
Normal file
143
src/logging/TrafficLogger.cpp
Normal file
@@ -0,0 +1,143 @@
|
||||
#include "TrafficLogger.hpp"
|
||||
|
||||
#include <sstream>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "../config/Config.hpp"
|
||||
#include "../debug/log.hpp"
|
||||
#include "../helpers/RequestUtils.hpp"
|
||||
|
||||
// Sets up traffic logging from the "logging" section of the config.
// Does nothing when log_traffic is off. Otherwise the comma-separated schema
// is translated into m_logSchema (in the order given) and the log file is
// opened in append mode. Dies when the file cannot be opened.
CTrafficLogger::CTrafficLogger() {
    const auto& CFG = g_pConfig->m_config.logging;

    if (!CFG.log_traffic)
        return;

    const std::string_view SCHEMA = CFG.traffic_log_schema;
    size_t                 begin  = 0;

    // one iteration per comma-separated field; the field after the last comma is handled too
    while (begin <= SCHEMA.size()) {
        const size_t     COMMA = SCHEMA.find(',', begin);
        const size_t     END   = COMMA == std::string_view::npos ? SCHEMA.size() : COMMA;
        std::string_view field = SCHEMA.substr(begin, END - begin);
        begin                  = END + 1;

        // tolerate spacing such as "epoch, ip"
        while (!field.empty() && (field.front() == ' ' || field.front() == '\t'))
            field.remove_prefix(1);
        while (!field.empty() && (field.back() == ' ' || field.back() == '\t'))
            field.remove_suffix(1);

        // an empty entry ("ip,,action", trailing comma) must not hide the fields after it
        if (field.empty())
            continue;

        if (field == "ip")
            m_logSchema.emplace_back(TRAFFIC_IP);
        else if (field == "epoch")
            m_logSchema.emplace_back(TRAFFIC_EPOCH);
        else if (field == "domain")
            m_logSchema.emplace_back(TRAFFIC_DOMAIN);
        else if (field == "resource")
            m_logSchema.emplace_back(TRAFFIC_RESOURCE);
        else if (field == "useragent")
            m_logSchema.emplace_back(TRAFFIC_USERAGENT);
        else if (field == "action")
            m_logSchema.emplace_back(TRAFFIC_ACTION);
        else
            Debug::log(LOG, "TrafficLogger: ignoring unknown schema field \"{}\"", field); // make typos visible instead of dropping the column silently
    }

    m_file.open(CFG.traffic_log_file, std::ios::app);

    if (!m_file.good())
        Debug::die("TrafficLogger: bad file {}", CFG.traffic_log_file);
}
|
||||
|
||||
// Closes the log file, if one was opened.
CTrafficLogger::~CTrafficLogger() {
    if (!m_file.is_open())
        return;

    m_file.close();
}
|
||||
|
||||
// Escapes a value so it can be placed between the double quotes of a CSV field.
// A literal double quote is written as two double quotes (RFC 4180), which is
// the form CSV parsers understand; a backslash escape would let a value that
// contains `",` break out of its column.
// Returns the escaped copy; the input is left untouched.
static std::string sanitize(const std::string& s) {
    std::string escaped;
    escaped.reserve(s.size());

    for (const char c : s) {
        if (c == '"')
            escaped += '"'; // double the quote
        escaped += c;
    }

    return escaped;
}
|
||||
|
||||
// Maps an action to the text written into the "action" column.
// Values outside the known set yield "ERROR".
static const char* actionToString(eConfigIPAction a) {
    if (a == IP_ACTION_CHALLENGE)
        return "CHALLENGE";

    if (a == IP_ACTION_ALLOW)
        return "ALLOW";

    if (a == IP_ACTION_DENY)
        return "DENY";

    if (a == IP_ACTION_NONE)
        return "NONE";

    return "ERROR";
}
|
||||
|
||||
// Appends one CSV line describing a handled request to the traffic log.
// Does nothing when traffic logging is disabled or the schema is empty.
//
// req         - the request that was handled
// actionTaken - what was done with it; written to the "action" column
//
// Columns are written in schema order. Every value that originates from the
// client (domain, ip, resource, user agent) is quoted and escaped, and a
// missing Host or User-Agent header is logged as "<no data>".
void CTrafficLogger::logTraffic(const Pistache::Http::Request& req, eConfigIPAction actionTaken) {
    if (!g_pConfig->m_config.logging.log_traffic)
        return;

    const auto& HEADERS = req.headers();
    std::string trafficLine;

    for (const auto& t : m_logSchema) {
        switch (t) {
            case TRAFFIC_EPOCH: {
                trafficLine += fmt::format("{},", std::chrono::duration_cast<std::chrono::seconds>(std::chrono::system_clock::now().time_since_epoch()).count());
                break;
            }

            case TRAFFIC_DOMAIN: {
                // get() throws on a missing header; logging must never take the request down
                if (!HEADERS.has("Host")) {
                    trafficLine += "\"<no data>\",";
                    break;
                }
                const auto HOST = Pistache::Http::Header::header_cast<Pistache::Http::Header::Host>(HEADERS.get("Host"));
                trafficLine += fmt::format("\"{}\",", sanitize(HOST->host()));
                break;
            }

            case TRAFFIC_IP: {
                // the IP may come from a client-supplied header, so treat it like any other untrusted text
                trafficLine += fmt::format("\"{}\",", sanitize(NRequestUtils::ipForRequest(req)));
                break;
            }

            case TRAFFIC_RESOURCE: {
                trafficLine += fmt::format("\"{}\",", sanitize(req.resource()));
                break;
            }

            case TRAFFIC_USERAGENT: {
                if (!HEADERS.has("User-Agent")) {
                    trafficLine += "\"<no data>\",";
                    break;
                }
                const auto UA = Pistache::Http::Header::header_cast<Pistache::Http::Header::UserAgent>(HEADERS.get("User-Agent"));
                trafficLine += fmt::format("\"{}\",", sanitize(UA->agent()));
                break;
            }

            case TRAFFIC_ACTION: {
                trafficLine += fmt::format("{},", actionToString(actionTaken));
                break;
            }
        }
    }

    if (trafficLine.empty())
        return;

    // replace the trailing , with \n
    trafficLine.back() = '\n';

    // requests are handled on several threads that share this logger;
    // serialize the write so lines cannot interleave or race on the stream
    static std::mutex                 writeMutex;
    const std::lock_guard<std::mutex> LOCK(writeMutex);

    m_file << trafficLine;
    m_file.flush();
}
|
||||
33
src/logging/TrafficLogger.hpp
Normal file
33
src/logging/TrafficLogger.hpp
Normal file
@@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
#include <cstdint>
#include <memory>
#include <fstream>
#include <mutex>
#include <vector>

#include "../config/ConfigTypes.hpp"

#include <pistache/http.h>
|
||||
|
||||
class CTrafficLogger {
|
||||
public:
|
||||
CTrafficLogger();
|
||||
~CTrafficLogger();
|
||||
|
||||
void logTraffic(const Pistache::Http::Request& req, eConfigIPAction actionTaken);
|
||||
|
||||
private:
|
||||
enum eTrafficLoggerProps : uint8_t {
|
||||
TRAFFIC_EPOCH = 0,
|
||||
TRAFFIC_IP,
|
||||
TRAFFIC_DOMAIN,
|
||||
TRAFFIC_RESOURCE,
|
||||
TRAFFIC_USERAGENT,
|
||||
TRAFFIC_ACTION,
|
||||
};
|
||||
|
||||
std::vector<eTrafficLoggerProps> m_logSchema;
|
||||
std::ofstream m_file;
|
||||
};
|
||||
|
||||
inline std::unique_ptr<CTrafficLogger> g_pTrafficLogger;
|
||||
@@ -25,6 +25,8 @@
|
||||
|
||||
#include "config/Config.hpp"
|
||||
|
||||
#include "logging/TrafficLogger.hpp"
|
||||
|
||||
#include "GlobalState.hpp"
|
||||
|
||||
#include <signal.h>
|
||||
@@ -88,6 +90,7 @@ int main(int argc, char** argv, char** envp) {
|
||||
Pistache::Http::Header::Registry::instance().registerHeader<XRealIPHeader>();
|
||||
|
||||
g_pCrypto = std::make_unique<CCrypto>();
|
||||
g_pTrafficLogger = std::make_unique<CTrafficLogger>();
|
||||
|
||||
auto endpoint = std::make_unique<Pistache::Http::Endpoint>(address);
|
||||
auto opts = Pistache::Http::Endpoint::options().threads(threads).flags(Pistache::Tcp::Options::ReuseAddr | Pistache::Tcp::Options::ReusePort);
|
||||
|
||||
Reference in New Issue
Block a user