diff --git a/config.js b/config.js index 91da96d..c5c8e50 100644 --- a/config.js +++ b/config.js @@ -1,9 +1,15 @@ const env = process.env.APP__ENV_NAME || "local"; const isLocal = env === "local" || env === "test"; +const strippedPath = process.env.APP__STRIPPED_PATH || ''; + +console.log("Environment variables:"); +console.log(`APP__STRIPPED_PATH=${strippedPath} (path added to original host in analytics scripts)`); +console.log(`APP__ENV_NAME=${env} (should not be local nor test in production)`); export default { isLocalEnv: isLocal, httpPort: process.env.PORT || 80, + strippedPath, proxyDomain: "", // Domain to proxy calls through. Leave it empty to use the requested domain as a proxy domain proxy: { // Proxy configuration is here domains: [ // These domains are replaced in any proxied response (including scripts, URLs and redirects) @@ -69,6 +75,7 @@ export default { }, maskPaths: [ // Paths which are masked in URLs and redirects in order to avoid firing ad-blocking rules "/google-analytics", + "/www.google-analytics.com", "/adsbygoogle", "/googleads", "/log_event\\?", diff --git a/package.json b/package.json index e19125c..ba74bfa 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,9 @@ "module": "src/api.js", "scripts": { "test": "exit 0", - "start": "node -r esm src/api.js" + "start": "node -r esm src/api.js", + "mask": "node -r esm scripts.js mask", + "unmask": "node -r esm scripts.js unmask" }, "keywords": [ "proxy" diff --git a/readme.md b/readme.md index f7daad6..7fb5250 100644 --- a/readme.md +++ b/readme.md @@ -43,15 +43,17 @@ Technically, NodeJS proxy API works as follows: ## Prerequisites -In order to enable analytics proxying, you have to have some "DevOps" skills to perform the following: +In order to enable analytics proxying, you have to perform some DevOps in your infrastructure. Assuming you're using microservices: -1. Run a dedicated back end with proxy (NodeJS application in this repository). -2. 
Enable forwarding of a particular path (for example, `/gtm-proxy/*`) on your domain to this back end. - 1. It is important to use your own domain, as using centralized domains might appear at the ad-blocking databases. - 2. Make sure you strip the prefix path before proxy, the request path `https://your-domain.com/gtm-proxy/www.google-analytics.com/analytics.js` should land as `/www.google-analytics.com/analytics.js` at the NodeJS proxy application (this repository), stripping `gtm-proxy/` from the URL. +1. Run a dedicated back end (container) with the proxy (the NodeJS application / container in this repository) - see the setup instructions below. +2. Create a forwarding rule from your front end to hit this back end. + 1. For instance, proxy all calls requesting `/gtm-proxy/*` to this back end. You can either strip `/gtm-proxy` in your setup or set the `APP__STRIPPED_PATH` env variable to `/gtm-proxy`. Ultimately, the request path `https://your-domain.com/gtm-proxy/www.google-analytics.com/analytics.js` should land as `/www.google-analytics.com/analytics.js` at the NodeJS proxy application/container (this repository), stripping `/gtm-proxy` from the URL. + 2. It is important to use your own domain, as centralized domains might one day appear in the ad-blocking databases. 3. **Modify your initial Google Tag Manager / Google Analytics script to request the proxied file** - 1. Replace `https://www.googletagmanager.com/gtag/js?id=UA-123456-7` there to use `https://your-domain.com/gtm-proxy/www.googletagmanager.com/gtag/js?id=UA-123456-7` (or whatever path you've set up). - 2. The [example](src/test-static/index.html) in this repository uses `/www.googletagmanager.com/gtm.js` (which is equivalent of `http://localhost/www.googletagmanager.com/gtm.js`). + 1. Replace `https://www.googletagmanager.com/gtag/js?id=UA-123456-7` there to use `https://your-domain.com/gtm-proxy/www.googletagmanager.com/gtag/js?id=UA-123456-7` (or whatever path you've set up). 
Also, mask the URL by running `npm run mask <url>` in this repository so that ad-blockers won't block it right away. + 2. For instance, if you run `npm run mask www.google-analytics.com/analytics.js`, you get this masked URL: `*(d3d3Lmdvb2dsZS1hbmFseXRpY3MuY29t)*/*(YW5hbHl0aWNzLmpz)*`. Use it in your script tag now: `<script src="/gtm-proxy/*(d3d3Lmdvb2dsZS1hbmFseXRpY3MuY29t)*/*(YW5hbHl0aWNzLmpz)*"></script>`. + 3. The [example](test-static/index.html) in this repository uses unmasked `/www.googletagmanager.com/gtm.js` (which is equivalent to `http://localhost/www.googletagmanager.com/gtm.js`). +4. Test the thing! **This to consider before implementing the solution**: @@ -61,6 +63,25 @@ In order to enable analytics proxying, you have to have some "DevOps" skills to ## Setup +### In Docker + +The [light Docker container](https://hub.docker.com/r/zitros/analytics-saviour) of 41.5MB is available and ready to be run in your infrastructure. + +```bash +docker pull zitros/analytics-saviour +docker run -p 80:80 zitros/analytics-saviour +# Now open http://localhost and check the proxy. +``` + +Available environment variables: + +```bash +APP__STRIPPED_PATH=/gtm-proxy +# A URL prefix to strip. If you didn't manage to remove this prefix in the request hitting the container, you can specify it here. +``` + +### NodeJS Application + To run the NodeJS application, simply clone the repository, navigate to its directory and run: ```bash @@ -77,8 +98,6 @@ Proxied: www.google-analytics.com/collect?v=1&_v=j73&a=531530768&t=pageview&_s=1 Check the [test-static/index.html](test-static/index.html) file's code to see how to bind the proxied analytics to your front end. -Later, you can containerize this repository and route the incoming traffic to `/gtm-proxy` path (for example) through this container in order to avoid analytics blocking. - ## Configuration You can configure which third-parties to proxy/replace and how to do it in the config file. 
Find the actual configuration in [config.js](config.js) file: @@ -114,7 +133,7 @@ You can configure which third-parties to proxy/replace and how to do it in the c ## License -[MIT](LICENSE) © [Nikita Savchenko](https://nikita.tk) +[MIT](LICENSE) © [Nikita Savchenko](https://nikita.tk/developer) ## Contributions diff --git a/scripts.js b/scripts.js new file mode 100644 index 0000000..de7e4f2 --- /dev/null +++ b/scripts.js @@ -0,0 +1,28 @@ +import { mask, unmask } from './src/modules/mask'; + +const command = process.argv[2]; +const args = process.argv.slice(3); + +const f = ({ + + "mask": (path) => { + + if (typeof path !== 'string') { + console.log('Please provide an argument to mask'); + return; + } + console.log(`Masking ${path}...`); + console.log('Masked URL:', mask(path)); + }, + + "unmask": (path) => { + if (typeof path !== 'string') { + console.log('Please provide an argument to unmask'); + return; + } + console.log('Unmasked URL:', unmask(path)); + }, + +})[command] || (() => console.log(`No script ${command}!`)); + +f(...args); \ No newline at end of file diff --git a/src/api.js b/src/api.js index eb81eee..c444595 100644 --- a/src/api.js +++ b/src/api.js @@ -48,7 +48,7 @@ export async function init () { }); app.listen(config.httpPort, function onReady () { - info(`Web server is listening on port ${ config.httpPort }`); + info(`Analytics proxy web server is listening on port ${ config.httpPort }`); isReady = true; }); diff --git a/src/modules/proxy.js b/src/modules/proxy.js index 26d5a9b..c8cff89 100644 --- a/src/modules/proxy.js +++ b/src/modules/proxy.js @@ -16,12 +16,12 @@ const maskRegex = new RegExp( "gi" ); const replaceDomainsForHost = (host) => (match, pos, str) => { - const escapedSlashes = str[pos - 2] === "\\" && str[pos - 2] === "/" + const escapedSlashes = str[pos - 2] === "\\" && str[pos - 2] === "/" // wat? const r = `${ escapedSlashes ? 
(config.proxyDomain || host).replace(/\//g, "\\/") + "\\" : (config.proxyDomain || host) - }/${ match }`; + }${ config.strippedPath }/${ match }`; return r; }; @@ -37,19 +37,31 @@ export function createDefaultProxy (targetDomain, proxyOptionsOverride = {}) { ? proxyOptionsOverride["proxyReqOptDecorator"](proxyRequest, originalRequest) : proxyRequest; }, - userResHeaderDecorator: (headers, { headers: { host } }) => { - if (headers.location) { + userResHeaderDecorator: (proxyHeaders, originalRequest) => { + const { headers: { host } } = originalRequest; + if (proxyHeaders.location) { if (config.proxy.specialContentReplace[servername]) { // Keep before other replacements const replacements = config.proxy.specialContentReplace[servername]; for (const r of replacements) { - headers.location = headers.location.replace(r.regex, r.replace); + proxyHeaders.location = proxyHeaders.location.replace(r.regex, r.replace); } } - headers.location = headers.location + proxyHeaders.location = proxyHeaders.location .replace(replaceDomainRegex, replaceDomainsForHost(host)) .replace(maskRegex, match => mask(match)); } - return headers; + // [ + // "user-agent", + // "accept", + // "accept-encoding", + // "accept-language", + // "cookie" + // ].forEach(prop => { + // if (originalRequest.headers.hasOwnProperty(prop) && !proxyHeaders[prop]) { + // proxyHeaders[prop] = originalRequest.headers[prop]; + // } + // }); + return proxyHeaders; }, userResDecorator: (_, proxyResData, { headers: { host } }) => { if (_.req.res && _.req.res.client && _.req.res.client.servername) { @@ -84,8 +96,9 @@ export function createDefaultProxy (targetDomain, proxyOptionsOverride = {}) { ) { const parsedUrl = url.parse(unmasked); - const clientIp = req.headers["x-forwarded-for"] - ? req.headers["x-forwarded-for"].split(/,\s?/g)[0] + const overwrittenIp = req.headers["x-forwarded-for"] || req.headers["x-real-ip"]; + const clientIp = overwrittenIp + ? 
overwrittenIp.split(/,\s?/g)[0] : req.connection.remoteAddress.split(":").pop(); const encodedIp = encodeURIComponent(clientIp); diff --git a/test-static/index.html b/test-static/index.html index 2f702a5..9a34c63 100644 --- a/test-static/index.html +++ b/test-static/index.html @@ -7,7 +7,7 @@ @@ -19,7 +19,8 @@ ga('create', 'UA-98253329-1', 'auto'); // Replace UA-98253329-1 with your tag ga('send', 'pageview'); - + +