From 6aa9cfb20b2c8773fb51c6ffb9dcb517be01c4d0 Mon Sep 17 00:00:00 2001 From: Starbeamrainbowlabs Date: Mon, 16 Aug 2021 23:17:46 +0100 Subject: [PATCH] Add optional FILTER_FEED environment variable --- README.md | 13 +++++++++++++ package-lock.json | 11 +++++++++++ package.json | 1 + src/index.11tydata.js | 10 ++++++++++ src/lib/check_text.js | 24 ++++++++++++++++++++++++ 5 files changed, 59 insertions(+) create mode 100644 src/lib/check_text.js diff --git a/README.md b/README.md index 421535b..4d94fae 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,19 @@ If you're debugging the feed compatibility code, then setting the `DEBUG_FEEDITE If you're working on the CSS, note that Eleventy will *not* auto-rebuild & reload the browser. For that, you need to re-save a file it *does* watch, such as `index.html`, `.eleventy.js`, etc (even if you haven't made any changes). +### Environment Variables +The following environment variables can be used to influence the behaviour of the aggregator. + +Environment Variable | Purpose +------------------------|------------------------------ +`FILTER_FEED` | Set to any non-empty value (the value itself is not inspected, so even `false` enables it) to enable the filtering of posts based on whether they contain any bad words. Uses [futility](https://www.npmjs.com/package/futility), but with a custom algorithm on top to avoid the "[Scunthorpe Problem](https://en.wikipedia.org/wiki/Scunthorpe_problem)". + +Example of setting an environment variable when running a build: + +```bash +FILTER_FEED=true npm run build +``` + ### Design Decisions A number of decisions were made in the design process of this website. These are documented with the reasoning behind them here. 
diff --git a/package-lock.json b/package-lock.json index fc04bbf..b737bb2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "dateformat": "^4.5.1", "feedme": "^2.0.2", "file-type": "^16.5.3", + "futility": "^1.1.2", "html-entities": "^2.3.2", "p-reflect": "^2.1.0", "phin": "^3.6.0", @@ -1217,6 +1218,11 @@ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" }, + "node_modules/futility": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/futility/-/futility-1.1.2.tgz", + "integrity": "sha1-w55ZYFAH8jnL7Vpu/RZRuFiHy/s=" + }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", @@ -4598,6 +4604,11 @@ "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz", "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" }, + "futility": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/futility/-/futility-1.1.2.tgz", + "integrity": "sha1-w55ZYFAH8jnL7Vpu/RZRuFiHy/s=" + }, "get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", diff --git a/package.json b/package.json index c6c8aa1..3c542d4 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "dateformat": "^4.5.1", "feedme": "^2.0.2", "file-type": "^16.5.3", + "futility": "^1.1.2", "html-entities": "^2.3.2", "p-reflect": "^2.1.0", "phin": "^3.6.0", diff --git a/src/index.11tydata.js b/src/index.11tydata.js index 1107f10..0f455da 100644 --- a/src/index.11tydata.js +++ b/src/index.11tydata.js @@ -6,6 +6,7 @@ const dateformat = require("dateformat"); const striptags = require("striptags"); const fetch_feed = require("./lib/fetch_feed.js"); +const check_text = require("./lib/check_text.js"); // 
The length of auto-generated descriptions if one isn't provided. const DESCRIPTION_LENGTH = 200; @@ -72,6 +73,15 @@ module.exports = async function() { }))); global.feed_items.sort((a, b) => b.pubdate_obj - a.pubdate_obj); // console.log(feed_items.map(el => el.title)); + + if(process.env.FILTER_FEED) { + const feed_items_count = global.feed_items.length; + global.feed_items = global.feed_items.filter((item) => !check_text(item.title) + && !check_text(item.description) + && !check_text(item.content)); + + console.log(`>>> Feed filtering enabled, removed ${feed_items_count - global.feed_items.length} posts`); + } } return { diff --git a/src/lib/check_text.js b/src/lib/check_text.js new file mode 100644 index 0000000..b7201e9 --- /dev/null +++ b/src/lib/check_text.js @@ -0,0 +1,24 @@ +const Futility = require("futility").default; +const striptags = require("striptags"); + +const checker = new Futility(); + +/** + * Checks the words in the given string for bad (swear) words. + * Checks on a word level - therefore avoiding the "scunthorpe problem". + * @param {string} str The string to check. + * @return {boolean} True if the given string contains any bad words, or false if it does not. + */ +function check_text(str) { + const words = striptags(str).split(/\s+/); + + for(let word of words) { + if(word.trim().search(/^\*+$/) > -1) continue; + let replaced = checker.censor(word, "*"); + if(replaced.trim().search(/^\*+$/) > -1) + return true; + } + return false; +} + +module.exports = check_text;