Merge pull request #140 from epi052/136-add-regex-filter
add regex filter
epi052 authored Nov 26, 2020
2 parents 4c39944 + 605661e commit 2128b9e
Showing 12 changed files with 309 additions and 41 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "feroxbuster"
version = "1.7.0"
version = "1.8.0"
authors = ["Ben 'epi' Risher <[email protected]>"]
license = "MIT"
edition = "2018"
17 changes: 17 additions & 0 deletions README.md
@@ -84,6 +84,7 @@ This attack is also known as Predictable Resource Location, File Enumeration, Di
- [Pass auth token via query parameter](#pass-auth-token-via-query-parameter)
- [Limit Total Number of Concurrent Scans (new in `v1.2.0`)](#limit-total-number-of-concurrent-scans-new-in-v120)
- [Filter Response by Status Code (new in `v1.3.0`)](#filter-response-by-status-code--new-in-v130)
- [Filter Response Using a Regular Expression (new in `v1.8.0`)](#filter-response-using-a-regular-expression-new-in-v180)
- [Replay Responses to a Proxy based on Status Code (new in `v1.5.0`)](#replay-responses-to-a-proxy-based-on-status-code-new-in-v150)
- [Comparison w/ Similar Tools](#-comparison-w-similar-tools)
- [Common Problems/Issues (FAQ)](#-common-problemsissues-faq)
@@ -343,6 +344,7 @@ A pre-made configuration file with examples of all available settings can be fou
# extract_links = true
# depth = 1
# filter_size = [5174]
# filter_regex = ["^ignore me$"]
# filter_word_count = [993]
# filter_line_count = [35, 36]
# queries = [["name","value"], ["rick", "astley"]]
@@ -389,6 +391,8 @@ OPTIONS:
-d, --depth <RECURSION_DEPTH> Maximum recursion depth, a depth of 0 is infinite recursion (default: 4)
-x, --extensions <FILE_EXTENSION>... File extension(s) to search for (ex: -x php -x pdf js)
-N, --filter-lines <LINES>... Filter out messages of a particular line count (ex: -N 20 -N 31,30)
-X, --filter-regex <REGEX>... Filter out messages via regular expression matching on the response's body
(ex: -X '^ignore me$')
-S, --filter-size <SIZE>... Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)
-C, --filter-status <STATUS_CODE>... Filter out status codes (deny list) (ex: -C 200 -C 401)
-W, --filter-words <WORDS>... Filter out messages of a particular word count (ex: -W 312 -W 91,82)
@@ -519,6 +523,19 @@ each one is checked against a list of known filters and either displayed or not
./feroxbuster -u http://127.1 --filter-status 301
```

### Filter Response Using a Regular Expression (new in `v1.8.0`)

Version 1.3.0 included an overhaul to the filtering system that allows a wide array of filters to be added
with minimal effort. The latest addition is a Regular Expression Filter. As responses come back from the scanned server,
the **body** of each response is checked against the filter's regular expression. If the expression matches the
body, that response is filtered out.

**NOTE: Filtering large response bodies, or filtering with many regular expressions, may negatively impact performance.**

```
./feroxbuster -u http://127.1 --filter-regex '[aA]ccess [dD]enied.?' --output results.txt --json
```
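
For a rough sense of the matching logic described above, here is a minimal, self-contained sketch using the `regex` crate; the pattern and response body are made-up examples, and the project's actual implementation is the `RegexFilter` in `src/filters.rs`, shown further down in this diff.

```rust
use regex::Regex;

fn main() {
    // Hypothetical pattern, mirroring the command-line example above.
    let pattern = Regex::new(r"[aA]ccess [dD]enied.?").unwrap();

    // Hypothetical response body returned by the scanned server.
    let body = "Access denied: you do not have permission to view this page";

    // If the expression matches anywhere in the body, the response is filtered out
    // (i.e. not displayed in the scan results).
    if pattern.is_match(body) {
        println!("response would be filtered");
    }
}
```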

### Replay Responses to a Proxy based on Status Code (new in `v1.5.0`)

The `--replay-proxy` and `--replay-codes` options were added as a way to only send a select few responses to a proxy. This is in stark contrast to `--proxy` which proxies EVERY request.
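
For illustration, an invocation might look like the following; the proxy address and status codes are placeholders rather than values taken from the project's documentation.

```
./feroxbuster -u http://127.1 --replay-proxy http://localhost:8080 --replay-codes 200 302
```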
1 change: 1 addition & 0 deletions ferox-config.toml.example
@@ -32,6 +32,7 @@
# extract_links = true
# depth = 1
# filter_size = [5174]
# filter_regex = ["^ignore me$"]
# filter_word_count = [993]
# filter_line_count = [35, 36]
# queries = [["name","value"], ["rick", "astley"]]
9 changes: 9 additions & 0 deletions src/banner.rs
@@ -315,6 +315,15 @@ by Ben "epi" Risher {} ver: {}"#,
.unwrap_or_default(); // 💢
}

for filter in &config.filter_regex {
writeln!(
&mut writer,
"{}",
format_banner_entry!("\u{1f4a2}", "Regex Filter", filter)
)
.unwrap_or_default(); // 💢
}

if config.extract_links {
writeln!(
&mut writer,
20 changes: 20 additions & 0 deletions src/config.rs
@@ -184,6 +184,10 @@ pub struct Configuration {
#[serde(default)]
pub filter_word_count: Vec<usize>,

/// Filter out messages by regular expression
#[serde(default)]
pub filter_regex: Vec<String>,

/// Don't auto-filter wildcard responses
#[serde(default)]
pub dont_filter: bool,
@@ -270,6 +274,7 @@ impl Default for Configuration {
queries: Vec::new(),
extensions: Vec::new(),
filter_size: Vec::new(),
filter_regex: Vec::new(),
filter_line_count: Vec::new(),
filter_word_count: Vec::new(),
filter_status: Vec::new(),
@@ -303,6 +308,7 @@ impl Configuration {
/// - **insecure**: `false` (don't be insecure, i.e. don't allow invalid certs)
/// - **extensions**: `None`
/// - **filter_size**: `None`
/// - **filter_regex**: `None`
/// - **filter_word_count**: `None`
/// - **filter_line_count**: `None`
/// - **headers**: `None`
@@ -449,6 +455,10 @@ impl Configuration {
config.extensions = arg.map(|val| val.to_string()).collect();
}

if let Some(arg) = args.values_of("filter_regex") {
config.filter_regex = arg.map(|val| val.to_string()).collect();
}

if let Some(arg) = args.values_of("filter_size") {
config.filter_size = arg
.map(|size| {
@@ -647,6 +657,7 @@ impl Configuration {
settings.stdin = settings_to_merge.stdin;
settings.depth = settings_to_merge.depth;
settings.filter_size = settings_to_merge.filter_size;
settings.filter_regex = settings_to_merge.filter_regex;
settings.filter_word_count = settings_to_merge.filter_word_count;
settings.filter_line_count = settings_to_merge.filter_line_count;
settings.filter_status = settings_to_merge.filter_status;
@@ -756,6 +767,7 @@ mod tests {
json = true
depth = 1
filter_size = [4120]
filter_regex = ["^ignore me$"]
filter_word_count = [994, 992]
filter_line_count = [34]
filter_status = [201]
@@ -796,6 +808,7 @@ mod tests {
assert_eq!(config.queries, Vec::new());
assert_eq!(config.extensions, Vec::<String>::new());
assert_eq!(config.filter_size, Vec::<u64>::new());
assert_eq!(config.filter_regex, Vec::<String>::new());
assert_eq!(config.filter_word_count, Vec::<usize>::new());
assert_eq!(config.filter_line_count, Vec::<usize>::new());
assert_eq!(config.filter_status, Vec::<u16>::new());
@@ -956,6 +969,13 @@ mod tests {
assert_eq!(config.extensions, vec!["html", "php", "js"]);
}

#[test]
/// parse the test config and see that the value parsed is correct
fn config_reads_filter_regex() {
let config = setup_config_test();
assert_eq!(config.filter_regex, vec!["^ignore me$"]);
}

#[test]
/// parse the test config and see that the value parsed is correct
fn config_reads_filter_size() {
125 changes: 125 additions & 0 deletions src/filters.rs
@@ -1,6 +1,7 @@
use crate::config::CONFIGURATION;
use crate::utils::get_url_path_length;
use crate::FeroxResponse;
use regex::Regex;
use std::any::Any;
use std::fmt::Debug;

@@ -237,9 +238,54 @@ impl FeroxFilter for SizeFilter {
}
}

/// Simple implementor of FeroxFilter; used to filter out responses based on a given regular
/// expression; specified using -X|--filter-regex
#[derive(Debug)]
pub struct RegexFilter {
/// Regular expression to be applied to the response body for filtering, compiled
pub compiled: Regex,

/// Regular expression as passed in on the command line, not compiled
pub raw_string: String,
}

/// implementation of FeroxFilter for RegexFilter
impl FeroxFilter for RegexFilter {
    /// Check the compiled regular expression against the response body; if it matches,
    /// the response should be filtered out
fn should_filter_response(&self, response: &FeroxResponse) -> bool {
log::trace!("enter: should_filter_response({:?} {})", self, response);

let result = self.compiled.is_match(response.text());

log::trace!("exit: should_filter_response -> {}", result);

result
}

    /// Compare one RegexFilter to another
fn box_eq(&self, other: &dyn Any) -> bool {
other.downcast_ref::<Self>().map_or(false, |a| self == a)
}

/// Return self as Any for dynamic dispatch purposes
fn as_any(&self) -> &dyn Any {
self
}
}

/// PartialEq implementation for RegexFilter
impl PartialEq for RegexFilter {
/// Simple comparison of the raw string passed in via the command line
fn eq(&self, other: &RegexFilter) -> bool {
self.raw_string == other.raw_string
}
}
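
As a hedged sketch of how these filters might be constructed from the parsed configuration (the real wiring lives in `src/scanner.rs`, which is not part of this excerpt, and may handle invalid patterns differently, e.g. by reporting an error instead of skipping):

```rust
use regex::Regex;

/// Hypothetical helper: compile the raw strings collected from -X/--filter-regex
/// into RegexFilter instances, silently skipping any pattern that fails to compile.
fn build_regex_filters(raw_patterns: &[String]) -> Vec<RegexFilter> {
    raw_patterns
        .iter()
        .filter_map(|raw| {
            Regex::new(raw).ok().map(|compiled| RegexFilter {
                compiled,
                raw_string: raw.clone(),
            })
        })
        .collect()
}
```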

#[cfg(test)]
mod tests {
use super::*;
use reqwest::Url;

#[test]
/// just a simple test to increase code coverage by hitting as_any and the inner value
@@ -288,4 +334,83 @@ mod tests {
filter
);
}

#[test]
/// just a simple test to increase code coverage by hitting as_any and the inner value
fn regex_filter_as_any() {
let raw = r".*\.txt$";
let compiled = Regex::new(raw).unwrap();
let filter = RegexFilter {
compiled,
raw_string: raw.to_string(),
};

assert_eq!(filter.raw_string, r".*\.txt$");
assert_eq!(
*filter.as_any().downcast_ref::<RegexFilter>().unwrap(),
filter
);
}

#[test]
    /// test should_filter on WildcardFilter where static logic matches
fn wildcard_should_filter_when_static_wildcard_found() {
let resp = FeroxResponse {
text: String::new(),
wildcard: true,
url: Url::parse("http://localhost").unwrap(),
content_length: 100,
headers: reqwest::header::HeaderMap::new(),
status: reqwest::StatusCode::OK,
};

let filter = WildcardFilter {
size: 100,
dynamic: 0,
};

assert!(filter.should_filter_response(&resp));
}

#[test]
    /// test should_filter on WildcardFilter where dynamic logic matches
fn wildcard_should_filter_when_dynamic_wildcard_found() {
let resp = FeroxResponse {
text: String::new(),
wildcard: true,
url: Url::parse("http://localhost/stuff").unwrap(),
content_length: 100,
headers: reqwest::header::HeaderMap::new(),
status: reqwest::StatusCode::OK,
};

let filter = WildcardFilter {
size: 0,
dynamic: 95,
};

assert!(filter.should_filter_response(&resp));
}

#[test]
/// test should_filter on RegexFilter where regex matches body
fn regexfilter_should_filter_when_regex_matches_on_response_body() {
let resp = FeroxResponse {
text: String::from("im a body response hurr durr!"),
wildcard: false,
url: Url::parse("http://localhost/stuff").unwrap(),
content_length: 100,
headers: reqwest::header::HeaderMap::new(),
status: reqwest::StatusCode::OK,
};

let raw = r"response...rr";

let filter = RegexFilter {
raw_string: raw.to_string(),
compiled: Regex::new(raw).unwrap(),
};

assert!(filter.should_filter_response(&resp));
}
}
10 changes: 1 addition & 9 deletions src/main.rs
@@ -113,15 +113,7 @@ async fn scan(
return Err(Box::new(err));
}

scanner::initialize(
words.len(),
CONFIGURATION.scan_limit,
&CONFIGURATION.extensions,
&CONFIGURATION.filter_status,
&CONFIGURATION.filter_line_count,
&CONFIGURATION.filter_word_count,
&CONFIGURATION.filter_size,
);
scanner::initialize(words.len(), &CONFIGURATION);

let mut tasks = vec![];

12 changes: 12 additions & 0 deletions src/parser.rs
@@ -231,6 +231,18 @@ pub fn initialize() -> App<'static, 'static> {
"Filter out messages of a particular size (ex: -S 5120 -S 4927,1970)",
),
)
.arg(
Arg::with_name("filter_regex")
.short("X")
.long("filter-regex")
.value_name("REGEX")
.takes_value(true)
.multiple(true)
.use_delimiter(true)
.help(
"Filter out messages via regular expression matching on the response's body (ex: -X '^ignore me$')",
),
)
.arg(
Arg::with_name("filter_words")
.short("W")