Skip to content

Commit

Permalink
Merge pull request #7800 from github/henrymercer/js-atm-add-model-bui…
Browse files Browse the repository at this point in the history
…lding-pack

JS: Add model building pack for ML-powered queries
  • Loading branch information
henrymercer authored Feb 1, 2022
2 parents fb00a6c + 1460131 commit e622e51
Show file tree
Hide file tree
Showing 30 changed files with 993 additions and 0 deletions.
1 change: 1 addition & 0 deletions .codeqlmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"*/ql/examples/qlpack.yml",
"cpp/ql/test/query-tests/Security/CWE/CWE-190/semmle/tainted/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/lib/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/modelbuilding/qlpack.yml",
"javascript/ql/experimental/adaptivethreatmodeling/src/qlpack.yml",
"csharp/ql/campaigns/Solorigate/lib/qlpack.yml",
"csharp/ql/campaigns/Solorigate/src/qlpack.yml",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* @name Debug result inclusion
* @description Use this query to understand why some alerts are included or excluded from the
* results of boosted queries. The results for this query are the union of the alerts
* generated by each boosted query. Each alert includes an explanation of why it was
* included or excluded for each of the four security queries.
* @kind problem
* @problem.severity error
* @id adaptive-threat-modeling/js/debug-result-inclusion
*/

import javascript
import experimental.adaptivethreatmodeling.ATMConfig
import extraction.ExtractEndpointData

/**
 * Gets a reason, as reported by the sink endpoint filter of the boosted query identified by
 * `query`, for excluding `sinkCandidate`.
 *
 * Each of the four query kinds delegates to the `SinkEndpointFilter` of its own ATM module; there
 * is no result for any other kind of `query`.
 */
string getAReasonSinkExcluded(DataFlow::Node sinkCandidate, Query query) {
  result = XssATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof XssQuery
  or
  result = TaintedPathATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof TaintedPathQuery
  or
  result = SqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof SqlInjectionQuery
  or
  result = NosqlInjectionATM::SinkEndpointFilter::getAReasonSinkExcluded(sinkCandidate) and
  query instanceof NosqlInjectionQuery
}

/**
 * Gets a description of how the pair (`sourceCandidate`, `sinkCandidate`) fares under `query`.
 *
 * The possible results are:
 * - `"included"` when the data flow configuration for `query` has flow between the two nodes;
 * - `"excluded[reason=known-sink]"` when the ATM configuration reports the sink as known;
 * - `"excluded[reason=...]"`, once per reason given by `getAReasonSinkExcluded`;
 * - `"no flow"` or `"not a known source"` when there is neither an exclusion reason nor flow.
 *
 * The cases are not mutually exclusive, so one pair may have several descriptions.
 */
pragma[inline]
string getDescriptionForAlertCandidate(
  DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate, Query query
) {
  // The data flow configuration for this query finds flow from source to sink.
  getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
  result = "included"
  or
  // The ATM configuration for this query reports the sink as a known sink.
  getATMCfg(query).isKnownSink(sinkCandidate) and
  result = "excluded[reason=known-sink]"
  or
  // One description per reason given by the sink endpoint filter.
  result = "excluded[reason=" + getAReasonSinkExcluded(sinkCandidate, query) + "]"
  or
  // No endpoint-filter reason and no flow: report whether the source is a source at all.
  not getDataFlowCfg(query).hasFlow(sourceCandidate, sinkCandidate) and
  not exists(getAReasonSinkExcluded(sinkCandidate, query)) and
  (
    if
      getDataFlowCfg(query).isSource(sourceCandidate, _) or
      getDataFlowCfg(query).isSource(sourceCandidate)
    then result = "no flow"
    else result = "not a known source"
  )
}

/**
 * Gets one string that lists, for every `Query`, the query's name followed by `": "` and a
 * description from `getDescriptionForAlertCandidate`, with the entries joined by `", "`.
 */
pragma[inline]
string getDescriptionForAlert(DataFlow::Node sourceCandidate, DataFlow::Node sinkCandidate) {
  result =
    concat(Query q, string entry |
      entry =
        q.getName() + ": " + getDescriptionForAlertCandidate(sourceCandidate, sinkCandidate, q)
    |
      entry, ", "
    )
}

// Report every flow found by any data flow configuration in scope. The alert is placed on the
// sink, and its message embeds the per-query descriptions from `getDescriptionForAlert`.
from DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, string message
where
  cfg.hasFlow(source, sink) and
  message =
    "This is an ATM result that may depend on $@ [" + getDescriptionForAlert(source, sink) + "]"
select sink, message, source, "a user-provided value"
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
private import javascript
private import extraction.Exclusions as Exclusions

/**
 * Holds if an end-to-end evaluation query should drop the flow from `source` to `sink`.
 *
 * This is the case when `Exclusions::isFileExcluded` holds for the file of either endpoint.
 */
pragma[inline]
predicate isFlowExcluded(DataFlow::Node source, DataFlow::Node sink) {
  Exclusions::isFileExcluded(source.getFile())
  or
  Exclusions::isFileExcluded(sink.getFile())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
* EndpointScoresIntegrationTest.ql
*
* Extract scores for each test endpoint that is an argument to a function call in the database.
* This is used by integration tests to verify that QL and the modeling codebase agree on the scores
* of a set of test endpoints.
*/

import javascript
import experimental.adaptivethreatmodeling.ATMConfig
import experimental.adaptivethreatmodeling.FeaturizationConfig
import experimental.adaptivethreatmodeling.EndpointScoring::ModelScoring as ModelScoring

/**
 * A featurization config whose endpoints to featurize are the arguments of call nodes.
 *
 * This should only be used in extraction queries and tests.
 */
class FunctionArgumentFeaturizationConfig extends FeaturizationConfig {
  FunctionArgumentFeaturizationConfig() { this = "FunctionArgumentFeaturization" }

  /** Gets any argument of any `DataFlow::CallNode`. */
  override DataFlow::Node getAnEndpointToFeaturize() {
    result = any(DataFlow::CallNode call).getAnArgument()
  }
}

/**
 * Alias of the three-column `ModelScoring::endpointScores` predicate. Declaring it as a
 * `query predicate` makes its tuples part of this query's output.
 */
query predicate endpointScores = ModelScoring::endpointScores/3;
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
* ModelCheck.ql
*
* Returns checksums of ATM models.
*/

/**
 * The `availableMlModels` template predicate.
 *
 * This is populated by the evaluator with metadata for the available machine learning models.
 * It is declared `external`, so it has no body in this file.
 *
 * NOTE(review): judging by the parameter names, the columns are a model's checksum, language,
 * name and type; the exact value formats are not visible here, so confirm them against the
 * evaluator.
 */
external predicate availableMlModels(
string modelChecksum, string modelLanguage, string modelName, string modelType
);

// Emit one row per distinct checksum whose `modelLanguage` column is "javascript"; the model
// name and model type columns are ignored.
select any(string checksum | availableMlModels(checksum, "javascript", _, _))
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* NosqlInjection.ql
*
* Version of the standard NoSQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/

import semmle.javascript.security.dataflow.NosqlInjection
import EndToEndEvaluation as EndToEndEvaluation

// Emits one row per flow found by a `NosqlInjection::Configuration`, skipping flows rejected by
// `EndToEndEvaluation::isFlowExcluded`. Each endpoint is followed by its location columns
// (start line, start column, end line, end column, file path).
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Bind the location columns of both endpoints.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows of the NoSQL injection configuration that the evaluation does not exclude.
  cfg instanceof NosqlInjection::Configuration and
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
* NosqlInjectionATM.ql
*
* Version of the boosted NoSQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/

import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.NosqlInjectionATM

// Emits one scored row per flow found by a data flow configuration in scope. Flows rejected by
// `EndToEndEvaluation::isFlowExcluded` or matched by `isFlowLikelyInBaseQuery` are skipped. Rows
// are ordered by descending score, then by the source and sink location columns.
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Bind the score and the location columns of both endpoints.
  score = getScoreForFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows that are neither excluded from evaluation nor likely base-query results.
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
* NosqlInjectionATMLite.ql
*
* Arbitrarily ranked version of the boosted NoSQL injection query with an output relation ready to
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
* filters, and (b) as a baseline to compare the model against.
*/

import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.NosqlInjectionATM

// Emits one row per flow found by a data flow configuration in scope. Flows rejected by
// `EndToEndEvaluation::isFlowExcluded` or matched by `isFlowLikelyInBaseQuery` are skipped.
// Every row gets the constant score 0, so the effective ordering comes from the location columns.
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Constant score: this variant does not consult the model.
  score = 0 and
  // Bind the location columns of both endpoints.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows that are neither excluded from evaluation nor likely base-query results.
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* SqlInjection.ql
*
* Version of the standard SQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/

import semmle.javascript.security.dataflow.SqlInjection
import EndToEndEvaluation as EndToEndEvaluation

// Emits one row per flow found by a `SqlInjection::Configuration`, skipping flows rejected by
// `EndToEndEvaluation::isFlowExcluded`. Each endpoint is followed by its location columns
// (start line, start column, end line, end column, file path).
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Bind the location columns of both endpoints.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows of the SQL injection configuration that the evaluation does not exclude.
  cfg instanceof SqlInjection::Configuration and
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
* SqlInjectionATM.ql
*
* Version of the boosted SQL injection query with an output relation ready to plug into the
* evaluation pipeline.
*/

import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.SqlInjectionATM

// Emits one scored row per flow found by a data flow configuration in scope. Flows rejected by
// `EndToEndEvaluation::isFlowExcluded` or matched by `isFlowLikelyInBaseQuery` are skipped. Rows
// are ordered by descending score, then by the source and sink location columns.
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Bind the score and the location columns of both endpoints.
  score = getScoreForFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows that are neither excluded from evaluation nor likely base-query results.
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
* SqlInjectionATMLite.ql
*
* Arbitrarily ranked version of the boosted SQL injection query with an output relation ready to
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
* filters, and (b) as a baseline to compare the model against.
*/

import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.SqlInjectionATM

// Emits one row per flow found by a data flow configuration in scope. Flows rejected by
// `EndToEndEvaluation::isFlowExcluded` or matched by `isFlowLikelyInBaseQuery` are skipped.
// Every row gets the constant score 0, so the effective ordering comes from the location columns.
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Constant score: this variant does not consult the model.
  score = 0 and
  // Bind the location columns of both endpoints.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows that are neither excluded from evaluation nor likely base-query results.
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* TaintedPath.ql
*
* Version of the standard path injection query with an output relation ready to plug into the
* evaluation pipeline.
*/

import semmle.javascript.security.dataflow.TaintedPath
import EndToEndEvaluation as EndToEndEvaluation

// Emits one row per flow found by a `TaintedPath::Configuration`, skipping flows rejected by
// `EndToEndEvaluation::isFlowExcluded`. Each endpoint is followed by its location columns
// (start line, start column, end line, end column, file path).
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Bind the location columns of both endpoints.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows of the tainted path configuration that the evaluation does not exclude.
  cfg instanceof TaintedPath::Configuration and
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
* TaintedPathATM.ql
*
* Version of the boosted path injection query with an output relation ready to plug into the
* evaluation pipeline.
*/

import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.TaintedPathATM

// Emits one scored row per flow found by a data flow configuration in scope. Flows rejected by
// `EndToEndEvaluation::isFlowExcluded` or matched by `isFlowLikelyInBaseQuery` are skipped. Rows
// are ordered by descending score, then by the source and sink location columns.
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Bind the score and the location columns of both endpoints.
  score = getScoreForFlow(source, sink) and
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows that are neither excluded from evaluation nor likely base-query results.
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
* TaintedPathATMLite.ql
*
* Arbitrarily ranked version of the boosted path injection query with an output relation ready to
* plug into the evaluation pipeline. This is useful (a) for evaluating the performance of endpoint
* filters, and (b) as a baseline to compare the model against.
*/

import ATM::ResultsInfo
import EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.TaintedPathATM

// Emits one row per flow found by a data flow configuration in scope. Flows rejected by
// `EndToEndEvaluation::isFlowExcluded` or matched by `isFlowLikelyInBaseQuery` are skipped.
// Every row gets the constant score 0, so the effective ordering comes from the location columns.
from
  DataFlow::Configuration cfg, DataFlow::Node source, DataFlow::Node sink, float score,
  string filePathSource, int startLineSource, int startColumnSource, int endLineSource,
  int endColumnSource, string filePathSink, int startLineSink, int startColumnSink,
  int endLineSink, int endColumnSink
where
  // Constant score: this variant does not consult the model.
  score = 0 and
  // Bind the location columns of both endpoints.
  source
      .hasLocationInfo(filePathSource, startLineSource, startColumnSource, endLineSource,
        endColumnSource) and
  sink.hasLocationInfo(filePathSink, startLineSink, startColumnSink, endLineSink, endColumnSink) and
  // Keep only flows that are neither excluded from evaluation nor likely base-query results.
  cfg.hasFlow(source, sink) and
  not EndToEndEvaluation::isFlowExcluded(source, sink) and
  not isFlowLikelyInBaseQuery(source, sink)
select source, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
  sink, startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink, score order by
    score desc, startLineSource, startColumnSource, endLineSource, endColumnSource, filePathSource,
    startLineSink, startColumnSink, endLineSink, endColumnSink, filePathSink
Loading

0 comments on commit e622e51

Please sign in to comment.