-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CDDL validation using cddlparser and pyodide (#837)
There is no immediate parser in Node.js that could be used to validate CDDL extracts. Instead, the idea is to run the cddlparser Python package using a Python interpreter running in WebAssembly. This provides a CDDL validation guarantee for Webref. Some notes: - The version of cddlparser used by Strudy is specified in the `requirements.txt` file. Goal is for Dependabot to create a pull request automatically when a new version becomes available. - Loading could be optimized not to depend on `micropip` at all, but that requires hardcoding the Wheel URL of the cddlparser package somehow. - The line position in the anomalies report needs to be adjusted to account for the header that Reffy adds and that gets stripped during "expansion". That's hardcoded for now. I'd say that's good enough... - The CDDL parser stops as soon as it encounters an error, the list of anomalies only contains the first problem as a result. - The CDDL parser does not validate additional CDDL semantics for now.
- Loading branch information
Showing
7 changed files
with
249 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# The CDDL parser used by Strudy is written in Python. | ||
# This file sets the version of the CDDL parser run by Strudy and makes it | ||
# possible for Dependabot to detect and propose updates. | ||
cddlparser==0.4.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/** | ||
* Analyze the CDDL extracted during a crawl and create an anomalies report. | ||
* | ||
* The CDDL parser surrenders as soon as it encounters an error. As such, the | ||
* anomalies report will contain at most one anomaly per CDDL module, even | ||
* though there may be many more problems with the CDDL. | ||
* | ||
* Note: for specs that define multiple CDDL modules, anomalies will also be | ||
* reported for the "all" module. That may appear as duplicating the errors, | ||
* but will be useful to detect rules that get defined with the same name in | ||
* two different modules (once the CDDL parser actually detects that). | ||
* | ||
* The analysis makes no attempt for now at assembling CDDL modules that | ||
* are part of the same namespace. For that to become possible, the recipe to | ||
* follow to create the namespaces out of modules will have to be specified | ||
* somehow. | ||
*/ | ||
|
||
import { loadPyodide } from 'pyodide'; | ||
import fs from 'node:fs/promises'; | ||
import path from 'node:path'; | ||
import { fileURLToPath } from "node:url"; | ||
|
||
const scriptPath = path.dirname(fileURLToPath(import.meta.url)); | ||
|
||
// Python WebAssembly environment (initialized once) | ||
let pyodide = null; | ||
|
||
async function studyCddl (specs, { crawledResults = [] } = {}) { | ||
const report = []; // List of anomalies to report | ||
|
||
// Setup Python WebAssembly environment | ||
if (!pyodide) { | ||
pyodide = await loadPyodide(); | ||
|
||
// Retrieve the version of the cddlparser package to install from | ||
// the "requirements.txt" file at the root of the project | ||
const file = path.join(scriptPath, '..', '..', 'requirements.txt'); | ||
const contents = await fs.readFile(file, 'utf8'); | ||
const version = contents.match(/^cddlparser==(.*)/m)[1]; | ||
|
||
// Load micropip to "install" the cddlparser package. | ||
// An alternative would be to `loadPackage` cddlparser from a Wheel URL | ||
// directly to avoid having to install micropip. The Wheel URL in Pypi does | ||
// not seem predictable. But the one for GitHub releases could work, e.g.: | ||
// https://github.com/tidoust/cddlparser/releases/download/v0.4.0/cddlparser-0.4.0-py3-none-any.whl | ||
// This would make loading slightly faster and avoid introducing yet | ||
// another dependency. But it would hardcode the cddlparser repository name | ||
// whereas the project could perhaps be moved in the future. | ||
await pyodide.loadPackage('micropip'); | ||
const micropip = pyodide.pyimport('micropip'); | ||
await micropip.install(`cddlparser==${version}`); | ||
} | ||
|
||
// Python will report execution results to the standard output | ||
let stdout = ''; | ||
pyodide.setStdout({ | ||
batched: str => stdout += str + '\n' | ||
}); | ||
|
||
for (const spec of specs) { | ||
for (const cddlModule of (spec.cddl ?? [])) { | ||
stdout = ''; | ||
await pyodide.runPythonAsync(` | ||
from cddlparser import parse | ||
try: | ||
parse('''${cddlModule.cddl}''') | ||
except Exception as err: | ||
print(err) | ||
`); | ||
stdout = stdout.trim(); | ||
|
||
// If nothing was reported, that means parsing succeeded, CDDL is valid | ||
if (!stdout) { | ||
continue; | ||
} | ||
|
||
let message = cddlModule.name ? | ||
`In the "${cddlModule.name}" module, ` : | ||
''; | ||
message += stdout; | ||
|
||
// The parser reports the line number at which the error occurred... but | ||
// it gets run against the expanded extract, and expansion drops the | ||
// header that Reffy adds to the extracts to note the provenance. The line | ||
// number is off by 5 as a result. Let's fix that. It's certainly not | ||
// fantastic to hardcode the number of lines that compose the header, but | ||
// let's say that's good enough for now! | ||
const reLineNumber = / line (\d+):/; | ||
const match = message.match(reLineNumber); | ||
if (match) { | ||
let lineNumber = parseInt(match[1], 10); | ||
lineNumber += 5; | ||
message = message.replace(reLineNumber, ` line ${lineNumber}:`); | ||
} | ||
|
||
report.push({ | ||
name: 'invalidCddl', | ||
message, | ||
spec | ||
}); | ||
} | ||
} | ||
return report; | ||
} | ||
|
||
export default studyCddl; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
/** | ||
* Tests CDDL analysis. | ||
* | ||
* Note line numbers in reported anomalies are off by 5. That's normal, the | ||
* analysis compensates for the header that Reffy adds to the CDDL extracts | ||
* and that isn't included in the tests. | ||
*/ | ||
/* global describe, it */ | ||
|
||
import study from '../src/lib/study-cddl.js'; | ||
import { assertNbAnomalies, assertAnomaly } from './util.js'; | ||
|
||
describe('The CDDL analyser', function () { | ||
this.timeout(5000); | ||
|
||
const specUrl = 'https://www.w3.org/TR/spec'; | ||
const specUrl2 = 'https://www.w3.org/TR/spec2'; | ||
|
||
function toCrawlResult (cddl, cddl2) { | ||
const crawlResult = []; | ||
|
||
function addSpecAndCddl(url, cddl) { | ||
if (typeof cddl === 'string') { | ||
crawlResult.push({ url, cddl: [{ name: '', cddl }] }); | ||
} | ||
else if (Array.isArray(cddl)) { | ||
crawlResult.push({ url, cddl }); | ||
} | ||
else { | ||
crawlResult.push({ url, cddl: [cddl] }); | ||
} | ||
} | ||
|
||
addSpecAndCddl(specUrl, cddl); | ||
if (cddl2) { | ||
addSpecAndCddl(specUrl2, cddl2); | ||
} | ||
return crawlResult; | ||
} | ||
|
||
function analyzeCddl (cddl, cddl2) { | ||
const crawlResult = toCrawlResult(cddl, cddl2); | ||
return study(crawlResult); | ||
} | ||
|
||
it('reports no anomaly if CDDL is valid', async () => { | ||
const crawlResult = toCrawlResult('cddl = tstr'); | ||
const report = await study(crawlResult); | ||
assertNbAnomalies(report, 0); | ||
}); | ||
|
||
it('reports an anomaly when CDDL is invalid', async () => { | ||
const crawlResult = toCrawlResult('cddl'); | ||
const report = await study(crawlResult); | ||
assertNbAnomalies(report, 1); | ||
assertAnomaly(report, 0, { | ||
name: 'invalidCddl', | ||
message: 'CDDL syntax error - line 6: expected assignment (`=`, `/=`, `//=`) after `cddl`, got ``', | ||
spec: { url: specUrl } | ||
}); | ||
}); | ||
|
||
it('reports the CDDL module name', async () => { | ||
const crawlResult = toCrawlResult({ | ||
name: 'mo-mo-mo', | ||
cddl: 'nan = 0.' | ||
}); | ||
const report = await study(crawlResult); | ||
assertNbAnomalies(report, 1); | ||
assertAnomaly(report, 0, { | ||
name: 'invalidCddl', | ||
message: 'In the "mo-mo-mo" module, CDDL token error - line 6: expected number with fraction to have digits in fraction part, got `0.`', | ||
spec: { url: specUrl } | ||
}); | ||
}); | ||
|
||
it('validates all CDDL modules', async () => { | ||
const crawlResult = toCrawlResult([ | ||
{ name: 'all', cddl: 'cddl = tstr\nnobinary = 0b' }, | ||
{ name: 'valid', cddl: 'cddl = tstr' }, | ||
{ name: 'invalid', cddl: 'nobinary = 0b'} | ||
]); | ||
const report = await study(crawlResult); | ||
assertNbAnomalies(report, 2); | ||
assertAnomaly(report, 0, { | ||
name: 'invalidCddl', | ||
message: 'In the "all" module, CDDL token error - line 7: expected binary number to have binary digits, got `0b`', | ||
spec: { url: specUrl } | ||
}); | ||
assertAnomaly(report, 1, { | ||
name: 'invalidCddl', | ||
message: 'In the "invalid" module, CDDL token error - line 6: expected binary number to have binary digits, got `0b`', | ||
spec: { url: specUrl } | ||
}); | ||
}); | ||
}); |