Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CDDL validation using cddlparser and pyodide #837

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ updates:
interval: daily
time: '09:00'
open-pull-requests-limit: 10
- package-ecosystem: pip
directory: "/"
schedule:
interval: daily
time: '09:00'
open-pull-requests-limit: 10
21 changes: 21 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
"commander": "12.1.0",
"gray-matter": "^4.0.3",
"jsdom": "^25.0.1",
"pyodide": "^0.26.4",
"reffy": "^18.0.1",
"semver": "^7.3.5",
"webidl2": "^24.2.2"
Expand Down
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# The CDDL parser used by Strudy is written in Python.
# This file sets the version of the CDDL parser run by Strudy and makes it
# possible for Dependabot to detect and propose updates.
cddlparser==0.4.0
107 changes: 107 additions & 0 deletions src/lib/study-cddl.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/**
* Analyze the CDDL extracted during a crawl and create an anomalies report.
*
* The CDDL parser surrenders as soon as it encounters an error. As such, the
* anomalies report will contain at most one anomaly per CDDL module, even
* though there may be many more problems with the CDDL.
*
* Note: for specs that define multiple CDDL modules, anomalies will also be
* reported for the "all" module. That may appear as duplicating the errors,
* but will be useful to detect rules that get defined with the same name in
* two different modules (once the CDDL parser actually detects that).
*
* The analysis makes no attempt for now at assembling CDDL modules that
* are part of the same namespace. For that to become possible, the recipe to
* follow to create the namespaces out of modules will have to be specified
* somehow.
*/

import { loadPyodide } from 'pyodide';
import fs from 'node:fs/promises';
import path from 'node:path';
import { fileURLToPath } from "node:url";

const scriptPath = path.dirname(fileURLToPath(import.meta.url));

// Python WebAssembly environment (initialized once)
let pyodide = null;

async function studyCddl (specs, { crawledResults = [] } = {}) {
const report = []; // List of anomalies to report

// Setup Python WebAssembly environment
if (!pyodide) {
pyodide = await loadPyodide();

// Retrieve the version of the cddlparser package to install from
// the "requirements.txt" file at the root of the project
const file = path.join(scriptPath, '..', '..', 'requirements.txt');
const contents = await fs.readFile(file, 'utf8');
const version = contents.match(/^cddlparser==(.*)/m)[1];

// Load micropip to "install" the cddlparser package.
// An alternative would be to `loadPackage` cddlparser from a Wheel URL
// directly to avoid having to install micropip. The Wheel URL in Pypi does
// not seem predictable. But the one for GitHub releases could work, e.g.:
// https://github.com/tidoust/cddlparser/releases/download/v0.4.0/cddlparser-0.4.0-py3-none-any.whl
// This would make loading slightly faster and avoid introducing yet
// another dependency. But it would hardcode the cddlparser repository name
// whereas the project could perhaps be moved in the future.
await pyodide.loadPackage('micropip');
const micropip = pyodide.pyimport('micropip');
await micropip.install(`cddlparser==${version}`);
}

// Python will report execution results to the standard output
let stdout = '';
pyodide.setStdout({
batched: str => stdout += str + '\n'
});

for (const spec of specs) {
for (const cddlModule of (spec.cddl ?? [])) {
stdout = '';
await pyodide.runPythonAsync(`
from cddlparser import parse
try:
parse('''${cddlModule.cddl}''')
except Exception as err:
print(err)
`);
stdout = stdout.trim();

// If nothing was reported, that means parsing succeeded, CDDL is valid
if (!stdout) {
continue;
}

let message = cddlModule.name ?
`In the "${cddlModule.name}" module, ` :
'';
message += stdout;

// The parser reports the line number at which the error occurred... but
// it gets run against the expanded extract, and expansion drops the
// header that Reffy adds to the extracts to note the provenance. The line
// number is off by 5 as a result. Let's fix that. It's certainly not
// fantastic to hardcode the number of lines that compose the header, but
// let's say that's good enough for now!
const reLineNumber = / line (\d+):/;
const match = message.match(reLineNumber);
if (match) {
let lineNumber = parseInt(match[1], 10);
lineNumber += 5;
message = message.replace(reLineNumber, ` line ${lineNumber}:`);
}

report.push({
name: 'invalidCddl',
message,
spec
});
}
}
return report;
}

export default studyCddl;
14 changes: 14 additions & 0 deletions src/lib/study.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import studyCddl from './study-cddl.js';
import studyDfns from './study-dfns.js';
import studyAlgorithms from './study-algorithms.js';
import studyBackrefs from './study-backrefs.js';
Expand Down Expand Up @@ -186,6 +187,19 @@ const anomalyGroups = [
study: studyWebIdl,
studyParams: ['curated'],
cc: true
},

{
name: 'cddl',
title: 'Problems with CDDL',
description: 'The following problems were identified when analyzing CDDL definitions',
types: [
{
name: 'invalidCddl',
title: 'Invalid CDDL syntax'
}
],
study: studyCddl
}
];

Expand Down
96 changes: 96 additions & 0 deletions test/study-cddl.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/**
* Tests CDDL analysis.
*
* Note line numbers in reported anomalies are off by 5. That's normal, the
* analysis compensates for the header that Reffy adds to the CDDL extracts
* and that isn't included in the tests.
*/
/* global describe, it */

import study from '../src/lib/study-cddl.js';
import { assertNbAnomalies, assertAnomaly } from './util.js';

describe('The CDDL analyser', function () {
this.timeout(5000);

const specUrl = 'https://www.w3.org/TR/spec';
const specUrl2 = 'https://www.w3.org/TR/spec2';

function toCrawlResult (cddl, cddl2) {
const crawlResult = [];

function addSpecAndCddl(url, cddl) {
if (typeof cddl === 'string') {
crawlResult.push({ url, cddl: [{ name: '', cddl }] });
}
else if (Array.isArray(cddl)) {
crawlResult.push({ url, cddl });
}
else {
crawlResult.push({ url, cddl: [cddl] });
}
}

addSpecAndCddl(specUrl, cddl);
if (cddl2) {
addSpecAndCddl(specUrl2, cddl2);
}
return crawlResult;
}

function analyzeCddl (cddl, cddl2) {
const crawlResult = toCrawlResult(cddl, cddl2);
return study(crawlResult);
}

it('reports no anomaly if CDDL is valid', async () => {
const crawlResult = toCrawlResult('cddl = tstr');
const report = await study(crawlResult);
assertNbAnomalies(report, 0);
});

it('reports an anomaly when CDDL is invalid', async () => {
const crawlResult = toCrawlResult('cddl');
const report = await study(crawlResult);
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, {
name: 'invalidCddl',
message: 'CDDL syntax error - line 6: expected assignment (`=`, `/=`, `//=`) after `cddl`, got ``',
spec: { url: specUrl }
});
});

it('reports the CDDL module name', async () => {
const crawlResult = toCrawlResult({
name: 'mo-mo-mo',
cddl: 'nan = 0.'
});
const report = await study(crawlResult);
assertNbAnomalies(report, 1);
assertAnomaly(report, 0, {
name: 'invalidCddl',
message: 'In the "mo-mo-mo" module, CDDL token error - line 6: expected number with fraction to have digits in fraction part, got `0.`',
spec: { url: specUrl }
});
});

it('validates all CDDL modules', async () => {
const crawlResult = toCrawlResult([
{ name: 'all', cddl: 'cddl = tstr\nnobinary = 0b' },
{ name: 'valid', cddl: 'cddl = tstr' },
{ name: 'invalid', cddl: 'nobinary = 0b'}
]);
const report = await study(crawlResult);
assertNbAnomalies(report, 2);
assertAnomaly(report, 0, {
name: 'invalidCddl',
message: 'In the "all" module, CDDL token error - line 7: expected binary number to have binary digits, got `0b`',
spec: { url: specUrl }
});
assertAnomaly(report, 1, {
name: 'invalidCddl',
message: 'In the "invalid" module, CDDL token error - line 6: expected binary number to have binary digits, got `0b`',
spec: { url: specUrl }
});
});
});
Loading