Skip to content

Commit

Permalink
feat: Add webpage translation functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
SleeplessOne1917 committed Sep 26, 2023
1 parent 54250f2 commit 3da0e3d
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 9 deletions.
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,30 @@ directly:
- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
- `downloadDocument()`

#### Translating webpages

Webpages can also be translated by calling `translateWebpage()`. It has the same signature as `translateDocument()` except for the
first parameter, which is the URL of the webpage you would like translated.

```javascript
// Translate the English Wikipedia page about DeepL into German:
await translator.translateWebpage(
'https://en.wikipedia.org/wiki/DeepL_Translator',
'DeepLWiki.html',
'en',
'de'
);
```

Like `translateDocument()`, `translateWebpage()` wraps multiple steps: fetching the webpage's HTML, uploading it,
polling status until the translation is complete, and downloading the result. If your application needs to
execute these steps individually, you can instead use the following functions
directly:

- `uploadWebpage()`,
- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
- `downloadDocument()`

#### Document translation options

- `formality`: same as in [Text translation options](#text-translation-options).
Expand Down
42 changes: 33 additions & 9 deletions src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,16 @@ export class HttpClient {
url: string,
timeoutMs: number,
responseAsStream: boolean,
isDeepL: boolean,
options: SendRequestOptions,
): AxiosRequestConfig {
const headers = Object.assign({}, this.headers, options.headers);
logDebug(`isDeepL: ${isDeepL}`);

const axiosRequestConfig: AxiosRequestConfig = {
url,
method,
baseURL: this.serverUrl,
baseURL: isDeepL ? this.serverUrl : undefined,
headers,
responseType: responseAsStream ? 'stream' : 'text',
timeout: timeoutMs,
Expand Down Expand Up @@ -147,19 +149,26 @@ export class HttpClient {
/**
* Makes API request retrying if necessary, and returns (as Promise) response.
* @param method HTTP method, for example 'GET'
* @param url Path to endpoint, excluding base server URL.
* @param url For a DeepL API request, the path to the endpoint, excluding the base server URL; for a webpage request, the full absolute URL of the webpage.
* @param options Additional options controlling request.
* @param responseAsStream Set to true if the return type is IncomingMessage.
* @return Fulfills with status code and response (as text or stream).
* @return Fulfills with status code, content type, and response (as text or stream).
*/
async sendRequestWithBackoff<TContent extends string | IncomingMessage>(
method: HttpMethod,
url: string,
options?: SendRequestOptions,
responseAsStream = false,
): Promise<{ statusCode: number; content: TContent }> {
): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
// Decide whether this request targets the DeepL API or an external webpage.
// DeepL API calls pass a path relative to the server URL, which the URL
// constructor rejects (no base given); an absolute URL parses successfully and
// therefore denotes a webpage. The previous `!!new URL(url)` evaluated to true
// in both cases, so webpage requests were never recognised and kept the
// Authorization header.
let isDeepLUrl: boolean;
try {
  new URL(url);
  isDeepLUrl = false;
} catch {
  isDeepLUrl = true;
}

options = options === undefined ? {} : options;
logInfo(`Request to DeepL API ${method} ${url}`);
logInfo(`${isDeepLUrl ? 'Request to DeepL API' : 'Request to webpage'} ${method} ${url}`);
logDebug(`Request details: ${options.data}`);
const backoff = new BackoffTimer();
let response, error;
Expand All @@ -170,8 +179,14 @@ export class HttpClient {
url,
timeoutMs,
responseAsStream,
isDeepLUrl,
options,
);

if (!isDeepLUrl && axiosRequestConfig.headers) {
delete axiosRequestConfig.headers.Authorization;
}

try {
response = await HttpClient.sendAxiosRequest<TContent>(axiosRequestConfig);
error = undefined;
Expand Down Expand Up @@ -199,8 +214,12 @@ export class HttpClient {
}

if (response !== undefined) {
const { statusCode, content } = response;
logInfo(`DeepL API response ${method} ${url} ${statusCode}`);
const { statusCode, content, contentType } = response;
logInfo(
`${
isDeepLUrl ? 'DeepL API response' : 'Webpage response'
} ${method} ${url} ${statusCode}${!isDeepLUrl ? ` ${contentType}` : ''}`,
);
if (!responseAsStream) {
logDebug('Response details:', { content: content });
}
Expand All @@ -217,7 +236,7 @@ export class HttpClient {
*/
private static async sendAxiosRequest<TContent extends string | IncomingMessage>(
axiosRequestConfig: AxiosRequestConfig,
): Promise<{ statusCode: number; content: TContent }> {
): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
try {
const response = await axios.request(axiosRequestConfig);

Expand All @@ -227,7 +246,12 @@ export class HttpClient {
response.data = JSON.stringify(response.data);
}
}
return { statusCode: response.status, content: response.data };

return {
statusCode: response.status,
content: response.data,
contentType: response.headers['content-type'],
};
} catch (axios_error_raw) {
const axiosError = axios_error_raw as AxiosError;
const message: string = axiosError.message || '';
Expand Down
2 changes: 2 additions & 0 deletions src/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,5 @@ export class DocumentTranslationError extends DeepLError {
export class GlossaryNotFoundError extends DeepLError {}

export class DocumentNotReadyError extends DeepLError {}

/**
 * Error thrown when a webpage fetched for translation cannot be used, for
 * example because its response content type is not HTML.
 */
export class WebsiteDownloadError extends DeepLError {}
66 changes: 66 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
GlossaryNotFoundError,
QuotaExceededError,
TooManyRequestsError,
WebsiteDownloadError,
} from './errors';
import { GlossaryEntries } from './glossaryEntries';
import {
Expand Down Expand Up @@ -645,6 +646,37 @@ export class Translator {
}
}

/**
 * Fetches the HTML of the specified webpage, uploads it to DeepL to translate into the given target
 * language, waits for translation to complete, then downloads the translated webpage to the
 * specified output path.
 * @param webpageUrl String or URL containing the URL of the webpage to be translated.
 * @param outputFile String containing file path to create translated document, or Stream or
 *     FileHandle to write translated document content.
 * @param sourceLang Language code of input document, or null to use auto-detection.
 * @param targetLang Language code of language to translate into.
 * @param options Optional DocumentTranslateOptions object containing additional options controlling
 *     translation. The fetched HTML is submitted under the filename 'webpage.html' unless
 *     options.filename is set.
 * @return Fulfills with a DocumentStatus object for the completed translation. You can use the
 *     billedCharacters property to check how many characters were billed for the document.
 * @throws {WebsiteDownloadError} If the webpage response is not HTML.
 * @throws {Error} If a file already exists at the output file path.
 * @throws {DocumentTranslationError} If any error occurs during document upload, translation or
 *     download. The `documentHandle` property of the error may be used to recover the document.
 */
async translateWebpage(
  webpageUrl: string | URL,
  outputFile: string | fs.WriteStream | fs.promises.FileHandle,
  sourceLang: SourceLanguageCode | null,
  targetLang: TargetLanguageCode,
  options?: DocumentTranslateOptions,
): Promise<DocumentStatus> {
  const html = await this.getContentFromWebpage(webpageUrl);

  return this.translateDocument(Buffer.from(html), outputFile, sourceLang, targetLang, {
    ...options,
    // Resolve the filename after the spread: spreading options last would let an
    // explicitly undefined filename overwrite the default.
    filename: options?.filename ?? 'webpage.html',
  });
}

/**
* Uploads specified document to DeepL to translate into target language, and returns handle associated with the document.
* @param inputFile String containing file path, stream containing file data, or FileHandle.
Expand Down Expand Up @@ -709,6 +741,28 @@ export class Translator {
}
}

/**
 * Fetches the HTML of the specified webpage and uploads it to DeepL to translate into the target
 * language, and returns the handle associated with the document.
 * @param webpageUrl String or URL containing the URL of the webpage to be translated.
 * @param sourceLang Language code of input document, or null to use auto-detection.
 * @param targetLang Language code of language to translate into.
 * @param options Optional DocumentTranslateOptions object containing additional options controlling
 *     translation. The fetched HTML is submitted under the filename 'webpage.html' unless
 *     options.filename is set.
 * @return Fulfills with DocumentHandle associated with the in-progress translation.
 * @throws {WebsiteDownloadError} If the webpage response is not HTML.
 */
async uploadWebpage(
  webpageUrl: string | URL,
  sourceLang: SourceLanguageCode | null,
  targetLang: TargetLanguageCode,
  options?: DocumentTranslateOptions,
): Promise<DocumentHandle> {
  const html = await this.getContentFromWebpage(webpageUrl);

  return this.uploadDocument(Buffer.from(html), sourceLang, targetLang, {
    ...options,
    // Resolve the filename after the spread: spreading options last would let an
    // explicitly undefined filename overwrite the default.
    filename: options?.filename ?? 'webpage.html',
  });
}

/**
* Retrieves the status of the document translation associated with the given document handle.
* @param handle Document handle associated with document.
Expand Down Expand Up @@ -1003,6 +1057,18 @@ export class Translator {
return libraryInfoString;
}

/**
 * Downloads the given webpage with a GET request and returns its body as text.
 * @param webpageUrl String or URL of the webpage to download.
 * @return Fulfills with the response body of the webpage.
 * @throws {WebsiteDownloadError} If the response has no content type or its content type is
 *     not text/html. The status code is also passed through checkStatusCode, which may
 *     throw for unsuccessful responses.
 */
private async getContentFromWebpage(webpageUrl: string | URL): Promise<string> {
  const { statusCode, content, contentType } =
    await this.httpClient.sendRequestWithBackoff<string>('GET', webpageUrl.toString());
  await checkStatusCode(statusCode, content);

  // Media type names are case-insensitive, so normalise before matching; a
  // missing content type short-circuits to undefined and is rejected as well.
  if (!contentType?.toLowerCase().includes('text/html')) {
    throw new WebsiteDownloadError('URL to translate must return HTML');
  }

  return content;
}

/**
* HttpClient implements all HTTP requests and retries.
* @private
Expand Down

0 comments on commit 3da0e3d

Please sign in to comment.