|
@@ -9,6 +9,10 @@ import logger from '../../services/logger'
|
|
|
import _ from 'lodash'
|
|
|
import { getRemoteDataObjects } from './remoteStorageData'
|
|
|
import { TaskSink } from './workingProcess'
|
|
|
+import { hashFile } from '../helpers/hashing'
|
|
|
+import { parseBagId } from '../helpers/bagTypes'
|
|
|
+import { hexToString } from '@polkadot/util'
|
|
|
+import { ApiPromise } from '@polkadot/api'
|
|
|
const fsPromises = fs.promises
|
|
|
|
|
|
/**
|
|
@@ -53,12 +57,20 @@ export class DeleteLocalFileTask implements SyncTask {
|
|
|
*/
|
|
|
export class DownloadFileTask implements SyncTask {
|
|
|
id: string
|
|
|
+ expectedHash?: string
|
|
|
uploadsDirectory: string
|
|
|
tempDirectory: string
|
|
|
url: string
|
|
|
|
|
|
- constructor(baseUrl: string, id: string, uploadsDirectory: string, tempDirectory: string) {
|
|
|
+ constructor(
|
|
|
+ baseUrl: string,
|
|
|
+ id: string,
|
|
|
+ expectedHash: string | undefined,
|
|
|
+ uploadsDirectory: string,
|
|
|
+ tempDirectory: string
|
|
|
+ ) {
|
|
|
this.id = id
|
|
|
+ this.expectedHash = expectedHash
|
|
|
this.uploadsDirectory = uploadsDirectory
|
|
|
this.tempDirectory = tempDirectory
|
|
|
this.url = urljoin(baseUrl, 'api/v1/files', id)
|
|
@@ -71,27 +83,45 @@ export class DownloadFileTask implements SyncTask {
|
|
|
async execute(): Promise<void> {
|
|
|
const streamPipeline = promisify(pipeline)
|
|
|
const filepath = path.join(this.uploadsDirectory, this.id)
|
|
|
-
|
|
|
+ // We create tempfile first to mitigate partial downloads on app (or remote node) crash.
|
|
|
+ // This partial downloads will be cleaned up during the next sync iteration.
|
|
|
+ const tempFilePath = path.join(this.uploadsDirectory, this.tempDirectory, uuidv4())
|
|
|
try {
|
|
|
const timeoutMs = 30 * 60 * 1000 // 30 min for large files (~ 10 GB)
|
|
|
// Casting because of:
|
|
|
// https://stackoverflow.com/questions/38478034/pipe-superagent-response-to-express-response
|
|
|
const request = superagent.get(this.url).timeout(timeoutMs) as unknown as NodeJS.ReadableStream
|
|
|
-
|
|
|
- // We create tempfile first to mitigate partial downloads on app (or remote node) crash.
|
|
|
- // This partial downloads will be cleaned up during the next sync iteration.
|
|
|
- const tempFilePath = path.join(this.tempDirectory, uuidv4())
|
|
|
const fileStream = fs.createWriteStream(tempFilePath)
|
|
|
- await streamPipeline(request, fileStream)
|
|
|
|
|
|
+ request.on('response', (res) => {
|
|
|
+ if (!res.ok) {
|
|
|
+ logger.error(`Sync - unexpected status code(${res.statusCode}) for ${res?.request?.url}`)
|
|
|
+ }
|
|
|
+ })
|
|
|
+ await streamPipeline(request, fileStream)
|
|
|
+ await this.verifyDownloadedFile(tempFilePath)
|
|
|
await fsPromises.rename(tempFilePath, filepath)
|
|
|
} catch (err) {
|
|
|
logger.error(`Sync - fetching data error for ${this.url}: ${err}`, { err })
|
|
|
try {
|
|
|
- logger.warn(`Cleaning up file ${filepath}`)
|
|
|
- await fsPromises.unlink(filepath)
|
|
|
+ logger.warn(`Cleaning up file ${tempFilePath}`)
|
|
|
+ await fsPromises.unlink(tempFilePath)
|
|
|
} catch (err) {
|
|
|
- logger.error(`Sync - cannot cleanup file ${filepath}: ${err}`, { err })
|
|
|
+ logger.error(`Sync - cannot cleanup file ${tempFilePath}: ${err}`, { err })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /** Compares expected and real IPFS hashes
|
|
|
+ *
|
|
|
+ * @param filePath downloaded file path
|
|
|
+ */
|
|
|
+ async verifyDownloadedFile(filePath: string): Promise<void> {
|
|
|
+ if (!_.isEmpty(this.expectedHash)) {
|
|
|
+ const hash = await hashFile(filePath)
|
|
|
+
|
|
|
+ if (hash !== this.expectedHash) {
|
|
|
+ throw new Error(`Invalid file hash. Expected: ${this.expectedHash} - real: ${hash}`)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -101,19 +131,25 @@ export class DownloadFileTask implements SyncTask {
|
|
|
* Resolve remote storage node URLs and creates file downloading tasks (DownloadFileTask).
|
|
|
*/
|
|
|
export class PrepareDownloadFileTask implements SyncTask {
|
|
|
+ bagId: string
|
|
|
dataObjectId: string
|
|
|
operatorUrlCandidates: string[]
|
|
|
taskSink: TaskSink
|
|
|
uploadsDirectory: string
|
|
|
tempDirectory: string
|
|
|
+ api?: ApiPromise
|
|
|
|
|
|
constructor(
|
|
|
operatorUrlCandidates: string[],
|
|
|
+ bagId: string,
|
|
|
dataObjectId: string,
|
|
|
uploadsDirectory: string,
|
|
|
tempDirectory: string,
|
|
|
- taskSink: TaskSink
|
|
|
+ taskSink: TaskSink,
|
|
|
+ api?: ApiPromise
|
|
|
) {
|
|
|
+ this.api = api
|
|
|
+ this.bagId = bagId
|
|
|
this.dataObjectId = dataObjectId
|
|
|
this.taskSink = taskSink
|
|
|
this.operatorUrlCandidates = operatorUrlCandidates
|
|
@@ -131,6 +167,11 @@ export class PrepareDownloadFileTask implements SyncTask {
|
|
|
// And cloning it seems like a heavy operation.
|
|
|
const operatorUrlIndices: number[] = [...Array(this.operatorUrlCandidates.length).keys()]
|
|
|
|
|
|
+ if (_.isEmpty(this.bagId)) {
|
|
|
+ logger.error(`Sync - invalid task - no bagId for ${this.dataObjectId}`)
|
|
|
+ return
|
|
|
+ }
|
|
|
+
|
|
|
while (!_.isEmpty(operatorUrlIndices)) {
|
|
|
const randomUrlIndex = _.sample(operatorUrlIndices)
|
|
|
if (randomUrlIndex === undefined) {
|
|
@@ -146,12 +187,16 @@ export class PrepareDownloadFileTask implements SyncTask {
|
|
|
|
|
|
try {
|
|
|
const chosenBaseUrl = randomUrl
|
|
|
- const remoteOperatorIds: string[] = await getRemoteDataObjects(chosenBaseUrl)
|
|
|
+ const [remoteOperatorIds, hash] = await Promise.all([
|
|
|
+ getRemoteDataObjects(chosenBaseUrl),
|
|
|
+ this.getExpectedHash(),
|
|
|
+ ])
|
|
|
|
|
|
if (remoteOperatorIds.includes(this.dataObjectId)) {
|
|
|
const newTask = new DownloadFileTask(
|
|
|
chosenBaseUrl,
|
|
|
this.dataObjectId,
|
|
|
+ hash,
|
|
|
this.uploadsDirectory,
|
|
|
this.tempDirectory
|
|
|
)
|
|
@@ -165,4 +210,14 @@ export class PrepareDownloadFileTask implements SyncTask {
|
|
|
|
|
|
logger.warn(`Sync - cannot get operator URLs for ${this.dataObjectId}`)
|
|
|
}
|
|
|
+
|
|
|
+ async getExpectedHash(): Promise<string | undefined> {
|
|
|
+ if (this.api !== undefined) {
|
|
|
+ const convertedBagId = parseBagId(this.bagId)
|
|
|
+ const dataObject = await this.api.query.storage.dataObjectsById(convertedBagId, this.dataObjectId)
|
|
|
+ return hexToString(dataObject.ipfsContentId.toString())
|
|
|
+ }
|
|
|
+
|
|
|
+ return undefined
|
|
|
+ }
|
|
|
}
|