prepareData.ts 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. // import featuredContentJson from './data/featuredContent.json'
  2. import reportedChannelsJson from './data/reportedChannels.json'
  3. import reportedVideosJson from './data/reportedVideos.json'
  4. import videoEventsJson from './data/videoEvents.json'
  5. import { Report, VideoViewEvent } from '../../src/model'
  6. import { stringify } from 'csv-stringify/sync'
  7. import fs from 'fs'
  8. import path from 'path'
  9. import { randomAsHex } from '@polkadot/util-crypto'
  // Directory that receives the generated `video_view_event` and `report` files,
  // resolved relative to this script's own location.
  const OUTPUT_PATH = path.join(__dirname, '..', '..', 'db', 'persisted')
  11. // type FeaturedContent = {
  12. // featuredVideosPerCategory: {
  13. // [categoryId: string]: {
  14. // videoId: string
  15. // videoCutUrl: string
  16. // }[]
  17. // }
  18. // videoHero: {
  19. // videoId: string
  20. // heroTitle: string
  21. // heroVideoCutUrl: string
  22. // heroPosterUrl: string
  23. // }
  24. // }
  /** Timestamp wrapper used by the input JSON: the date string sits under a `$date` key. */
  interface JsonDate {
    $date: string
  }

  /** Fields shared by every report entry in the input JSON. */
  interface ReportedContent {
    reporterIp: string
    timestamp: JsonDate
    rationale: string
  }

  /** A report entry that targets a channel. */
  interface ReportedChannel extends ReportedContent {
    channelId: string
  }

  /** A report entry that targets a video. */
  interface ReportedVideo extends ReportedContent {
    videoId: string
  }

  /** A single entry of the video events input; `type` tells the kinds of event apart. */
  interface VideoEvent {
    videoId: string
    channelId: string
    timestamp: JsonDate
    actorId: string
    type: string
  }
  // Bind the imported JSON documents to the local input types.
  const reportedChannels: ReportedChannel[] = reportedChannelsJson
  const reportedVideos: ReportedVideo[] = reportedVideosJson
  const videoEvents: VideoEvent[] = videoEventsJson

  console.log('Preparing Orion v1 data for import...')

  /**
   * Converts one input report entry (channel or video) into a `Report`.
   * `channelId` / `videoId` are taken from the entry only when the key is
   * actually present on it and are left `undefined` otherwise.
   * Each report gets a freshly generated random hex id without the `0x` prefix.
   */
  const toReport = (entry: ReportedChannel | ReportedVideo): Report => {
    const channelId = 'channelId' in entry ? entry.channelId : undefined
    const videoId = 'videoId' in entry ? entry.videoId : undefined
    return new Report({
      id: randomAsHex(16).replace('0x', ''),
      channelId,
      videoId,
      ip: entry.reporterIp,
      rationale: entry.rationale,
      timestamp: new Date(entry.timestamp.$date),
    })
  }

  // Channel reports come first, followed by video reports.
  const reports = [...reportedChannels, ...reportedVideos].map((entry) => toReport(entry))
  // Only events of type 'ADD_VIEW' are turned into view records.
  const addViewEvents = videoEvents.filter(({ type }) => type === 'ADD_VIEW')

  // The event's actorId is stored as the view's `ip`.
  let views = addViewEvents.map((event) => {
    return new VideoViewEvent({
      ip: event.actorId,
      timestamp: new Date(event.timestamp.$date),
      videoId: event.videoId,
    })
  })

  // Order the views from oldest to newest (sorts the freshly built array in place).
  views.sort((earlier, later) => earlier.timestamp.getTime() - later.timestamp.getTime())
  // Optional de-duplication, enabled with EXCLUDE_DUPLICATE_VIEWS=true and a
  // non-empty VIDEO_VIEW_PER_IP_TIME_LIMIT (interpreted as seconds): a view is
  // dropped when an already kept view of the same video from the same ip is
  // less than the time limit older than it.
  if (process.env.EXCLUDE_DUPLICATE_VIEWS === 'true' && process.env.VIDEO_VIEW_PER_IP_TIME_LIMIT) {
    const timeLimitMs = parseInt(process.env.VIDEO_VIEW_PER_IP_TIME_LIMIT, 10) * 1000
    if (Number.isNaN(timeLimitMs)) {
      // Comparing against NaN never matches, so a non-numeric limit would
      // silently keep every view; keep them all, but say so.
      console.warn(
        `VIDEO_VIEW_PER_IP_TIME_LIMIT="${process.env.VIDEO_VIEW_PER_IP_TIME_LIMIT}" is not a number, ` +
          `duplicate views will not be excluded.`
      )
    } else {
      // `views` is sorted by ascending timestamp, so for every (ip, videoId)
      // pair the most recently kept view is also the newest kept one. Comparing
      // against that single timestamp is therefore equivalent to scanning all
      // kept views, without the quadratic cost.
      const lastKeptMsByIp = new Map<
        VideoViewEvent['ip'],
        Map<VideoViewEvent['videoId'], number>
      >()
      views = views.filter((v) => {
        const viewMs = v.timestamp.getTime()
        let lastKeptMsByVideo = lastKeptMsByIp.get(v.ip)
        if (lastKeptMsByVideo === undefined) {
          lastKeptMsByVideo = new Map()
          lastKeptMsByIp.set(v.ip, lastKeptMsByVideo)
        }
        const lastKeptMs = lastKeptMsByVideo.get(v.videoId)
        if (lastKeptMs !== undefined && lastKeptMs > viewMs - timeLimitMs) {
          // Still inside the time window of the previously kept view: duplicate.
          return false
        }
        lastKeptMsByVideo.set(v.videoId, viewMs)
        return true
      })
    }
  }
  // Give every view the id `<videoId>-<n>`, where n is the 1-based position of
  // the view among all views of the same video, in the current order of `views`.
  // A running counter per video replaces re-scanning all preceding views for
  // every element, which was quadratic in the number of views.
  const viewsSeenPerVideo = new Map<VideoViewEvent['videoId'], number>()
  views.forEach((v) => {
    const ordinal = (viewsSeenPerVideo.get(v.videoId) ?? 0) + 1
    viewsSeenPerVideo.set(v.videoId, ordinal)
    v.id = `${v.videoId}-${ordinal}`
  })
  // Fields written for every view, in output column order.
  const viewColumns: Array<keyof VideoViewEvent> = ['id', 'videoId', 'ip', 'timestamp']

  // Fields written for every report, in output column order.
  const reportColumns: Array<keyof Report> = ['id', 'ip', 'channelId', 'videoId', 'timestamp', 'rationale']
  // Date values are written in ISO 8601 form.
  const dateToIso = (d: Date) => d.toISOString()

  // Views: serialize with csv-stringify, write the file, then report what was saved.
  const viewsFile = `${OUTPUT_PATH}/video_view_event`
  const viewsCsv = stringify(views, { columns: viewColumns, cast: { date: dateToIso } })
  fs.writeFileSync(viewsFile, viewsCsv)
  console.log(
    `${views.length} video views saved to "${viewsFile}". ` +
      `Will be imported during Orion v2 migration step.`
  )

  // Reports: same procedure with the report columns.
  const reportsFile = `${OUTPUT_PATH}/report`
  const reportsCsv = stringify(reports, { columns: reportColumns, cast: { date: dateToIso } })
  fs.writeFileSync(reportsFile, reportsCsv)
  console.log(
    `${reports.length} reports saved to "${reportsFile}". ` +
      `Will be imported during Orion v2 migration step.`
  )