Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Count how many crawled domains fall under each list type
// (the `author` value stored on the domain document).
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // Collapse the crawled URLs to one row per domainId.
    { $group: { _id: "$domainId" } },
    // Attach the matching medusa_domains document(s) as the array `domain`.
    {
      $lookup: {
        from: "medusa_domains",
        localField: "_id",
        foreignField: "_id",
        as: "domain",
      },
    },
    // One row per joined domain; ids without a match in medusa_domains drop out here.
    { $unwind: "$domain" },
    // Keep only the domain id and its author.
    { $project: { _id: "$domain._id", author: "$domain.author" } },
    // Count the domains per author and keep the rows behind each count.
    { $group: { _id: "$author", total: { $sum: 1 }, rows: { $push: "$$ROOT" } } },
  ],
  { allowDiskUse: true }
);
// Out: 2 => 114, 1 => 778, 3 => 3057
// Count the domains with author 2 (Duc's list, according to the original note)
// that have crawled URLs matching hacked.html or hac9.html.
// NOTE(review): the original note says the links *end with* these names, but the
// pattern below is unanchored and matches them anywhere in the URL — confirm intent.
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // Keep only the URLs that mention one of the two page names.
    { $match: { url: /hacked\.html|hac9\.html/ } },
    // Per domain: number of matching URLs plus the URLs themselves.
    {
      $group: {
        _id: "$domainId",
        total: { $sum: 1 },
        rows: { $push: { url: "$url", _id: "$_id" } },
      },
    },
    // Domains with the most matching URLs first.
    { $sort: { total: -1 } },
    // Attach the medusa_domains document so the author can be filtered on.
    {
      $lookup: {
        from: "medusa_domains",
        localField: "_id",
        foreignField: "_id",
        as: "domain",
      },
    },
    { $unwind: "$domain" },
    // Restrict to domains whose author is 2.
    { $match: { "domain.author": 2 } },
    // Drop the joined domain document again.
    { $project: { total: 1, rows: 1 } },
    // Single result row: `total` is the number of remaining domains.
    { $group: { _id: null, total: { $sum: 1 }, rows: { $push: "$$ROOT" } } },
  ],
  { allowDiskUse: true }
);
// Out: 104
// The queries below are applied to the list of 5K Vietnamese sites.
// In practice the collected statistics have not yet reached 5K.

// xCache: for each distinct xCache value, how many domains (authors 2 and 3) returned it.
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // Ignore URLs where xCache is null or missing.
    { $match: { xCache: { $ne: null } } },
    // One row per distinct (domainId, xCache) pair.
    { $group: { _id: { domainId: "$domainId", xCache: "$xCache" } } },
    // Attach the medusa_domains document for the pair's domainId.
    {
      $lookup: {
        from: "medusa_domains",
        localField: "_id.domainId",
        foreignField: "_id",
        as: "domain",
      },
    },
    { $unwind: "$domain" },
    // Only domains whose author is 2 or 3.
    { $match: { "domain.author": { $in: [2, 3] } } },
    // Flatten to the domain name and the xCache value.
    { $project: { domain: "$domain.domain", _id: 0, xCache: "$_id.xCache" } },
    // Count and list the domains per xCache value.
    { $group: { _id: "$xCache", total: { $sum: 1 }, domains: { $push: "$domain" } } },
    // Most common values first.
    { $sort: { total: -1 } },
  ],
  { allowDiskUse: true }
);
// cacheControl: for each distinct cacheControl value, how many domains
// (authors 2 and 3) returned it.
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // Ignore URLs where cacheControl is null or missing.
    { $match: { cacheControl: { $ne: null } } },
    // One row per distinct (domainId, cacheControl) pair.
    { $group: { _id: { domainId: "$domainId", cacheControl: "$cacheControl" } } },
    // Attach the medusa_domains document for the pair's domainId.
    {
      $lookup: {
        from: "medusa_domains",
        localField: "_id.domainId",
        foreignField: "_id",
        as: "domain",
      },
    },
    { $unwind: "$domain" },
    // Only domains whose author is 2 or 3.
    { $match: { "domain.author": { $in: [2, 3] } } },
    // Flatten to the domain name and the cacheControl value.
    { $project: { domain: "$domain.domain", _id: 0, cacheControl: "$_id.cacheControl" } },
    // Count and list the domains per cacheControl value.
    { $group: { _id: "$cacheControl", total: { $sum: 1 }, domains: { $push: "$domain" } } },
    // Most common values first.
    { $sort: { total: -1 } },
  ],
  { allowDiskUse: true }
);
// server: for each distinct `server` value, how many domains (authors 2 and 3) returned it.
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // Ignore URLs where server is null or missing.
    { $match: { server: { $ne: null } } },
    // One row per distinct (domainId, server) pair.
    { $group: { _id: { domainId: "$domainId", server: "$server" } } },
    // Attach the medusa_domains document for the pair's domainId.
    {
      $lookup: {
        from: "medusa_domains",
        localField: "_id.domainId",
        foreignField: "_id",
        as: "domain",
      },
    },
    { $unwind: "$domain" },
    // Only domains whose author is 2 or 3.
    { $match: { "domain.author": { $in: [2, 3] } } },
    // Flatten to the domain name and the server value.
    { $project: { domain: "$domain.domain", _id: 0, server: "$_id.server" } },
    // Count and list the domains per server value.
    { $group: { _id: "$server", total: { $sum: 1 }, domains: { $push: "$domain" } } },
    // Most common values first.
    { $sort: { total: -1 } },
  ],
  { allowDiskUse: true }
);
// Crawled URLs whose `vector.iframe` value is greater than 0, grouped per domain.
db.getCollection("medusa_crawlUrls").aggregate(
  [
    { $match: { "vector.iframe": { $gt: 0 } } },
    // Group on `domainId`. The previous key "$domain._id" refers to a `domain`
    // field that only exists after a $lookup, which this pipeline never performs,
    // so it resolved to null and every URL ended up in one single group.
    // NOTE(review): assumes crawl-URL documents carry no embedded `domain`
    // sub-document — confirm against the collection.
    { $group: { _id: "$domainId", rows: { $push: "$$ROOT" } } },
  ],
  { allowDiskUse: true }
);
// Number of domains containing sensitive images
// (images with `highlight: true`; "sensitive" is taken from the original note).
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // One row per image of every crawled URL.
    { $unwind: "$images" },
    // Keep only the highlighted images.
    { $match: { "images.highlight": true } },
    // Back to one row per URL document that has at least one highlighted image.
    { $group: { _id: "$_id", domainId: { $first: "$domainId" } } },
    // One row per domain; `total` is that domain's number of such URLs.
    { $group: { _id: "$domainId", total: { $sum: 1 } } },
    // Single result row. `total` counts the domain rows, which is the figure the
    // heading asks for. The previous stage summed "$total" instead, which yields
    // the number of URLs; that figure is still reported as `urls`.
    { $group: { _id: null, total: { $sum: 1 }, urls: { $sum: "$total" } } },
  ],
  { allowDiskUse: true }
);
// Number of domains containing sensitive keywords, broken down by domain author.
db.getCollection("medusa_crawlUrls").aggregate(
  [
    // One row per sensitiveStrings entry; URLs with no entries drop out here.
    { $unwind: "$sensitiveStrings" },
    // Back to one row per URL document that has at least one entry.
    { $group: { _id: "$_id", domainId: { $first: "$domainId" } } },
    // One row per domain; `total` is that domain's number of such URLs.
    { $group: { _id: "$domainId", total: { $sum: 1 } } },
    // Attach the medusa_domains document to read its author.
    {
      $lookup: {
        from: "medusa_domains",
        localField: "_id",
        foreignField: "_id",
        as: "domain",
      },
    },
    { $unwind: "$domain" },
    // Per author: the domain rows and how many domains there are.
    {
      $group: {
        _id: "$domain.author",
        records: { $push: "$$ROOT" },
        total: { $sum: 1 },
      },
    },
  ],
  { allowDiskUse: true }
);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement