// Snippet from inside a Crawlee request handler: `parseWithCheerio` is supplied
// by the handler context and resolves to a Cheerio instance for the loaded page.

// Use parseWithCheerio for efficient HTML parsing
const $ = await parseWithCheerio();

// Extract genre and shows directly from the HTML structure.
// Each "collections-row" element yields one { genre, items } record.
const data = $('[data-uia="collections-row"]')
  .map((_rowIndex, el) => {
    const genre = $(el).find('[data-uia="collections-row-title"]').text().trim();
    const items = $(el)
      .find('[data-uia="collections-title"]')
      .map((_itemIndex, itemEl) => $(itemEl).text().trim())
      .get();
    return { genre, items };
  })
  .get();

// Parallel arrays: shows[i] lists the titles found under genres[i].
const genres = data.map((d) => d.genre);
const shows = data.map((d) => d.items);
在上面的代码片段中，我们使用 Cheerio 的 `parseWithCheerio` 解析当前页面的 HTML 内容，并从 HTML 结构中提取 `genres` 和 `shows` 信息。
import { CheerioCrawler, log, Dataset } from "crawlee";

/**
 * Crawler that scrapes genre rows from a Netflix browse page.
 *
 * For every processed page it pushes a single record of the form
 * `{ genres: string[], shows: string[][] }`, where `shows[i]` holds the
 * titles found in the row whose heading is `genres[i]`.
 */
const crawler = new CheerioCrawler({
  requestHandler: async ({ request, parseWithCheerio, pushData }) => {
    log.info(`Processing: ${request.url}`);

    // Use parseWithCheerio for efficient HTML parsing
    const $ = await parseWithCheerio();

    // Extract genre and shows directly from the HTML structure
    const data = $('[data-uia="collections-row"]')
      .map((_rowIndex, el) => {
        const genre = $(el).find('[data-uia="collections-row-title"]').text().trim();
        const items = $(el)
          .find('[data-uia="collections-title"]')
          .map((_itemIndex, itemEl) => $(itemEl).text().trim())
          .get();
        return { genre, items };
      })
      .get();

    // Prepare data for pushing
    const genres = data.map((d) => d.genre);
    const shows = data.map((d) => d.items);

    await pushData({ genres, shows });
  },

  // Limit crawls for efficiency
  maxRequestsPerCrawl: 20,
});

await crawler.run(["https://www.netflix.com/in/browse/genre/1191605"]);

// Export the collected dataset as JSON under the key "results".
await Dataset.exportToJSON("results");
import{useState}from"react";import"./App.css";importjsonDatafrom"../storage/key_value_stores/default/results.json";functionHeaderAndSelector({handleChange}){return (<><h1className="header">NetflixWebShowRecommender</h1>
<divclassName="genre-selector"><selectonChange={handleChange}className="select-genre"><optionvalue="">Selectyourgenre</option>
{jsonData[0].genres.map((genres,key)=>{return (<optionkey={key}value={key}>{genres}</option>
);})}</select>
</div>
</>
);}functionApp(){const[count,setCount]=useState(null);consthandleChange=(event)=>{constvalue=event.target.value;if (value)setCount(parseInt(value));};// Validate count to ensure it is within the bounds of the jsonData.shows arrayconstisValidCount=count!==null&&count<=jsonData[0].shows.length;return (<divclassName="app-container"><HeaderAndSelectorhandleChange={handleChange}/>
<divclassName="shows-container">{isValidCount&&(<><divclassName="shows-list"><ul>{jsonData[0].shows[count].slice(0,20).map((show,index)=>(<likey={index}className="show-item">{show}</li>
))}</ul>
</div>
<divclassName="shows-list"><ul>{jsonData[0].shows[count].slice(20).map((show,index)=>(<likey={index}className="show-item">{show}</li>
))}</ul>
</div>
</>
)}</div>
</div>
);}exportdefaultApp;