/* eslint-disable react/no-unescaped-entities */
import React from 'react';
import HelmetComponent from '../../../components/HelmetComponent';
import { useNavigate } from 'react-router-dom';
import img from './img/eightEthicalWebScrapingBestPractices2024.jpg';
import { eightEthicalWebScrapingBestPractices2024Path } from '../paths';

export default function EightEthicalWebScrapingBestPractices2024() {
  const navigate = useNavigate();
  return (
    <div className="max-w-7xl m-auto p-8">
      <HelmetComponent
        title="8 Ethical Web Scraping Best Practices 2024"
        description={
          'Discover 8 essential ethical practices for web scraping in 2024 to ensure compliance, respect, and better data quality.'
        }
        canonicalLink={`/blog/${eightEthicalWebScrapingBestPractices2024Path}`}
        imageSrc={img}
        type="article"
        keywords="ethical web scraping, data privacy, robots.txt, web scraping best practices, APIs, rate limiting, legal compliance"
      />

      <div className="text-lg">
        <div className="max-w-5xl m-auto">
          <h1 className="nm-default-page-heading">
            8 Ethical Web Scraping Best Practices 2024
          </h1>
          <div className="m-auto mt-8 text-left">
            <p className="mt-4">
              {
                'Discover 8 essential ethical practices for web scraping in 2024 to ensure compliance, respect, and better data quality.'
              }
            </p>
          </div>
          <div>
            <img
              src={img}
              alt="article preview"
              className="mt-8 w-full md:max-w-2xl m-auto"
            />
          </div>
        </div>

        <div>
          <h1 className="nm-default-page-heading">
            8 Ethical Web Scraping Best Practices 2024
          </h1>
          <p className="mt-4">
            Web scraping is powerful, but it comes with responsibilities. Here's
            how to do it right:
          </p>
          <ol className="list-disc list-inside px-2">
            <li className="mt-2">Follow robots.txt rules</li>
            <li className="mt-2">Respect website terms of service</li>
            <li className="mt-2">Use rate limiting</li>
            <li className="mt-2">Opt for APIs when available</li>
            <li className="mt-2">Protect data privacy and security</li>
            <li className="mt-2">Give credit to data sources</li>
            <li className="mt-2">Reduce server impact</li>
            <li className="mt-2">Stay updated on legal and ethical rules</li>
          </ol>
          <p className="mt-4">
            Why bother? These practices help you avoid legal issues, build good
            relationships, get better data, and keep the internet running
            smoothly.
          </p>
          <p className="mt-4">Quick Comparison:</p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Practice</th>
                <th className="p-3 md:py-3 md:px-6">Why It Matters</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Follow robots.txt</td>
                <td className="text-center p-4">Respects site owner wishes</td>
              </tr>
              <tr>
                <td className="text-center p-4">Use rate limiting</td>
                <td className="text-center p-4">Prevents server overload</td>
              </tr>
              <tr>
                <td className="text-center p-4">Use APIs</td>
                <td className="text-center p-4">
                  Gets cleaner, more reliable data
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Protect privacy</td>
                <td className="text-center p-4">Keeps you legally compliant</td>
              </tr>
              <tr>
                <td className="text-center p-4">Give credit</td>
                <td className="text-center p-4">Builds trust and goodwill</td>
              </tr>
            </tbody>
          </table>
          <p className="mt-4">
            Bottom line: Ethical scraping takes more effort, but it's worth it.
            It's about doing right by everyone online.
          </p>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            Related video from YouTube
          </h2>
          <iframe
            className="w-full h-96 mt-4"
            src="https://www.youtube-nocookie.com/embed/F_RzsU0iNxU"
            loading="lazy"
          ></iframe>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            1. Follow Robots.txt Rules
          </h2>
          <p className="mt-4">
            Think of robots.txt as a website's bouncer. It tells web scrapers
            where they can and can't go. Following these rules isn't just nice -
            it's essential for ethical scraping.
          </p>
          <p className="mt-4">Why? It's simple:</p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">It's the right thing to do</li>
            <li className="mt-2">It keeps you out of legal hot water</li>
            <li className="mt-2">
              It prevents server overload and data breaches
            </li>
          </ul>
          <p className="mt-4">Here's how to play by the rules:</p>
          <ol className="list-disc list-inside px-2">
            <li className="mt-2">
              Find the robots.txt file (usually at example.com/robots.txt)
            </li>
            <li className="mt-2">Read it</li>
            <li className="mt-2">Program your scraper accordingly</li>
          </ol>
          <p className="mt-4">
            Real talk: In 2019,{' '}
            <a
              href="https://www.linkedin.com/"
              target="_blank"
              rel="noreferrer"
            >
              LinkedIn
            </a>{' '}
            sued{' '}
            <a
              href="https://en.wikipedia.org/wiki/HiQ_Labs_v._LinkedIn"
              target="_blank"
              rel="noreferrer"
            >
              hiQ Labs
            </a>{' '}
            for ignoring their robots.txt. The case went to the Supreme Court.
            That's how serious this can get.
          </p>
          <p className="mt-4">
            So, always check the robots.txt first. Here's what you might see:
          </p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Directive</th>
                <th className="p-3 md:py-3 md:px-6">What it means</th>
                <th className="p-3 md:py-3 md:px-6">Example</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">User-agent</td>
                <td className="text-center p-4">Who the rules apply to</td>
                <td className="text-center p-4">User-agent: *</td>
              </tr>
              <tr>
                <td className="text-center p-4">Disallow</td>
                <td className="text-center p-4">No-go zones</td>
                <td className="text-center p-4">Disallow: /private/</td>
              </tr>
              <tr>
                <td className="text-center p-4">Allow</td>
                <td className="text-center p-4">Exceptions to Disallow</td>
                <td className="text-center p-4">Allow: /public/</td>
              </tr>
              <tr>
                <td className="text-center p-4">Crawl-delay</td>
                <td className="text-center p-4">Wait time between requests</td>
                <td className="text-center p-4">Crawl-delay: 10</td>
              </tr>
            </tbody>
          </table>
          <p className="mt-4">
            Remember: robots.txt is a guide, not a lock. Some ignore it. Don't
            be that guy.
          </p>
          <blockquote>
            <p className="mt-4">
              &quot;The robots.txt is the most sensitive file in the SEO
              universe. A single character can break a whole site.&quot; - Kevin
              Indig, Growth Advisor
            </p>
          </blockquote>
          <p className="mt-4">
            This isn't just about ethical scraping. It's about respecting the
            websites you interact with.
          </p>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            2. Follow Website Terms of Service
          </h2>
          <p className="mt-4">
            Web scraping isn't just about tech skills. It's about playing by the
            rules. Every site has its Terms of Service (ToS), and ignoring them
            can be risky.
          </p>
          <p className="mt-4">Why ToS matter:</p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">They're legal contracts</li>
            <li className="mt-2">They set rules for using a site's data</li>
            <li className="mt-2">Breaking them can lead to trouble</li>
          </ul>
          <p className="mt-4">
            In 2019, LinkedIn sued hiQ Labs for scraping public profiles. The
            case went to the Supreme Court, showing how serious this can get.
          </p>
          <p className="mt-4">To stay safe:</p>
          <ol className="list-disc list-inside px-2">
            <li className="mt-2">Read the ToS before scraping</li>
            <li className="mt-2">
              Check for mentions of data collection or scraping
            </li>
            <li className="mt-2">If unsure, ask permission</li>
          </ol>
          <p className="mt-4">Sites handle scraping differently:</p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">ToS Stance</th>
                <th className="p-3 md:py-3 md:px-6">Meaning</th>
                <th className="p-3 md:py-3 md:px-6">Example</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Prohibited</td>
                <td className="text-center p-4">No scraping</td>
                <td className="text-center p-4">
                  &quot;No automated access to the Service&quot;
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Conditional</td>
                <td className="text-center p-4">Limited scraping</td>
                <td className="text-center p-4">
                  &quot;Limited automation for personal use only&quot;
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Silent</td>
                <td className="text-center p-4">No mention</td>
                <td className="text-center p-4">
                  Be cautious, consider asking
                </td>
              </tr>
            </tbody>
          </table>
          <blockquote>
            <p className="mt-4">
              Just because data is public doesn't mean it's free to scrape.
            </p>
          </blockquote>
          <p className="mt-4">
            The 2015{' '}
            <a
              href="https://en.wikipedia.org/wiki/Ryanair"
              target="_blank"
              rel="noreferrer"
            >
              Ryanair
            </a>{' '}
            case proved this. The EU Court said{' '}
            <a
              href="https://en.wikipedia.org/wiki/Ryanair"
              target="_blank"
              rel="noreferrer"
            >
              Ryanair
            </a>{' '}
            could use its ToS to limit scraping, even for public flight data.
          </p>
          <p className="mt-4">
            <strong>Pro tip:</strong> Keep an eye on ToS changes for sites you
            scrape. They can update without warning, changing what's allowed.
          </p>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            3. Use Rate Limiting
          </h2>
          <p className="mt-4">
            Rate limiting is crucial for ethical web scraping. It helps you
            avoid server overload and blocking. Here's how to do it:
          </p>
          <p className="mt-4">
            <strong>Space out your requests</strong>
          </p>
          <p className="mt-4">
            Don't rapid-fire requests. Add a short delay after each one.
          </p>

          <p className="mt-4">
            <strong>Check robots.txt</strong>
          </p>
          <p className="mt-4">
            Honor the &quot;crawl-delay&quot; in robots.txt files.
          </p>
          <p className="mt-4">
            <strong>Mix up your delays</strong>
          </p>
          <p className="mt-4">
            Fixed delays can look fishy. Use random intervals instead.
          </p>
          <p className="mt-4">
            <strong>Watch and adjust</strong>
          </p>
          <p className="mt-4">See 429 errors? Slow down.</p>
          <p className="mt-4">
            <strong>Spread it out</strong>
          </p>
          <p className="mt-4">For big jobs, use multiple IPs.</p>

          <p className="mt-4">
            A sensible starting cap is 10 requests per minute.
          </p>
          <p className="mt-4">
            Remember: Be polite to servers. Scrape responsibly!
          </p>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            4. Use APIs When Possible
          </h2>
          <p className="mt-4">
            APIs are often your best bet for scraping data. They give you direct
            access to the info you need without the hassle of parsing HTML.
          </p>
          <p className="mt-4">Why choose APIs for ethical web scraping?</p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">
              <strong>Clean data</strong>: You get structured data in JSON or
              XML. Less cleanup work for you.
            </li>
            <li className="mt-2">
              <strong>Speed and reliability</strong>: API calls beat raw HTML
              scraping hands down.
            </li>
            <li className="mt-2">
              <strong>Lower blocking risk</strong>: Websites are less likely to
              block API requests.
            </li>
            <li className="mt-2">
              <strong>Clear rules</strong>: APIs come with terms of service. You
              know what you can and can't do.
            </li>
          </ul>
          <p className="mt-4">Using APIs effectively:</p>
          <p className="mt-4">
            1. Check if the site has an API before scraping.
          </p>
          <p className="mt-4">2. Read the API docs carefully.</p>
          <p className="mt-4">3. Use API keys for authentication.</p>
          <p className="mt-4">
            4. Stick to rate limits. Don't overload servers.
          </p>
          <p className="mt-4">Popular web scraping API comparison:</p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Provider</th>
                <th className="p-3 md:py-3 md:px-6">Starting Price</th>
                <th className="p-3 md:py-3 md:px-6">Request Limit</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">
                  <a
                    href="https://oxylabs.io/products/scraper-api/web/pricing"
                    target="_blank"
                    rel="noreferrer"
                  >
                    Oxylabs
                  </a>
                </td>
                <td className="text-center p-4">$49</td>
                <td className="text-center p-4">17,500 results</td>
              </tr>
              <tr>
                <td className="text-center p-4">
                  <a
                    href="https://brightdata.com/pricing/web-scraper"
                    target="_blank"
                    rel="noreferrer"
                  >
                    Bright Data
                  </a>
                </td>
                <td className="text-center p-4">$500/month</td>
                <td className="text-center p-4">Varies</td>
              </tr>
              <tr>
                <td className="text-center p-4">
                  <a
                    href="https://smartproxy.com/scraping/web"
                    target="_blank"
                    rel="noreferrer"
                  >
                    Smartproxy
                  </a>
                </td>
                <td className="text-center p-4">$50</td>
                <td className="text-center p-4">Pay per request</td>
              </tr>
              <tr>
                <td className="text-center p-4">
                  <a
                    href="https://www.scraperapi.com/"
                    target="_blank"
                    rel="noreferrer"
                  >
                    ScraperAPI
                  </a>
                </td>
                <td className="text-center p-4">Free tier</td>
                <td className="text-center p-4">1,000 API credits</td>
              </tr>
            </tbody>
          </table>
          <p className="mt-4">
            APIs can be pricey, but they often save time and cut legal risks. As
            Akshay Kothari, CPO at{' '}
            <a href="https://www.notion.so/" target="_blank" rel="noreferrer">
              Notion
            </a>
            , puts it:
          </p>
          <blockquote>
            <p className="mt-4">
              <i>
                &quot;Using APIs isn't just about getting data. It's about
                building relationships with the platforms you're working
                with.&quot;
              </i>
            </p>
          </blockquote>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            5. Protect Data Privacy and Security
          </h2>
          <p className="mt-4">
            Web scraping often involves collecting personal data. That's a big
            responsibility. Here's how to handle it right:
          </p>
          <p className="mt-4">
            <strong>Encrypt everything</strong>: Use HTTPS and SSL for scraping
            and transmission. Store data in encrypted cloud services like{' '}
            <a href="https://aws.amazon.com/" target="_blank" rel="noreferrer">
              AWS
            </a>{' '}
            or{' '}
            <a
              href="https://cloud.google.com/"
              target="_blank"
              rel="noreferrer"
            >
              Google Cloud
            </a>
            .
          </p>
          <p className="mt-4">
            <strong>Follow data laws</strong>: Know GDPR and CCPA. Key points:
          </p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">Get consent for personal data</li>
            <li className="mt-2">Inform people about data collection</li>
            <li className="mt-2">Allow data access and deletion</li>
            <li className="mt-2">Report breaches within 72 hours (GDPR)</li>
          </ul>
          <p className="mt-4">
            <strong>Anonymize data</strong>: Remove personal details when
            possible. Use tools like{' '}
            <a href="https://fakerjs.dev/" target="_blank" rel="noreferrer">
              Faker
            </a>{' '}
            or{' '}
            <a
              href="https://amnesia.openaire.eu/"
              target="_blank"
              rel="noreferrer"
            >
              Anonymizer
            </a>{' '}
            for fake names and emails.
          </p>
          <p className="mt-4">
            <strong>Lock down access</strong>: Limit data viewing to authorized
            team members. Use strong authentication.
          </p>
          <p className="mt-4">
            <strong>Audit regularly</strong>: Check your scraping practices
            often. Look for potential issues.
          </p>
          <p className="mt-4">
            <strong>Have a breach plan</strong>: Know how to notify authorities
            and users if something goes wrong.
          </p>
          <p className="mt-4">
            Even public data needs protection. In 2022,{' '}
            <a href="https://about.meta.com/" target="_blank" rel="noreferrer">
              Meta
            </a>{' '}
            got a €265 million GDPR fine for scraped Facebook user data.
          </p>
          <blockquote>
            <p className="mt-4">
              <i>
                &quot;Data scraping can expose your important data, leaving you
                open to password breaches and phishing attacks.&quot; - Burton
                Kelso, Technology Expert
              </i>
            </p>
          </blockquote>
          <p className="mt-4">Stay safe with this checklist:</p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Action</th>
                <th className="p-3 md:py-3 md:px-6">Why It Matters</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Use encryption</td>
                <td className="text-center p-4">Protects intercepted data</td>
              </tr>
              <tr>
                <td className="text-center p-4">Check website terms</td>
                <td className="text-center p-4">Avoids legal issues</td>
              </tr>
              <tr>
                <td className="text-center p-4">Implement access controls</td>
                <td className="text-center p-4">Prevents unauthorized use</td>
              </tr>
              <tr>
                <td className="text-center p-4">Regularly delete old data</td>
                <td className="text-center p-4">Reduces exposure risk</td>
              </tr>
              <tr>
                <td className="text-center p-4">Monitor scraping activities</td>
                <td className="text-center p-4">Catches problems early</td>
              </tr>
            </tbody>
          </table>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            6. Give Credit to Data Sources
          </h2>
          <p className="mt-4">
            Scraping data? Great. Using it? Even better. But don't forget to
            give credit where it's due. It's not just polite—it's crucial.
          </p>
          <p className="mt-4">Why? Simple:</p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">It shows respect</li>
            <li className="mt-2">It boosts your credibility</li>
            <li className="mt-2">It keeps you out of legal hot water</li>
            <li className="mt-2">
              It builds goodwill with the sites you scrape
            </li>
          </ul>
          <p className="mt-4">So, how do you do it right?</p>
          <p className="mt-4">
            1. <strong>Cite your sources</strong>
          </p>
          <p className="mt-4">
            Always mention where you got the data. No exceptions.
          </p>
          <p className="mt-4">
            2. <strong>Link back</strong>
          </p>
          <p className="mt-4">
            Include links to original sources when you can. It's a win-win.
          </p>
          <p className="mt-4">
            3. <strong>Check copyright</strong>
          </p>
          <p className="mt-4">
            Not all public-looking data is free to use. Do your homework.
          </p>
          <p className="mt-4">
            4. <strong>Ask permission</strong>
          </p>
          <p className="mt-4">
            For commercial stuff, reach out to site owners. Be upfront about
            your plans.
          </p>
          <p className="mt-4">
            5. <strong>Use APIs if available</strong>
          </p>
          <p className="mt-4">
            Many sites offer APIs. It's the legal, approved way to go. Always
            check first.
          </p>
          <p className="mt-4">Here's a quick guide:</p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Data Type</th>
                <th className="p-3 md:py-3 md:px-6">How to Credit</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Text</td>
                <td className="text-center p-4">
                  &quot;Data from [Website Name]&quot;
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Images</td>
                <td className="text-center p-4">
                  &quot;Image: [Photographer/Website]&quot;
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Stats</td>
                <td className="text-center p-4">
                  &quot;According to [Source Name], ...&quot;
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Live data</td>
                <td className="text-center p-4">
                  &quot;[Service Name] provides the data&quot;
                </td>
              </tr>
            </tbody>
          </table>
          <blockquote>
            <p className="mt-4">
              &quot;Credit your sources. It's common courtesy.&quot; - Grady
              Andersen, Author
            </p>
          </blockquote>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            7. Reduce Server Impact
          </h2>
          <p className="mt-4">
            Web scraping can hit servers hard. Let's keep it ethical by
            minimizing our footprint:
          </p>
          <p className="mt-4">
            1. <strong>Slow down</strong>
          </p>
          <p className="mt-4">Don't rapid-fire requests. Add delays.</p>

          <p className="mt-4">
            2. <strong>Stay alert</strong>
          </p>
          <p className="mt-4">
            Watch server responses. If you see slowdowns or errors, ease up.
          </p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Code</th>
                <th className="p-3 md:py-3 md:px-6">Meaning</th>
                <th className="p-3 md:py-3 md:px-6">What to do</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">200</td>
                <td className="text-center p-4">OK</td>
                <td className="text-center p-4">Carry on</td>
              </tr>
              <tr>
                <td className="text-center p-4">429</td>
                <td className="text-center p-4">Too Many Requests</td>
                <td className="text-center p-4">Slow down</td>
              </tr>
              <tr>
                <td className="text-center p-4">503</td>
                <td className="text-center p-4">Service Unavailable</td>
                <td className="text-center p-4">Pause, retry later</td>
              </tr>
            </tbody>
          </table>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            8. Keep Up with Legal and Ethical Rules
          </h2>
          <p className="mt-4">
            Web scraping laws change fast. Stay ethical by keeping up. Here's
            how:
          </p>
          <ol className="list-disc list-inside px-2">
            <li className="mt-2">
              <strong>Check local laws</strong>: Rules vary. US? Public data
              scraping's often fine. EU? GDPR says no to personal data scraping.
            </li>
            <li className="mt-2">
              <strong>Watch court cases</strong>: They shape the rules. Take hiQ
              Labs vs. LinkedIn:
            </li>
          </ol>
          <blockquote>
            The court said scraping public LinkedIn profiles was legal. Big deal
            for US web scraping.
          </blockquote>
          <ol className="list-disc list-inside px-2">
            <li className="mt-2">
              <strong>Read terms of service</strong>: Many sites say no to
              scraping. Always check first.
            </li>
            <li className="mt-2">
              <strong>Follow data protection laws</strong>: GDPR and CCPA are
              strict on personal data.
            </li>
            <li className="mt-2">
              <strong>Get legal advice</strong>: When in doubt, ask a lawyer.
            </li>
            <li className="mt-2">
              <strong>Be open</strong>: Tell site owners what you're doing.
              Prevents headaches.
            </li>
            <li className="mt-2">
              <strong>Protect scraped data</strong>: Good security is a must.
            </li>
            <li className="mt-2">
              <strong>Stay informed</strong>: Join scraping forums. Follow tech
              law blogs.
            </li>
          </ol>
          <p className="mt-4">
            Stay updated, and you'll scrape data without breaking rules or
            hurting others.
          </p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Do's</th>
                <th className="p-3 md:py-3 md:px-6">Don'ts</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Scrape public data</td>
                <td className="text-center p-4">
                  Scrape personal info without consent
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Follow robots.txt</td>
                <td className="text-center p-4">
                  Ignore website terms of service
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Use APIs when available</td>
                <td className="text-center p-4">
                  Overload servers with requests
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Give credit to data sources</td>
                <td className="text-center p-4">
                  Sell scraped data without permission
                </td>
              </tr>
            </tbody>
          </table>
          <div className="bg-main-test1 p-8 rounded-xl mt-4 text-main-background ">
            <p className="text-3xl font-heading">Track sites with Notify Me</p>
            <p className="mt-4 text-base">
              {
                'Keep track of any website changes easily and receive instant alerts on Discord and Telegram. Start tracking sites that are important to you for free. All you need is a site URL!'
              }
            </p>
            <button
              className="btn btn-wide bg-main-background text-white mt-4"
              onClick={() => navigate('/')}
            >
              Start Tracking Now
            </button>
          </div>
          <h2 className="text-main-letters text-3xl font-heading mt-4">
            Conclusion
          </h2>
          <p className="mt-4">
            Web scraping is powerful, but it comes with responsibilities.
            Ethical practices aren't just about avoiding trouble—they build
            trust and respect online.
          </p>
          <p className="mt-4">
            Here's a quick recap of 8 key ethical web scraping practices for
            2024:
          </p>
          <ol className="list-disc list-inside px-2">
            <li className="mt-2">Follow robots.txt rules</li>
            <li className="mt-2">Respect website terms of service</li>
            <li className="mt-2">Use rate limiting</li>
            <li className="mt-2">Opt for APIs when available</li>
            <li className="mt-2">Protect data privacy and security</li>
            <li className="mt-2">Give credit to data sources</li>
            <li className="mt-2">Reduce server impact</li>
            <li className="mt-2">Stay updated on legal and ethical rules</li>
          </ol>
          <p className="mt-4">Why bother? These practices help you:</p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">Dodge legal headaches</li>
            <li className="mt-2">Build good relationships with site owners</li>
            <li className="mt-2">Get better data</li>
            <li className="mt-2">
              Keep the internet running smoothly for everyone
            </li>
          </ul>
          <p className="mt-4">
            It's not just about what you CAN do—it's about what you SHOULD do.
          </p>
          <blockquote>
            <p className="mt-4">
              &quot;Our commitment to ethical data collection is not just about
              compliance; it's about setting a standard in the industry. We
              believe in harnessing the power of data while respecting
              individual privacy and promoting transparency.&quot; - Senior
              Executive, PromptCloud
            </p>
          </blockquote>
          <p className="mt-4">
            This quote shows why big players take ethics seriously. It's about
            doing the right thing, not just following rules.
          </p>
          <p className="mt-4">Here's a quick pros and cons breakdown:</p>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Pros</th>
                <th className="p-3 md:py-3 md:px-6">Cons</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Builds trust</td>
                <td className="text-center p-4">Can be slower</td>
              </tr>
              <tr>
                <td className="text-center p-4">Avoids legal trouble</td>
                <td className="text-center p-4">May limit some data access</td>
              </tr>
              <tr>
                <td className="text-center p-4">Gets higher quality data</td>
                <td className="text-center p-4">Requires more planning</td>
              </tr>
              <tr>
                <td className="text-center p-4">
                  Helps maintain a good web ecosystem
                </td>
                <td className="text-center p-4">Needs ongoing education</td>
              </tr>
            </tbody>
          </table>
          <p className="mt-4">
            Ethical web scraping might take more effort, but it's worth it in
            the long run. It's about building a better, more trustworthy
            internet for everyone.
          </p>
          <h2 className="text-main-letters text-3xl font-heading mt-4">FAQs</h2>
          <h3 className="text-main-letters text-2xl font-heading mt-4">
            What are the ethical considerations for web scraping?
          </h3>
          <p className="mt-4">
            Web scraping ethics boil down to one thing: transparency. Here's
            what that means:
          </p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">Tell people you're scraping</li>
            <li className="mt-2">Explain why you're doing it</li>
            <li className="mt-2">Share how you're doing it</li>
            <li className="mt-2">Be clear about how you'll use the data</li>
          </ul>
          <p className="mt-4">
            Gabija Fatenaite from Oxylabs puts it this way:
          </p>
          <blockquote>
            <p className="mt-4">
              &quot;Transparency is the overriding principle in ethical web
              scraping practices. Those who participate in scraping activities
              should not leave out their scraping activities. They should share
              information about the purpose of their scraping, their methods,
              and how they use the data.&quot;
            </p>
          </blockquote>
          <p className="mt-4">But that's not all. You should also:</p>
          <ul className="list-disc list-inside px-2">
            <li className="mt-2">Follow website rules</li>
            <li className="mt-2">Keep data safe and private</li>
            <li className="mt-2">Give credit where it's due</li>
          </ul>
          <table className="w-full table-auto mt-4 border">
            <thead>
              <tr className="bg-gray-200 text-gray-600 text-sm md:text-base leading-normal">
                <th className="p-3 md:py-3 md:px-6">Ethical Practice</th>
                <th className="p-3 md:py-3 md:px-6">What It Means</th>
              </tr>
            </thead>
            <tbody className="text-xs md:text-sm">
              <tr>
                <td className="text-center p-4">Transparency</td>
                <td className="text-center p-4">
                  Be open about what you're doing
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Consent</td>
                <td className="text-center p-4">
                  Ask for permission when you can
                </td>
              </tr>
              <tr>
                <td className="text-center p-4">Data Protection</td>
                <td className="text-center p-4">Keep the data safe</td>
              </tr>
              <tr>
                <td className="text-center p-4">Attribution</td>
                <td className="text-center p-4">Give credit to your sources</td>
              </tr>
            </tbody>
          </table>
          <p className="mt-4">
            Here's how ethical web scraping can make a real difference:
          </p>
          <p className="mt-4">
            1.{' '}
            <a
              href="https://www.thorn.org/blog/a-new-chapter-for-spotlight-and-thorns-continued-commitment-to-child-safety/"
              target="_blank"
              rel="noreferrer"
            >
              Spotlight
            </a>
            : This tool helps cops find human trafficking victims on escort
            sites.
          </p>
          <p className="mt-4">
            2.{' '}
            <a
              href="https://cuni.cz/UKEN-1.html"
              target="_blank"
              rel="noreferrer"
            >
              Charles University
            </a>
            , Prague: They used web scrapers to build a translation model for
            Syrian and Moroccan migrants in Europe.
          </p>
          <p className="mt-4">
            3.{' '}
            <a
              href="https://commission.europa.eu/index_en"
              target="_blank"
              rel="noreferrer"
            >
              European Commission
            </a>
            : They teamed up with{' '}
            <a
              href="https://www.topmonks.com/"
              target="_blank"
              rel="noreferrer"
            >
              TopMonks
            </a>{' '}
            and{' '}
            <a href="https://apify.com/" target="_blank" rel="noreferrer">
              Apify
            </a>{' '}
            to check if businesses follow consumer protection rules.
          </p>
          <p className="mt-4">
            These examples show that when done right, web scraping can do a lot
            of good.
          </p>
        </div>
      </div>
    </div>
  );
}
