Get Product Data Using Woocommerce API Returns Duplicates - Node Js

I am trying to fetch product data from my website using woocommerce-api library in node js.

I’ve verified that the code works as expected with other websites. However, on my site I get a lot of duplicate products.

I did some manual checks (/products?id=), but there were no duplicate IDs at all. I ended up concluding that something is going wrong with how the result pages are generated.

Here is the code:

var WooCommerceAPI = require('woocommerce-api');

// Shared REST client used by every request in this script.
// The site URL and the key/secret pair look like placeholders; substitute real values.
var WooCommerce = new WooCommerceAPI({
    // Which store and which API flavour to talk to.
    url: 'https://my-website.com',
    version: 'wc/v3',
    wpAPI: true,
    // REST API credentials.
    consumerKey: 'ck_1234',
    consumerSecret: 'cs_1234'
});


/**
 * Print and return the handful of product fields this script reports on.
 *
 * @param {object} product - A product object; `id`, `sku`, `name` and
 *   `regular_price` are read from it.
 * @returns {{id: *, sku: *, title: *, price: *}} The selected fields, with
 *   `name` exposed as `title` and `regular_price` as `price`.
 */
function displayProductDetails(product) {
    const { id, sku, name: title, regular_price: price } = product;
    const selectedFields = { id, sku, title, price };

    // Emit the selection so the helper actually displays something, and hand
    // it back so callers can reuse it.
    console.log(selectedFields);
    return selectedFields;
}

/**
 * Page through the `products` endpoint and collect every product exactly once.
 *
 * Pages of 100 products are requested in ascending ID order until an empty
 * page is returned. A product counts as a duplicate only when its ID has been
 * seen before. SKUs are optional, so any number of distinct products can share
 * the empty SKU; de-duplicating on SKU would discard all but the first of them.
 *
 * @returns {Promise<object[]>} The unique products in the order received. If a
 *   request or parse step fails, the error is logged and whatever was
 *   collected up to that point is returned.
 */
async function fetchAllProducts() {
    const allProducts = [];
    const productSKUs = new Set();
    const productIDs = new Set();
    let page = 1;

    while (true) {
        try {
            const result = await WooCommerce.getAsync(`products?per_page=100&page=${page}&orderby=id&order=asc`);

            await delay(1500);
            const data = JSON.parse(result.toJSON().body);

            // Anything other than a list (presumably an API error object) must
            // not be iterated as if it were a page of products.
            if (!Array.isArray(data)) {
                const reason = data && data.message ? data.message : JSON.stringify(data);
                throw new Error(`Unexpected response for page ${page}: ${reason}`);
            }

            if (page === 1) {
                console.log(`Total Products: ${result.headers['x-wp-total']}`);
            }

            if (data.length === 0) {
                break;
            }

            console.log(`Fetched ${data.length} products from page ${page}`);
            let uniqueCount = 0;

            for (const product of data) {
                // Identity is the ID alone; see the note on empty SKUs above.
                if (productIDs.has(product.id)) {
                    console.log(`Duplicate found: SKU = ${product.sku}, ID = ${product.id}`);
                    continue;
                }
                productIDs.add(product.id);

                // Track only real SKUs, and flag reuse without dropping the product.
                if (product.sku) {
                    if (productSKUs.has(product.sku)) {
                        console.log(`SKU reused by a different product: SKU = ${product.sku}, ID = ${product.id}`);
                    }
                    productSKUs.add(product.sku);
                }

                allProducts.push(product);
                displayProductDetails(product);
                uniqueCount++;
            }

            console.log(`Unique products added from page ${page}: ${uniqueCount}`);
            page++;
            await delay(3000);
        } catch (error) {
            console.error('Error fetching products:', error);
            break;
        }
    }

    console.log('Fetched products count:', allProducts.length);
    console.log('Unique SKUs count:', productSKUs.size);
    console.log('Unique IDs count:', productIDs.size);

    return allProducts;
}

/**
 * Pause for a fixed interval.
 *
 * @param {number} time - Timeout handed to `setTimeout` (milliseconds).
 * @returns {Promise<undefined>} Promise that resolves once the timer fires.
 */
function delay(time) {
    return new Promise((wake) => {
        setTimeout(wake, time);
    });
}

This specific website has a total of 5681 products. After removing duplicates, the unique product count was around 3650. After adding some delay to the whole process, the unique product count varied between 3200 and 4100.

Things I tried without luck:

  • Cache Disable
  • Limit products per page (?per_page=30)
  • Filter products order (orderby=id)
  • Updated WooCommerce plugin to latest version.

Any ideas on my issue would be highly appreciated. Thank you in advance for your time!

The problem you’re encountering with duplicate products while fetching data using the WooCommerce API in your Node.js application likely stems from how your website handles pagination and product variations. Here are some potential causes and solutions:

1. Pagination and Variation Handling:

  • WooCommerce Pagination: The default WooCommerce pagination might not guarantee unique products within each page. Some variations can be grouped with the parent product, making them appear on the same page. You could try using the include parameter along with product variations’ IDs to retrieve specific variations instead of relying on pagination.

2. Variation Handling in API:

  • By default, the API might not explicitly return variations as separate products. Consider using the include parameter with variation IDs or the status parameter with variation to explicitly fetch variations alongside the parent product.

Here’s an updated version of your code incorporating these suggestions:

JavaScript

var WooCommerceAPI = require('woocommerce-api');

// REST client that all requests in this snippet go through.
// URL, key and secret appear to be sample values; replace them with the store's own.
var WooCommerce = new WooCommerceAPI({
    // Credentials.
    consumerKey: 'ck_1234',
    consumerSecret: 'cs_1234',
    // Target store and API selection.
    url: 'https://my-website.com',
    wpAPI: true,
    version: 'wc/v3'
});

/**
 * Log and return the product fields of interest.
 *
 * @param {object} product - A product (or variation) object; `id`, `sku`,
 *   `name` and `regular_price` are read from it.
 * @returns {{id: *, sku: *, title: *, price: *}} The picked fields, with
 *   `name` renamed to `title` and `regular_price` to `price`.
 */
function displayProductDetails(product) {
    const selectedFields = {
        id: product.id,
        sku: product.sku,
        title: product.name,
        price: product.regular_price,
    };

    // Without this the object would be built and thrown away; print it and
    // return it so the function has an observable result.
    console.log(selectedFields);
    return selectedFields;
}

/**
 * Collect every product and, for each variable product, every variation.
 *
 * Products are read page by page from `products` (100 per page, ascending ID)
 * until an empty page is returned. Variations are not part of that listing,
 * so for each product whose `type` is `'variable'` they are read from
 * `products/<id>/variations`. Every item is keyed by its ID; SKUs are only
 * recorded when non-empty because they are optional and therefore cannot
 * identify an item.
 *
 * @returns {Promise<object[]>} Unique products and variations in the order
 *   received (each variable product is followed by its variations). If any
 *   request or parse step fails, the error is logged and the items collected
 *   so far are returned.
 */
async function fetchAllProducts() {
    const perPage = 100;
    const allProducts = [];
    const productSKUs = new Set();
    const productIDs = new Set();

    // Request one page of a list endpoint; returns the parsed items and the response headers.
    const fetchPage = async (endpoint, page) => {
        const result = await WooCommerce.getAsync(`${endpoint}?per_page=${perPage}&page=${page}&orderby=id&order=asc`);
        await delay(1500); // throttle: pause after every request
        const data = JSON.parse(result.toJSON().body);

        // Anything other than a list (presumably an API error object) is a failure.
        if (!Array.isArray(data)) {
            const reason = data && data.message ? data.message : JSON.stringify(data);
            throw new Error(`Unexpected response from ${endpoint} (page ${page}): ${reason}`);
        }
        return { data, headers: result.headers };
    };

    // Store an item the first time its ID is seen; returns true when it was stored.
    const addIfNew = (item, label) => {
        if (productIDs.has(item.id)) {
            console.log(`Duplicate found${label}: SKU = ${item.sku}, ID = ${item.id}`);
            return false;
        }
        productIDs.add(item.id);
        if (item.sku) {
            productSKUs.add(item.sku);
        }
        allProducts.push(item);
        displayProductDetails(item);
        return true;
    };

    // Read all variations of one variable product; returns how many were new.
    const fetchVariations = async (productId) => {
        let added = 0;
        for (let page = 1; ; page++) {
            const { data } = await fetchPage(`products/${productId}/variations`, page);
            for (const variation of data) {
                if (addIfNew(variation, ' (variation)')) {
                    added++;
                }
            }
            // A short page is the last one; stopping here avoids an extra
            // empty request per variable product.
            if (data.length < perPage) {
                break;
            }
        }
        return added;
    };

    try {
        for (let page = 1; ; page++) {
            const { data, headers } = await fetchPage('products', page);

            if (page === 1) {
                console.log(`Total Products: ${headers['x-wp-total']}`);
            }

            if (data.length === 0) {
                break;
            }

            console.log(`Fetched ${data.length} products from page ${page}`);
            let uniqueCount = 0;

            for (const product of data) {
                // A repeated parent is skipped entirely, so its variations are not fetched twice.
                if (!addIfNew(product, '')) {
                    continue;
                }
                uniqueCount++;

                if (product.type === 'variable') {
                    uniqueCount += await fetchVariations(product.id);
                }
            }

            console.log(`Unique products added from page ${page}: ${uniqueCount}`);
            await delay(3000);
        }
    } catch (error) {
        console.error('Error fetching products:', error);
    }

    console.log('Fetched products count:', allProducts.length);
    console.log('Unique SKUs count:', productSKUs.size);
    console.log('Unique IDs count:', productIDs.size);

    return allProducts;
}

/**
 * Wait for the given interval before continuing.
 *
 * @param {number} time - Timeout passed straight to `setTimeout` (milliseconds).
 * @returns {Promise<undefined>} Promise resolved by the timer callback.
 */
function delay(time) {
    const schedule = (done) => {
        setTimeout(done, time);
    };
    return new Promise(schedule);
}

Additional Considerations:

  • API Rate Limits: WooCommerce API has rate limits. The introduced delay might help, but check the documentation and adjust accordingly.
  • Error Handling: Consider handling specific errors from the API responses.
  • Filtering: You can refine the API calls with additional filters like categories or tags if needed.

By explicitly fetching variations and keeping track of unique IDs across pages, you should be able to retrieve accurate product data with minimal duplicates.