Working with Clustering

About

Clustering is a data mining technique used to gather similar objects into groups, known as clusters. It’s an unsupervised classification method, meaning that data is classified without any pre-defined labels or categories, and it is used in exploratory data analysis to find hidden patterns or groupings in data. It’s a common technique in text mining.

Clustering with the News API

Clustering is now available for the following News API endpoints:

We’ve also added three algorithms that you can choose from when clustering, depending on the type of data and format of results that you require.

1. STC (Suffix Tree Clustering)

STC is a linear time clustering algorithm (linear in the size of the document set), which is based on identifying phrases that are common to groups of documents. A phrase is an ordered sequence of one or more words. This algorithm treats documents as a string of words rather than a collection of words and thus operates using the proximity of information between words. Learn more

2. K-means

K-means clustering aims to partition n observations into k clusters in which each observation belongs to the cluster with the nearest mean, serving as a prototype of the cluster. Learn more

3. Lingo

Lingo is an algorithm for clustering search results that emphasizes the quality of cluster descriptions. It uses algebraic transformations of the term-document matrix and frequent phrase extraction based on suffix arrays. Learn more

Note

To get better quality clusters, set the language parameter.

Examples

The following example shows results for the “Brexit AND Ireland” search query, using the lingo algorithm and only returning English language stories.

var AylienNewsApi = require('aylien-news-api');

// Client object through which the stories request is made.
var apiInstance = new AylienNewsApi.DefaultApi();

// Both credentials are set on the client's authentication table.
var authentications = apiInstance.apiClient.authentications;
authentications['app_id'].apiKey = "YOUR_APP_ID";
authentications['app_key'].apiKey = "YOUR_APP_KEY";

// Search for English stories matching the query and request Lingo clustering.
var searchOptions = {
  'text': 'ireland AND brexit',
  'language': ['en'],
  'cluster': true,
  'cluster_algorithm': 'lingo',
  'per_page': 100
};

// Logs the returned data on success, or the error otherwise.
function onStories(error, data, response) {
  if (!error) {
    console.log('API called successfully. Returned data: ' + JSON.stringify(data));
  } else {
    console.error(error);
  }
}

apiInstance.listStories(searchOptions, onStories);
import aylien_news_api
from aylien_news_api.rest import ApiException

# Both credentials are stored in the SDK configuration's api_key mapping.
api_keys = aylien_news_api.configuration.api_key
api_keys['X-AYLIEN-NewsAPI-Application-ID'] = 'YOUR_APP_ID'
api_keys['X-AYLIEN-NewsAPI-Application-Key'] = 'YOUR_APP_KEY'

# Client object through which the stories request is made.
api_instance = aylien_news_api.DefaultApi()

# Search for English stories matching the query and request Lingo clustering.
search_params = {
    'text': 'ireland AND brexit',
    'language': ['en'],
    'cluster': True,
    'cluster_algorithm': 'lingo',
    'per_page': 100,
}

try:
    # List stories, passing every search parameter as a keyword argument.
    api_response = api_instance.list_stories(**search_params)
    print(api_response)
except ApiException as e:
    print("Exception when calling DefaultApi->list_stories: %s\n" % e)
<?php
require_once(__DIR__ . '/vendor/autoload.php');

// Both credentials are registered on the default configuration object.
$config = Aylien\NewsApi\Configuration::getDefaultConfiguration();
$config->setApiKey('X-AYLIEN-NewsAPI-Application-ID', 'YOUR_APP_ID');
$config->setApiKey('X-AYLIEN-NewsAPI-Application-Key', 'YOUR_APP_KEY');

// Client object through which the stories request is made.
$newsApi = new Aylien\NewsApi\Api\DefaultApi();

// Search for English stories matching the query and request Lingo clustering.
$searchOptions = [
  'language' => ['en'],
  'text' => 'ireland AND brexit',
  'cluster' => true,
  'cluster_algorithm' => 'lingo',
  'per_page' => 100,
];

try {
    // Print the response as returned by the SDK.
    print_r($newsApi->listStories($searchOptions));
} catch (Exception $e) {
    echo 'Exception when calling DefaultApi->listStories: ' . $e->getMessage() . "\n";
}
?>
package com.aylien.newsapisample;

import com.aylien.newsapi.*;
import com.aylien.newsapi.auth.*;
import com.aylien.newsapi.models.*;
import com.aylien.newsapi.parameters.*;
import com.aylien.newsapi.api.DefaultApi;

import java.util.*;

/**
 * Example program: lists stories matching "ireland AND brexit" with
 * clustering enabled and the "lingo" algorithm selected.
 */
public class Main {

    /**
     * Sets the application id and application key on the given client.
     *
     * @param client the API client whose "app_id" and "app_key"
     *               authentications are configured
     */
    private static void configureCredentials(ApiClient client) {
        ApiKeyAuth appIdAuth = (ApiKeyAuth) client.getAuthentication("app_id");
        appIdAuth.setApiKey("YOUR_APP_ID");

        ApiKeyAuth appKeyAuth = (ApiKeyAuth) client.getAuthentication("app_key");
        appKeyAuth.setApiKey("YOUR_APP_KEY");
    }

    /**
     * Configures credentials, builds the search parameters and prints the
     * result of the list-stories call (or the stack trace on failure).
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        configureCredentials(Configuration.getDefaultApiClient());

        DefaultApi newsApi = new DefaultApi();

        // English stories only, clustered with the Lingo algorithm.
        StoriesParams.Builder query = StoriesParams.newBuilder();
        query.setLanguage(Arrays.asList("en"));
        query.setText("ireland AND brexit");
        query.setCluster(true);
        query.setClusterAlgorithm("lingo");
        query.setPerPage(Integer.valueOf(100));

        try {
            Stories stories = newsApi.listStories(query.build());
            System.out.println(stories);
        } catch (ApiException e) {
            System.err.println("Exception when calling DefaultApi#listStories");
            e.printStackTrace();
        }
    }
}
# Load the gem
require 'aylien_news_api'

# Register both News API credentials on the gem's configuration.
AylienNewsApi.configure do |settings|
  settings.api_key['X-AYLIEN-NewsAPI-Application-ID'] = 'YOUR_APP_ID'
  settings.api_key['X-AYLIEN-NewsAPI-Application-Key'] = 'YOUR_APP_KEY'
end

# Client object through which the stories request is made.
news_api = AylienNewsApi::DefaultApi.new

# Search for English stories matching the query and request Lingo clustering.
search_options = {
  language: ['en'],
  text: 'ireland AND brexit',
  cluster: true,
  cluster_algorithm: 'lingo',
  per_page: 100
}

begin
  # List stories and print the response.
  puts news_api.list_stories(search_options)
rescue AylienNewsApi::ApiError => e
  puts "Exception when calling DefaultApi->list_stories: #{e}"
end
package main

// Import the library
import (
	"fmt"
	newsapi "github.com/AYLIEN/aylien_newsapi_go"
)

// main sets the two News API credentials, requests stories matching
// "ireland AND brexit" with Lingo clustering, and prints the response.
// It panics if the request returns an error.
func main() {
	client := newsapi.NewDefaultApi()

	// Application id and application key, keyed by their header names.
	client.Configuration.APIKeyPrefix["X-AYLIEN-NewsAPI-Application-ID"] = "YOUR_APP_ID"
	client.Configuration.APIKeyPrefix["X-AYLIEN-NewsAPI-Application-Key"] = "YOUR_APP_KEY"

	// English stories only, clustered with the Lingo algorithm.
	params := &newsapi.StoriesParams{
		Language:         []string{"en"},
		Text:             "ireland AND brexit",
		Cluster:          "true",
		ClusterAlgorithm: "lingo",
		PerPage:          int32(100),
	}

	// The second return value is not used by this example.
	stories, _, err := client.ListStories(params)
	if err != nil {
		panic(err)
	}

	fmt.Println(stories)
}
using System;
using Aylien.NewsApi.Api;
using Aylien.NewsApi.Client;
using Aylien.NewsApi.Model;
using System.Collections.Generic;

namespace ConsoleApplicationExample
{
    /// <summary>
    /// Example program: lists stories matching "ireland AND brexit" with
    /// clustering enabled and the "lingo" algorithm selected.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Registers the API credentials, calls ListStories and writes either
        /// the result or the exception message to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Application id and application key, keyed by their header names.
            Configuration.Default.ApiKey.Add("X-AYLIEN-NewsAPI-Application-ID", "YOUR_APP_ID");
            Configuration.Default.ApiKey.Add("X-AYLIEN-NewsAPI-Application-Key", "YOUR_APP_KEY");

            var newsApi = new DefaultApi();

            try
            {
                // English stories only, clustered with the Lingo algorithm.
                Stories stories = newsApi.ListStories(
                    text: "ireland AND brexit",
                    cluster: true,
                    language: new List<String> { "en" },
                    clusterAlgorithm: "lingo",
                    perPage: 100
                );
                Console.WriteLine(stories);
            }
            catch (Exception e)
            {
                Console.WriteLine("Exception when calling DefaultApi.ListStories: " + e.Message);
            }
        }
    }
}