Skip to content

Document classification

Document classification determines what the document text is about by mapping it to the categories of a tree.

The Edge NL API that carries out classification has the following endpoint:

/api/analyze

In the reference section of this manual you will find all the information you need to perform document classification, specifically:

Here is an example of performing document classification on a short English text:

This example is based on the Python client you can find on GitHub.

The client gets user credentials from two environment variables:

EAI_USERNAME
EAI_PASSWORD

Set those variables with your account credentials before running the sample program below.

The program prints the list of categories.

from expertai.nlapi.edge.client import ExpertAiClient

# Sample English text to classify.
sample_text = "Michael Jordan was one of the best basketball players of all time. Scoring was Jordan's stand-out skill, but he still holds a defensive NBA record, with eight steals in a half."

# Per the accompanying documentation, the client takes its credentials from
# the EAI_USERNAME and EAI_PASSWORD environment variables.
nl_client = ExpertAiClient()
result = nl_client.classification(sample_text)

print("Tab separated list of categories:")

# One line per category: its id and its hierarchy, separated by a tab.
for item in result.categories:
    print(f"{item.id_!s}\t{item.hierarchy!s}")

This example is based on the Java client you can find on GitHub.

The client gets user credentials from two environment variables:

EAI_USERNAME
EAI_PASSWORD

Set those variables with your account credentials before running the sample program below.

The program prints the JSON response.

import ai.expert.nlapi.security.Authentication;
import ai.expert.nlapi.security.Authenticator;
import ai.expert.nlapi.security.BasicAuthenticator;
import ai.expert.nlapi.security.DefaultCredentialsProvider;
import ai.expert.nlapi.v2.API;
import ai.expert.nlapi.v2.edge.Analyzer;
import ai.expert.nlapi.v2.edge.AnalyzerConfig;
import ai.expert.nlapi.v2.message.AnalyzeResponse;
import ai.expert.nlapi.v2.model.Category;

import java.util.List;

/**
 * Sample program that classifies a short English text with the Edge NL API
 * Java client, prints the response, and then lists the resulting categories.
 */
public class Main {

    /**
     * Creates the authentication object used by the analyzer.
     *
     * <p>Wraps a {@link DefaultCredentialsProvider} in a {@link BasicAuthenticator}.
     * Per the accompanying documentation, credentials are expected in the
     * {@code EAI_USERNAME} and {@code EAI_PASSWORD} environment variables.
     *
     * @return a new {@link Authentication} backed by a basic authenticator
     * @throws Exception if the underlying client classes fail during set-up
     */
    public static Authentication createAuthentication() throws Exception {
        Authenticator basicAuthenticator =
                new BasicAuthenticator(new DefaultCredentialsProvider());
        return new Authentication(basicAuthenticator);
    }

    /**
     * Creates an analyzer configured for API version V2 on the default edge host,
     * authenticated through {@link #createAuthentication()}.
     *
     * @return a ready-to-use {@link Analyzer}
     * @throws Exception if authentication or analyzer construction fails
     */
    public static Analyzer createAnalyzer() throws Exception {
        return new Analyzer(
                AnalyzerConfig.builder()
                        .withVersion(API.Versions.V2)
                        .withHost(API.DEFAULT_EDGE_HOST)
                        .withAuthentication(createAuthentication())
                        .build());
    }

    /**
     * Entry point: classifies the sample text and prints the results.
     *
     * <p>Any exception raised along the way is caught and its stack trace printed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            String sampleText = "Michael Jordan was one of the best basketball players of all time. Scoring was Jordan's stand-out skill, but he still holds a defensive NBA record, with eight steals in a half.";

            AnalyzeResponse response = createAnalyzer().classification(sampleText);

            // Output JSON representation.
            response.prettyPrint();

            // Tab separated list of categories: one line per category, id then hierarchy.
            System.out.println("Tab separated list of categories:");
            List<Category> categories = response.getData().getCategories();
            for (Category category : categories) {
                System.out.println(category.getId() + "\t" + category.getHierarchy());
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}