lucene-solr-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Karthik Ramachandran <kramachand...@commvault.com>
Subject RE: JSON facet bucket list not correct with sharded query
Date Wed, 05 Apr 2017 00:35:08 GMT
Since the attachment was removed, I am sending the code inline.

import java.util.List;
import java.util.Random;
import java.util.UUID;

import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;

public class JsonFacetPagingTest {
  public static void main(String[] args) throws Throwable {
    final String SOLR_URL = "http://localhost:8983/solr";
    JsonFacetPagingTest tests = new JsonFacetPagingTest();
    // Uncomment below to add docs to the core.
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate01", 0, 4);
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate02", 0, 600);
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate02", 100, 600);
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate02", 200, 600);
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate02", 300, 600);
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate02", 400, 600);
    // tests.addDocumentsToCore(SOLR_URL, "fileduplicate02", 700, 800);

    // Uncomment below to run the queries in pages.
    // tests.testPaging(SOLR_URL, "fileduplicate01", 15);
  }

  protected void addDocumentsToCore(String solrURL, String coreName, int startIndex, int numberOfRecords)
      throws Exception {
    int endIndex = startIndex + numberOfRecords;
    if (numberOfRecords > 0 && endIndex > startIndex) {
      Random ran = new Random();
      HttpSolrClient client = new HttpSolrClient.Builder(solrURL).build();
      for (int index = startIndex; index <= endIndex; ++index) {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", UUID.randomUUID().toString());
        doc.addField("filename", "filename-" + index);
        doc.addField("size", (1024L * ran.nextInt()));
        client.add(coreName, doc);
      }
      client.commit(coreName);
      client.close();
    }
  }

  @SuppressWarnings("unchecked")
  protected void testPaging(String solrURL, String coreName, int limit) throws Exception {
    Long offset = 0L;
    Long numBuckets = 0L;
    List<?> buckets = null;
    String facet = "{'duplicates':{'type':'terms','field':'filename','limit':%d,'offset':%d,'mincount':2,'numBuckets':true,'sort':'sum
desc','facet': {'sum':'sum(size)'}}}";
    HttpSolrClient client = new HttpSolrClient.Builder(solrURL).build();
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CommonParams.Q, "*:*");
    params.set(CommonParams.START, String.valueOf(CommonParams.START_DEFAULT));
    params.set(CommonParams.ROWS, String.valueOf(CommonParams.START_DEFAULT));
    params.set(ShardParams.SHARDS, solrURL + "/fileduplicate01," + solrURL + "/fileduplicate02");
    do {
      params.set("json.facet", String.format(facet, limit, offset));
      QueryResponse queryResponse = client.query(coreName, params, METHOD.POST);
      if (queryResponse != null && queryResponse.getResponse() != null) {
        NamedList<Object> facets = (NamedList<Object>) queryResponse.getResponse().get("facets");
        NamedList<Object> duplicates = (NamedList<Object>) facets.get("duplicates");
        numBuckets = ((Number) duplicates.get("numBuckets")).longValue();
        buckets = (List<?>) duplicates.get("buckets");
        System.out.println(String.format("Result for Offset:%4d ==> Number of Buckets:%4d,
Bucket Size:%4d, vals:%s",
            offset, numBuckets, buckets.size(), buckets));
        offset += limit;
      }
    } while (buckets != null && buckets.size() != 0 && offset <= numBuckets);
    client.close();
  }
}

With Thanks & Regards
Karthik Ramachandran


From: Karthik Ramachandran
Sent: Tuesday, April 4, 2017 8:32 PM
To: 'solr-user@lucene.apache.org' <solr-user@lucene.apache.org>
Subject: JSON facet bucket list not correct with sharded query

We are using a JSON facet to list files that are duplicates (mincount: 2) in pages. After 2-3
pages we don't get any results, even though there are more results.

Schema:
  <field name="filename" type="string" indexed="true" docValues="true"/>
  <field name="id" type="string" docValues="true" indexed="true" required="true"/>
  <field name="size" type="long" indexed="true" docValues="true"/>

Query:
http://localhost:8983/solr/fileduplicate01/select/?wt=json&q=*:*&start=0&rows=0&shards=localhost:8983/solr/fileduplicate01,localhost:8983/solr/fileduplicate02&json.facet={
"duplicates":{"type":"terms","field":"filename","limit":15,"offset":0,"mincount":2,"numBuckets":true,"sort":"sum desc","facet": {"sum":"sum(size)"}}}

Create 2 cores named fileduplicate01 and fileduplicate02 with the same schema, then run the
attached Java program to populate the data and run the query.

Any help is appreciated.


With Thanks & Regards
Karthik Ramachandran

***************************Legal Disclaimer***************************
"This communication may contain confidential and privileged material for the
sole use of the intended recipient. Any unauthorized review, use or distribution
by others is strictly prohibited. If you have received the message by mistake,
please advise the sender by reply email and delete the message. Thank you."
**********************************************************************

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message