cassandra-pr mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dineshjoshi <...@git.apache.org>
Subject [GitHub] cassandra pull request #244: Refactor and add samplers for CASSANDRA-14436
Date Sat, 04 Aug 2018 02:43:41 GMT
Github user dineshjoshi commented on a diff in the pull request:

    https://github.com/apache/cassandra/pull/244#discussion_r207697430
  
    --- Diff: src/java/org/apache/cassandra/metrics/FrequencySampler.java ---
    @@ -0,0 +1,105 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.cassandra.metrics;
    +
    +import java.util.Collections;
    +import java.util.List;
    +import java.util.stream.Collectors;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import com.clearspring.analytics.stream.StreamSummary;
    +
    +/**
    + * Find the most frequent sample. A sample adds to the sum of its key, i.e.
    + * <p>add("x", 10); and add("x", 20); will result in "x" = 30</p> This uses StreamSummary to only store the
    + * approximate cardinality (capacity) of keys. If the number of distinct keys exceeds the capacity, the error of the
    + * sample may increase depending on the distribution of keys among the total set.
    + * 
    + * @param <T>
    + */
    +public abstract class FrequencySampler<T> extends Sampler<T>
    +{
    +    private static final Logger logger = LoggerFactory.getLogger(FrequencySampler.class);
    +    private boolean enabled = false;
    +
    +    private StreamSummary<T> summary;
    +
    +    /**
    +     * Start to record samples
    +     *
    +     * @param capacity
    +     *            Number of sample items to keep in memory, the lower this is
    +     *            the less accurate results are. For best results use value
    +     *            close to cardinality, but understand the memory trade offs.
    +     */
    +    public synchronized void beginSampling(int capacity)
    +    {
    +        if (!enabled)
    +        {
    +            summary = new StreamSummary<T>(capacity);
    +            enabled = true;
    +        }
    +    }
    +
    +    /**
    +     * Call to stop collecting samples, and gather the results
    +     * @param count Number of most frequent items to return
    +     */
    +    public synchronized List<Sample<T>> finishSampling(int count)
    +    {
    +        List<Sample<T>> results = Collections.EMPTY_LIST;
    +        if (enabled)
    +        {
    +            enabled = false;
    +            results = summary.topK(count)
    +                             .stream()
    +                             .map(c -> new Sample<T>(c.getItem(), c.getCount(),
c.getError()))
    +                             .collect(Collectors.toList());
    +        }
    +        return results;
    +    }
    +
    +    protected synchronized void insert(final T item, final long value)
    +    {
    +        // samplerExecutor is single threaded but still need
    +        // synchronization against jmx calls to finishSampling
    +        if (enabled && value > 0)
    +        {
    +            try
    +            {
    +                summary.offer(item, (int) Math.min(value, Integer.MAX_VALUE));
    +            } catch (Exception e)
    +            {
    +                logger.trace("Failure to offer sample", e);
    +            }
    +        }
    +    }
    +
    +    public boolean isEnabled()
    +    {
    +        return enabled;
    +    }
    +
    +    public void setEnabled(boolean enabled)
    +    {
    +        this.enabled = enabled;
    --- End diff --
    
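    This allows the user of the class to enable the `FrequencySampler` without actually initializing the `summary` variable. With `enabled` set to true while `summary` is still null, the next `finishSampling` call will throw an NPE at `summary.topK(count)`; `insert` will hit the same NPE at `summary.offer(...)`, but its catch block only logs it at trace level.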


---

---------------------------------------------------------------------
To unsubscribe, e-mail: pr-unsubscribe@cassandra.apache.org
For additional commands, e-mail: pr-help@cassandra.apache.org


Mime
View raw message