config.yaml.sample

# This is am example configuration file for Pivot, here you can add data sources as well as configure Pivot settings
# You can start by using this sample config by running `cp config_sample.yaml config.yaml`

# The port on which the Pivot server will listen on
port: 9090

# A Druid broker node that can serve data (only used if you have Druid based data source)
druidHost: localhost:8082

# The data sources that you have configured, these will appear, in order, inside the navigation menu of Pivot
# In general there can be two types of 'engine':
#   - native: a JSON file that is crunched within plywood itself (useful for small datasets and testing)
#   - druid: a Druid dataSource
dataSources:

  # Here we have a data source based on a single day of Wikipedia
  - name: static-wiki # This will go into the URL so no fancy characters allowed

    # This is the title that will grace this data source in the the menus
    title: Static Wikipedia

    # Use the native engine, all calculations will be done in Node.JS. Good for up to 100k rows of data.
    engine: native

    # The file representing the datasource relative to repo root
    source: assets/data/wikiticker-2015-09-12-sampled.json
    # This datasource was scraped using https://github.com/implydata/wikiticker
    # GitHub does not like large files so only a sampled file is checked in
    # There is also a non-sampled file with the filter: isAnonymous == true applied, to use it set:
    # source: assets/data/wikiticker-2015-09-12-anonymous.json
    # Run `assets/data-raw/process-wikiticker-2015-09-12` to get the full example file

    # The primary time attribute of the data refers to the attribute that must always be filtered on
    # This is particularly useful for Druid data sources as they must always have a time filter.
    timeAttribute: time

    # The latest time of the data, (if not set will default to Date.now())
    maxTime: 2015-09-13T00:00:00Z

    # The default duration for the time filter (if not set P3D is used)
    defaultDuration: P1D

    # The default sort measure name (if not set the first measure name is used)
    defaultSortMeasure: delta

    # The names of dimensions that are pinned by default (in order that they will appear in the pin bar
    defaultPinnedDimensions:
      - channel
      - namespace
      - isRobot

    # The list of dimensions defined in the UI. The order here will be reflected in the UI
    dimensions:
      # A general dimension looks like so:
      # - name: channel
      #   ^ the name of the dimension as used in the URL (you should try not to change these)
      #
      #   title: The Channel
      #   ^ (optional) the human readable title. If not set a title is generated from the 'name'
      #
      #   type: STRING
      #   ^ (optional) the Plywood type of the dimension. Defaults to STRING
      #
      #   expression: $channel
      #   ^ (optional) the Plywood bucketing expression for this dimension. Defaults to '$name'
      #     if, say, channel was called 'cnl' in the data you would put '$cnl' here

      - name: time
        type: TIME

      - name: channel

      - name: cityName

      - name: comment

      - name: countryIso
        title: Country ISO
        expression: $countryIsoCode

      - name: countryName

      - name: isAnonymous

      - name: isMinor

      - name: isNew

      - name: isRobot

      - name: isUnpatrolled

      - name: metroCode

      - name: namespace

      - name: page

      - name: regionIso
        title: Region ISO
        expression: $regionIsoCode

      - name: regionName

      - name: user

    # The list of measures defined in the UI. The order here will be reflected in the UI
    measures:
      # A general measure looks like so:
      #
      # - name: avg_revenue
      #   ^ the name of the dimension as used in the URL (you should try not to change these)
      #
      #   title: Average Revenue
      #   ^ (optional) the human readable title. If not set a title is generated from the 'name'
      #
      #   expression: $main.average($revenue)
      #   ^ (optional) the Plywood bucketing expression for this dimension. Defaults to '$main.sum($name)'
      #     this is the place to define your fancy formulas

      - name: count
        title: Rows
        expression: $main.count()

      - name: delta

      - name: avg_delta
        expression: $main.average($delta)

      - name: added

      - name: avg_added
        expression: $main.average($added)

      - name: deleted

      - name: avg_deleted
        expression: $main.average($deleted)

      - name: unique_users
        title: Unique Users
        expression: $main.countDistinct($user)


  # Here is an example of a Druid data source, this one is taken from the Druid wikipedia demo
  # It will work for you if you have setup the demo Wikipedia Editstream scraper
  - name: wiki
    title: Wikipedia Edits
    engine: druid # Set the engine to druid
    source: wikipedia # The druid dataSource

    timeAttribute: time # The time attribute (this needs to be set for Druid, but could be anything. You should leave it as 'time')

    dimensions:
      - name: time
        type: TIME

      - name: namespace

      - name: language

      - name: page

      - name: user

      - name: country

      - name: city

      - name: region

    measures:
      - name: count

      - name: delta

      - name: avg_delta
        expression: $main.sum($delta) / $main.sum($count)

      - name: added

      - name: avg_added
        expression: $main.sum($added) / $main.sum($count)

      - name: deleted

      - name: avg_deleted
        expression: $main.sum($deleted) / $main.sum($count)

      - name: unique_users
        title: Unique Users
        expression: $main.countDistinct($user_unique)


  # Here is an example of a Druid data source with cool crazy things in it for education
  # in this example custom aggregations are passed directly into Druid, this is useful if
  # you have custom sketches or trying to do something that Plywood does not (yet) support.
  # If you use this for something other than a custom sketch I would appreciate it if you could
  # file an issue in Plywood (https://github.com/implydata/plywood).
  - name: wiki-crazy
    title: Wikipedia Crazy
    engine: druid
    source: wikipedia

    timeAttribute: time # The time attribute (this needs to be set for Druid, but could be anything. You should leave it as 'time')

    options:
      customAggregations:
        boring:
          #accessType  <-- this is how this aggregate will be accessed from a postAgg (default is 'fieldAccess')
          aggregation:
            type: longSum
            fieldName: added

        mod1337:
          aggregation:
            type: javascript
            fieldNames: ['added']
            fnAggregate: "function(current, added) { return (current + added) % 1337 }"
            fnCombine: "function(partialA, partialB) { return (partialA + partialB) % 1337 }"
            fnReset: "function() { return 0; }"

    dimensions:
      - name: time
        type: TIME

      - name: namespace

      - name: language

      - name: page

      - name: user

      - name: country

      - name: city

      - name: region

    measures:
      - name: count

      - name: added

      - name: boring_added
        # Using the custom stuff defined above here
        expression: $main.custom(boring)

      - name: added1337
        expression: $main.custom(mod1337)

      - name: combined
        # Custom aggregates can be used in mathematical expressions
        expression: ($main.custom(boring) - $main.custom(mod1337)) / 1337