4

I have a JS object norm which I want to use inside mongo aggregation pipeline, like this:

var norm = { 
    1: 1, 
    2: 1.16,
    3: 1.413,
    4: 1.622,
    5: 1.6,
    6: 1.753,
    7: 3.001,
    8: 2.818,
    9: 3.291,
    10: 2.824,
    11: 2.993,
    12: 2.699,
    13: 1.099,
    14: 1.035,
    15: 1.172,
    16: 1.013,
    17: 0.9936,
    18: 1.069
};

db.mycoll.aggregate([
    {$match : 
        {"_id.day" : ISODate("2014-06-19T00:00:00.000Z"), 
         "_id.lt" : "l",
         "_id.rt" : "rltdlsts",
         "_id.m": false   }
    },

    {$unwind: '$value.rl'},

    {$match: {'value.rl.p': {$gte: 1, $lte: 18} } },

    {$group: {_id: '$value.rl.a', 
                v: {$sum: '$value.rl.v'},
                nv: { $sum: { $multiply: [ norm['$value.rl.p'], '$value.rl.v' ] } },
                c: {$sum: '$value.rl.c'}
            }},

    {$project: {
        _id: "$_id",
        'v': "$v",
        'c': "$c",
        'nv': "$nv"
      }
    },

    {$sort: {'_id': 1}}
])

I get results like this, where nv is always 0:

{
    "result" : [ 
        {
            "_id" : 1,
            "v" : 89172,
            "nv" : 0,
            "c" : 604
        }, 
        {
            "_id" : 4,
            "v" : 67872,
            "nv" : 0,
            "c" : 296
        }, 
        {
            "_id" : 5,
            "v" : 33999,
            "nv" : 0,
            "c" : 13
        }, 
        {
            "_id" : 6,
            "v" : 4727,
            "nv" : 0,
            "c" : 6
        }, 
        {
            "_id" : 8,
            "v" : 913118,
            "nv" : 0,
            "c" : 14055
        }, 
        {
            "_id" : 9,
            "v" : 204099,
            "nv" : 0,
            "c" : 3021
        }, 
        {
            "_id" : 11,
            "v" : 151711,
            "nv" : 0,
            "c" : 1075
        }, 
        {
            "_id" : 12,
            "v" : 196369,
            "nv" : 0,
            "c" : 601
        }, 
        {
            "_id" : 13,
            "v" : 277705,
            "nv" : 0,
            "c" : 2302
        }, 
        {
            "_id" : 14,
            "v" : 64005,
            "nv" : 0,
            "c" : 970
        }, 
        {
            "_id" : 15,
            "v" : 54558,
            "nv" : 0,
            "c" : 326
        }, 
        {
            "_id" : 16,
            "v" : 74576,
            "nv" : 0,
            "c" : 305
        }, 
        {
            "_id" : 17,
            "v" : 1144,
            "nv" : 0,
            "c" : 1
        }, 
        {
            "_id" : 18,
            "v" : 1023,
            "nv" : 0,
            "c" : 0
        }, 
        {
            "_id" : 19,
            "v" : 54511,
            "nv" : 0,
            "c" : 98
        }, 
        {
            "_id" : 20,
            "v" : 674,
            "nv" : 0,
            "c" : 0
        }, 
        {
            "_id" : 21,
            "v" : 3359,
            "nv" : 0,
            "c" : 4
        }, 
        {
            "_id" : 22,
            "v" : 496402,
            "nv" : 0,
            "c" : 3786
        }, 
        {
            "_id" : 23,
            "v" : 293212,
            "nv" : 0,
            "c" : 1904
        }, 
        {
            "_id" : 24,
            "v" : 764087,
            "nv" : 0,
            "c" : 8847
        }, 
        {
            "_id" : 25,
            "v" : 291358,
            "nv" : 0,
            "c" : 7012
        }, 
        {
            "_id" : 28,
            "v" : 2933,
            "nv" : 0,
            "c" : 27
        }
    ],
    "ok" : 1
}

How to fix this?

Neil Lunn
  • 148,042
  • 36
  • 346
  • 317
arun
  • 10,685
  • 6
  • 59
  • 81
  • 2
    You can't do that because the pipeline is evaluated once and then sent to the server for execution where `norm` isn't available. Map-Reduce is probably your best bet here. – JohnnyHK Jun 21 '14 at 02:29
  • 2
    right, you can't do exactly what you are doing, but you can set up an aggregation pipeline with conditional expressions to use the correct multiplier depending on the value of '$value.rl.p' - it won't look pretty, but it can be done programmatically. – Asya Kamsky Jun 21 '14 at 14:03
  • Thx, @JohnnyHK Will MR my way out :) – arun Jun 21 '14 at 15:01

1 Answers1

8

There are a few ways to approach this under the aggregation framework without resorting to mapReduce. Recent MongoDB 2.6 and greater versions have some operators to help here using $let and $map for defining a variable and processing the array.

Your external declaration looks better for these purposes like this:

var norm = [
    { "key": 1, "value": 1 }, 
    { "key": 2, "value": 1.16 },
    { "key": 3, "value": 1.413 },
    { "key": 4, "value": 1.622 },
    { "key": 5, "value":  1.6 },
    { "key": 6, "value": 1.753 },
    { "key": 7, "value":  3.001 },
    { "key": 8, "value":  2.818 },
    { "key": 9, "value": 3.291 },
    { "key": 10,"value": 2.824 },
    { "key": 11, "value": 2.993 },
    { "key": 12, "value": 2.699 },
    { "key": 13, "value": 1.099 },
    { "key": 14, "value": 1.035 },
    { "key": 15, "value": 1.172 },
    { "key": 16, "value": 1.013 },
    { "key": 17, "value": 0.9936 },
    { "key": 18, "value": 1.069 }
];

And then process the aggregate statement:

db.mycoll.aggregate([
    { "$match": {
        "_id.day" : ISODate("2014-06-19T00:00:00.000Z"), 
        "_id.lt" : "l",
        "_id.rt" : "rltdlsts",
        "_id.m": false
    }},
    { "$unwind": "$value.rl" },

    { "$match": { "value.rl.p": { "$gte": 1, "$lte": 18 } } },

    { "$project": {
        "value": 1,
        "norm": {
            "$let": {
               "vars": {
                   "norm": norm
               },
               "in": {
                   "$setDifference": [
                       { "$map": {
                           "input": "$$norm",
                           "as": "norm",
                           "in": {
                               "$cond": [
                                   { "$eq": [ "$$norm.key", "$value.rl.p" ] },
                                   "$$norm.value",
                                   false
                               ]
                           }
                       }},
                       [false]
                   ]
               }
            }               
        }
    }},
    { "$unwind": "$norm" }

    { "$group": {
        "_id": "$value.rl.a", 
        "v": { "$sum": "$value.rl.v" },
        "c": { "$sum": "$value.rl.c" },
        "nv": { "$sum": { "$multiply": [ "$norm", "$value.rl.v" ] } }
    }}
])

In that $project stage you are actually injecting the external declaration as an array variable into the pipeline and then processing each element to match your existing "value.rl.p" keys. This only returns the single matching value, so the further use of $unwind really only just makes the single element array result a singular value for use in the later $group statement.

The traditional approach in earlier versions where the operators are not supported is to use a nested $cond statement to evaluate each value:

db.mycoll.aggregate([
    { "$match": {
        "_id.day" : ISODate("2014-06-19T00:00:00.000Z"), 
        "_id.lt" : "l",
        "_id.rt" : "rltdlsts",
        "_id.m": false
    }},

    { "$unwind": "$value.rl" },

    { "$match": { "value.rl.p": { "$gte": 1, "$lte": 18 } } },

    { "$group": {
        "_id": "$value.rl.a", 
        "v": { "$sum": "$value.rl.v" },
        "c": { "$sum": "$value.rl.c" },
        "nv": { "$sum": { "$multiply": [ 
            { "$cond": [
                { "$eq": [ "$value.rl.p", 2 },
                1.16
                { "$cond": [
                    { "$eq": [ "$value.rl.p", 3 },
                    1.413,
                    { "$cond": [
                        { "$eq": [ "$value.rl.p", 4 },
                        1.622,
                        { "$cond": [
                            { "$eq": [ "$value.rl.p", 5 },
                            1.6,
                            { "$cond": [
                                { "$eq": [ "$value.rl.p", 6 },
                                1.753,
                                { "$cond": [
                                    { "$eq": [ "$value.rl.p", 7 },
                                    3.001,
                                    { "$cond": [
                                        { "$eq": [ "$value.rl.p", 8 },
                                        2.818,
                                        { "$cond": [
                                            { "$eq": [ "$value.rl.p", 9 },
                                            3.291,
                                            { "$cond": [
                                                { "$eq": [ "$value.rl.p", 10 },
                                                2.824,
                                                { "$cond": [
                                                    { "$eq": [ "$value.rl.p", 11 },
                                                    2.993,
                                                    { "$cond": [
                                                        { "$eq": [ "$value.rl.p", 12 },
                                                        2.699,
                                                        { "$cond": [
                                                            { "$eq": [ "$value.rl.p", 13 },
                                                            1.099,
                                                            { "$cond": [
                                                                { "$eq": [ "$value.rl.p", 14 },
                                                                1.035,
                                                                { "$cond": [
                                                                    { "$eq": [ "$value.rl.p", 15 },
                                                                    1.172,
                                                                    { "$cond": [
                                                                        { "$eq": [ "$value.rl.p", 16 },
                                                                        1.013,
                                                                        { "$cond": [
                                                                            { "$eq": [ "$value.rl.p", 17 },
                                                                            0.9936,
                                                                            { "$cond": [
                                                                                { "$eq": [ "$value.rl.p", 18 },
                                                                                1.069,
                                                                                1
                                                                            ]}
                                                                        ]}
                                                                    ]}
                                                                ]}
                                                            ]}
                                                        ]}
                                                    ]}
                                                ]}
                                            ]}
                                        ]}
                                    ]}
                                ]}
                            ]}
                        ]}
                    ]}
                ]}
            ]},
            "$value.rl.v" 
        ]}}
    }}
])

It looks noisy but it is the next most efficient form to the query previously shown above. In reality you would generate the pipeline stage is a similar way to as shown here.

Community
  • 1
  • 1
Neil Lunn
  • 148,042
  • 36
  • 346
  • 317