3

I have a user collection with following data

    [
  {
    "user_id": "5625c95ac2d34f27148b64fa",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "562744ccc2d34f27148b6eb7",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "56248eb9c2d34f2f148b5a18",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "561f40bac2d34f17148b462c",
    "friend_id": "561f3e06c2d34f27148b45f6"
  },
  {
    "user_id": "561f40bac2d34f17148b462c",
    "friend_id": "5620de97c2d34f2f148b578f"
  },
  {
    "user_id": "56276b52c2d34f27148b7128",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "561f40bac2d34f17148b462c",
    "friend_id": "56276b52c2d34f27148b7128"
  }
]

i need to fetch the documents in which combination of user_id and friend_id not repeated. i.e in the above example last two documents user_id repeated in friend_id of next document.

I tried with mongo aggrigate and group by but could not reduce it.

Blakes Seven
  • 49,422
  • 14
  • 129
  • 135
Ramesh Paul
  • 840
  • 4
  • 15
  • 31

1 Answers1

3

In order to do this you basically need to combine both user_id and friend_id values in a uniquely sorted combination. This means creating an array for each document with those members and sorting that array so that the order is always the same.

Then you can $group on that sorted array content to see which documents contain that same combination and then only return those that do not share that same combination.

This leads to this aggregate statement:

db.collection.aggregate([
    { "$project": {
        "user_id": 1,
        "friend_id": 1,
        "combined": {
            "$map": {
                "input": ["A","B"],
                "as": "el",
                "in": {
                    "$cond": [ 
                        { "$eq": [ "$$el", "A" ] },
                        "$user_id",
                        "$friend_id"
                    ]
                }
            }
        }            
    }},
    { "$unwind": "$combined" },
    { "$sort": { "combined": 1 } },
    { "$group": {
        "_id": "$_id",
        "combined": { "$push": "$combined" },
        "user_id": { "$first": "$user_id" },
        "friend_id": { "$first": "$friend_id"  }
    }},
    { "$group": {
        "_id": "$combined",
        "docs": { "$push": {
            "_id": "$_id",
            "user_id": "$user_id",
            "friend_id": "$friend_id"
        }}
    }},
    { "$redact": {
        "$cond": {
            "if": { "$ne": [{ "$size": "$docs" }, 1] },
            "then": "$$PRUNE",
            "else": "$$KEEP"
        }
    }}
])

The PHP translation for laravel means to need to access the raw collection object from the manager, where "collection" is the actual name of the collection in MongoDB:

$result = DB::collection("collection")->raw(function($collection) {
    return $collection->aggregate(
        array(
            array(
                '$project' => array(
                    'user_id' => 1,
                    'friend_id' => 1,
                    'combined' => array(
                        '$map' => array(
                            'input' => array("A","B"),
                            'as' => 'el',
                            'in' => array(
                                '$cond' => array(
                                    array( '$eq' => array( '$el', 'A' ) ),
                                    '$user_id',
                                    '$friend_id'
                                )
                            )
                        )
                    )
                )
            ),
            array( '$unwind' =>'$combined' ),
            array( '$sort' => array( 'combined' => 1 ) ),
            array(
                '$group' => array(
                    '_id' => '$_id',
                    'combined' => array( '$push' => '$combined' ),
                    'user_id' => array( '$first' => '$user_id' ),
                    'friend_id' => array( '$first' => '$friend_id' )
                )
            ),
            array(
                '$group' => array(
                    '_id' => '$combined',
                    'docs' => array(
                        '$push' => array(
                            '_id' => '$_id',
                            'user_id' => '$user_id',
                            'friend_id' => 'friend_id'
                        )
                    )
                )
            ),
            array(
                '$redact' => array(
                    '$cond' => array(
                        'if' => array( '$ne' => array( array( '$size' => '$docs'), 1) ),
                        'then' => '$$PRUNE',
                        'else' => '$$KEEP'
                    )
                )
            )
        )
    );
});

Or if your MongoDB version is less than 2.6, and you lack operators like $map and $redact, then you can still do this, but not as efficiently:

$result = DB::collection("collection")->raw(function($collection) {
    return $collection->aggregate(
        array(
            array(
                '$project' => array(
                    'user_id' => 1,
                    'friend_id' => 1,
                    'type' => array( '$const' => array( 'A', 'B' ) )
                )
            ),
            array( '$unwind' => '$type' ),
            array(
                '$group' => array(
                    '_id' => '$_id',
                    'user_id' => array( '$first' => '$user_id' ),
                    'friend_id' => array( '$first' => '$friend_id' ),
                    'combined' => array( 
                        '$push' => array(
                            '$cond' => array(
                                array( '$eq' => array( '$type', 'A' ) ),
                                '$user_id',
                                '$friend_id'
                            )
                        )
                    )
                )
            )
            array( '$unwind' =>'$combined' ),
            array( '$sort' => array( 'combined' => 1 ) ),
            array(
                '$group' => array(
                    '_id' => '$_id',
                    'combined' => array( '$push' => '$combined' ),
                    'user_id' => array( '$first' => '$user_id' ),
                    'friend_id' => array( '$first' => '$friend_id' )
                )
            ),
            array(
                '$group' => array(
                    '_id' => '$combined',
                    'docs' => array(
                        '$push' => array(
                            '_id' => '$_id',
                            'user_id' => '$user_id',
                            'friend_id' => 'friend_id'
                        )
                    ),
                    'count' => array( '$sum' => 1 )
                )
            ),
            array( '$match' => array( 'count' => 1 ) )
        )
    );
});

Where the first three stages mimic what the first stage is doing in the first example listing by putting both values in a single array. Of course the last two stages by "counting" the array members while grouping and then filtering out anything that does not have a "count" of 1.

In either case this leaves you with output that only lists the documents where that combination does not occur in either order:

{ 
    "_id" : [ "561f40bac2d34f17148b462c", "5625c95ac2d34f27148b64fa" ], 
    "docs" : [ 
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0cc9"),
            "user_id" : "5625c95ac2d34f27148b64fa",
            "friend_id" : "561f40bac2d34f17148b462c" 
        } 
    ] 
}
{ 
    "_id" : [ "561f3e06c2d34f27148b45f6", "561f40bac2d34f17148b462c" ], 
    "docs" : [ 
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0ccc"), 
            "user_id" : "561f40bac2d34f17148b462c",
            "friend_id" : "561f3e06c2d34f27148b45f6"
        }
    ]
}
{ 
    "_id" : [ "561f40bac2d34f17148b462c", "56248eb9c2d34f2f148b5a18" ], 
    "docs" : [
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0ccb"), 
            "user_id" : "56248eb9c2d34f2f148b5a18", 
            "friend_id" : "561f40bac2d34f17148b462c"
        }
    ]
}
{ 
    "_id" : [ "561f40bac2d34f17148b462c", "5620de97c2d34f2f148b578f" ],
    "docs" : [
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0ccd"), 
            "user_id" : "561f40bac2d34f17148b462c", 
            "friend_id" : "5620de97c2d34f2f148b578f"
        }
    ]
}
{ 
    "_id" : [ "561f40bac2d34f17148b462c", "562744ccc2d34f27148b6eb7" ], 
    "docs" : [ 
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0cca"), 
            "user_id" : "562744ccc2d34f27148b6eb7", 
            "friend_id" : "561f40bac2d34f17148b462c"
        }
    ]
}

You can pretty up the output, but this serves the purpose of showing the ordered combination used along with the original document data.

Blakes Seven
  • 49,422
  • 14
  • 129
  • 135
  • when i tried this in mongo shell it's working file, but when tried with laravel it is throwing this error "exception: the group aggregate field name '$_id' cannot be an operator name" – Ramesh Paul Oct 28 '15 at 10:02
  • @RameshPaul That was a typo by me in the PHP listing. Corrected. It was shown correctly in the original listing as well as `"_id": "$combined"` which is where the typing mistake was. – Blakes Seven Oct 28 '15 at 10:12