利用json进行MongoDB文件转移
import pymongo
import json
class Transform:
def __init__(self):
self.MONGO_URI = 'mongodb://127.0.0.1:27017'
self.client = pymongo.MongoClient(self.MONGO_URI)
self.database = self.client['test']
self.collection = self.database['your_collection_name']
def write(self, st, num, file_path):
cur = self.collection.find().skip(st).limit(num)
with open(file_path.format(), "w") as f:
for item in cur:
del item['_id']
json.dump(item, f, ensure_ascii=False)
f.write('\n')
print("{}条数据加载入{}文件完成...".format(num, file_path))
def read(self, file_path):
with open(file_path,'r') as load_f:
while True:
line = load_f.readline()
if line:
r = json.loads(line)
print(r)
else:
break
def count(self):
print(collection.count())
if __name__ == '__main__':
transform = Transform()
for i in range(0, 85):
transform.write(i*10000, 10000, "json/record_%03d.json" % i)
python 多进程的简单使用
import pymongo
import json
import multiprocessing
import sys
def worker(st, num):
MONGO_URI = 'mongodb://127.0.0.1:27017'
client = pymongo.MongoClient(MONGO_URI)
database = client['test']
col1 = database['your_collection_name1']
col2 = database['your_collection_name2']
cur = col1.find().skip(st).limit(num)
for item in cur:
del item['_id']
raw_key = item['raw_key'] = item['year'] + item['area'] + item['college'] + item['major']
col2.update_many({'raw_key': raw_key}, {'$set': dict(item)}, upsert=True)
if __name__ == '__main__':
st = int(sys.argv[1])
ed = int(sys.argv[2])
process_num = int(sys.argv[3])
x = int((ed-st)/process_num)
for i in range(0, process_num):
multiprocessing.Process(target=worker, args=(st+x*i, x)).start()