Skip to content

Commit

Permalink
removing numpy dependency
Browse files (browse the repository at this point in the history)
  • Loading branch information
mattbierbaum committed Mar 23, 2019
1 parent 8efb5a2 commit d325fa0
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions arxiv_public_data/internal_citations.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,7 +6,7 @@
import os
import gzip
import json
import numpy as np
import math
from multiprocessing import Pool

from arxiv_public_data.regex_arxiv import REGEX_ARXIV_FLEXIBLE, clean
Expand Down Expand Up @@ -94,7 +94,12 @@ def citation_list_parallel(N=8):
log.info('Calculating citation network for {} articles'.format(len(articles)))

pool = Pool(N)
cites = pool.map(citation_list_inner, np.array_split(articles, N))

A = len(articles)
divs = list(range(0, A, math.ceil(A/N))) + [A]
chunks = [articles[s:e] for s, e in zip(divs[:-1], divs[1:])]

cites = pool.map(citation_list_inner, chunks)

allcites = {}
for c in cites:
Expand Down

0 comments on commit d325fa0

Please sign in to comment.