import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors


# Sample users of the network; each record is a dict with an "id" and a "name".
users = [
    { "id": 0, "name": "Hero" },
    { "id": 1, "name": "Dunn" },
    { "id": 2, "name": "Sue" },
    { "id": 3, "name": "Chi" },
    { "id": 4, "name": "Thor" },
    { "id": 5, "name": "Clive" },
    { "id": 6, "name": "Hicks" },
    { "id": 7, "name": "Devin" },
    { "id": 8, "name": "Kate" },
    { "id": 9, "name": "Klein" }
]


# Friendships as pairs of user ids: a pair (i, j) means users i and j are
# friends. Each friendship is listed once.
friendship_pairs = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
                    (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9)]


# Graph of the network: one node per user id, one edge per friendship pair.
friendship_graph = nx.Graph()
friendship_graph.add_edges_from(friendship_pairs)
# To generate the node positions randomly instead, uncomment the line below.
# pos = nx.spring_layout(friendship_graph)


# Hard-coded 2-D drawing coordinates, keyed by user id, so that the graph is
# laid out the same way every time it is drawn.
pos = {0: np.array([-0.93170375, -0.02197117]),
        1: np.array([-0.72438717, -0.08068836]),
        2: np.array([-0.73451474,  0.06758806]),
        3: np.array([-0.47435  ,  0.0068244]),
        4: np.array([-0.11830089,  0.01805556]),
        5: np.array([0.2389443 , 0.02015169]),
        6: np.array([ 0.4986366 , -0.08446365]),
        7: np.array([0.50763805, 0.11006795]),
        8: np.array([ 0.7380376 , -0.00350229]),
        9: np.array([ 1.        , -0.03206219])}


# Draw the friendship graph at those positions, labelling each node with its id.
nx.draw(friendship_graph, pos, with_labels=True, font_weight='bold')


# Adjacency lists: user id -> list of friend ids. Every user gets an entry up
# front, so a user with no friendships still maps to an empty list.
friendships = {user["id"]: [] for user in users}

# A friendship goes both ways, so record each pair under both users.
for i, j in friendship_pairs:
    friendships[i].append(j)
    friendships[j].append(i)


friendships

{0: [1, 2],
 1: [0, 2, 3],
 2: [0, 1, 3],
 3: [1, 2, 4],
 4: [3, 5],
 5: [4, 6, 7],
 6: [5, 8],
 7: [5, 8],
 8: [6, 7, 9],
 9: [8]}


def number_of_friends(user):
    """Return how many friends `user` has.

    `user` is a user record such as { "id": 0, "name": "Hero" }; only its
    "id" is used, to look the user up in `friendships`.
    """
    return len(friendships[user["id"]])


number_of_friends({ "id": 0, "name": "Hero" })

2


# Sum of every user's friend count. Each friendship is counted twice,
# once from each side.
total_connections = sum(map(number_of_friends, users))
total_connections

24


# Average number of friends per user.
num_users = len(users)
avg_connections = total_connections / num_users
avg_connections

2.4


# (user id, friend count) pairs ordered from most to fewest friends.
# The sort is stable, so users with equal counts stay in `users` order.
num_friends_by_id = sorted(
    ((user["id"], number_of_friends(user)) for user in users),
    key=lambda pair: pair[1],
    reverse=True,
)
num_friends_by_id

[(1, 3),
 (2, 3),
 (3, 3),
 (5, 3),
 (8, 3),
 (0, 2),
 (4, 2),
 (6, 2),
 (7, 2),
 (9, 1)]


# Degree centrality of every node in the friendship graph, keyed by node.
degreeCentrality = nx.degree_centrality(friendship_graph)


# Redraw the graph with each node colored by its degree centrality. The node
# list and the color list are taken from the same dict, so they line up.
nx.draw(friendship_graph, pos, with_labels=True,
        node_color=list(degreeCentrality.values()),
        nodelist=list(degreeCentrality.keys()))


def foaf_ids_bad(user):
    """Return the friend ids of every friend of `user`, concatenated.

    Nothing is filtered out, so the result can contain `user`'s own id, ids
    of `user`'s direct friends, and the same id more than once.
    """
    foaf_ids = []
    for friend_id in friendships[user["id"]]:
        foaf_ids.extend(friendships[friend_id])
    return foaf_ids


foaf_ids_bad(users[0])

[0, 2, 3, 0, 1, 3]


from collections import Counter                  

def friends_of_friends(user):
    """Count the friends-of-friends of `user`.

    Returns a Counter mapping each friend-of-a-friend id to the number of
    `user`'s friends who list that id as a friend. `user` and `user`'s direct
    friends are left out.
    """
    user_id = user["id"]
    direct_friends = friendships[user_id]

    mutual_counts = Counter()
    for friend_id in direct_friends:
        for foaf_id in friendships[friend_id]:
            # Skip the user themselves and anyone who is already a friend.
            if foaf_id == user_id or foaf_id in direct_friends:
                continue
            mutual_counts[foaf_id] += 1
    return mutual_counts


friends_of_friends(users[3])

Counter({0: 2, 5: 1})


# (user_id, interest) pairs: one entry per interest a user has listed.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"), (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"), (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"), (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"), (6, "statistics"),
    (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"), (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"), (9, "Hadoop"),
    (9, "Java"), (9, "MapReduce"), (9, "Big Data")
]


def data_scientists_who_like(target_interest):
    """Return the ids of the users who list `target_interest` as an interest.

    The match is an exact string comparison, and the whole `interests` list
    is scanned on every call. Ids are returned in `interests` order.
    """
    return [uid for uid, topic in interests if topic == target_interest]


data_scientists_who_like('Big Data')

[0, 8, 9]


from collections import defaultdict

# Index from interest -> list of ids of the users who have that interest, so
# a lookup by interest does not need to scan the whole `interests` list.
user_ids_by_interest = defaultdict(list)

for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)


user_ids_by_interest['Big Data']

[0, 8, 9]


user_ids_by_interest

defaultdict(list,
            {'Hadoop': [0, 9],
             'Big Data': [0, 8, 9],
             'HBase': [0, 1],
             'Java': [0, 5, 9],
             'Spark': [0],
             'Storm': [0],
             'Cassandra': [0, 1],
             'NoSQL': [1],
             'MongoDB': [1],
             'Postgres': [1],
             'Python': [2, 3, 5],
             'scikit-learn': [2, 7],
             'scipy': [2],
             'numpy': [2],
             'statsmodels': [2],
             'pandas': [2],
             'R': [3, 5],
             'statistics': [3, 6],
             'regression': [3, 4],
             'probability': [3, 6],
             'machine learning': [4, 7],
             'decision trees': [4],
             'libsvm': [4],
             'C++': [5],
             'Haskell': [5],
             'programming languages': [5],
             'mathematics': [6],
             'theory': [6],
             'Mahout': [7],
             'neural networks': [7, 8],
             'deep learning': [8],
             'artificial intelligence': [8],
             'MapReduce': [9]})


# Index from user id -> list of that user's interests, in `interests` order.
interests_by_user_id = defaultdict(list)

for user_id, interest in interests:
    interests_by_user_id[user_id].append(interest)


interests_by_user_id[3]

['R', 'Python', 'statistics', 'regression', 'probability']


def most_common_interests_with(user):
    """Count how many interests each other user shares with `user`.

    Returns a Counter mapping another user's id to the number of `user`'s
    interests that the other user also has. `user` is never included.
    """
    own_id = user["id"]

    shared_counts = Counter()
    for interest in interests_by_user_id[own_id]:
        for other_id in user_ids_by_interest[interest]:
            if other_id != own_id:
                shared_counts[other_id] += 1
    return shared_counts


most_common_interests_with(users[3])

Counter({5: 2, 2: 1, 6: 2, 4: 1})


# (salary, tenure) pairs, one per person; tenure is presumably in years.
salaries_and_tenures = [(83000, 8.7), (88000, 8.1),
                        (48000, 0.7), (76000, 6),
                        (69000, 6.5), (76000, 7.5),
                        (60000, 2.5), (83000, 10),
                        (48000, 1.9), (63000, 4.2)]


# Split the pairs into two parallel lists (same order) for plotting.
salaries = []
tenures = []

for sal, ten in salaries_and_tenures:
    salaries.append(sal)
    tenures.append(ten)


from matplotlib import pyplot as plt

# Scatter plot with tenure on the x-axis and salary on the y-axis.
plt.scatter(tenures, salaries)
plt.xlabel("Tenures")
plt.ylabel("Salaries")
plt.show()


# Group the salaries by exact tenure value: tenure -> list of salaries.
salary_by_tenure = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    salary_by_tenure[tenure].append(salary)

# Mean salary for each distinct tenure value.
average_salary_by_tenure = {
    years: sum(group) / len(group)
    for years, group in salary_by_tenure.items()
}


average_salary_by_tenure

{8.7: 83000.0,
 8.1: 88000.0,
 0.7: 48000.0,
 6: 76000.0,
 6.5: 69000.0,
 7.5: 76000.0,
 2.5: 60000.0,
 10: 83000.0,
 1.9: 48000.0,
 4.2: 63000.0}


def tenure_bucket(tenure):
    """Return the label of the tenure range that `tenure` (in years) falls in.

    Under 2 is "less than two", from 2 up to but not including 5 is
    "between two and five", and everything else is "more than five".
    """
    # Upper bounds are exclusive and are checked from the smallest up.
    upper_bounds = (
        (2, "less than two"),
        (5, "between two and five"),
    )
    for upper_bound, label in upper_bounds:
        if tenure < upper_bound:
            return label
    return "more than five"     # five years or more


tenure_bucket(0.7)

'less than two'


tenure_bucket(2.5)

'between two and five'


tenure_bucket(7.5)

'more than five'


# Group the salaries by tenure bucket: bucket label -> list of salaries.
salary_by_tenure_bucket = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    bucket = tenure_bucket(tenure)
    salary_by_tenure_bucket[bucket].append(salary)


salary_by_tenure_bucket

defaultdict(list,
            {'more than five': [83000, 88000, 76000, 69000, 76000, 83000],
             'less than two': [48000, 48000],
             'between two and five': [60000, 63000]})


# Mean salary for each tenure bucket.
average_salary_by_bucket = {
    bucket_name: sum(bucket_salaries) / len(bucket_salaries)
    for bucket_name, bucket_salaries in salary_by_tenure_bucket.items()
}


average_salary_by_bucket

{'more than five': 79166.66666666667,
 'less than two': 48000.0,
 'between two and five': 61500.0}


def predict_paid_or_unpaid(years_experience):
    """Predict "paid" or "unpaid" from years of experience.

    Returns "unpaid" for experience from 3.0 up to but not including 8.5,
    and "paid" for anything below or above that range.
    """
    if years_experience < 3.0:
        return "paid"
    return "unpaid" if years_experience < 8.5 else "paid"


predict_paid_or_unpaid(2.5)

'paid'


predict_paid_or_unpaid(4.8)

'unpaid'


predict_paid_or_unpaid(11.3)

'paid'


# (user_id, interest) pairs. This is the same data as the `interests` list
# assigned earlier in this file, assigned again here.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"), (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"), (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"), (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"), (6, "statistics"),
    (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"), (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"), (9, "Hadoop"),
    (9, "Java"), (9, "MapReduce"), (9, "Big Data")
]


# Word frequencies across all interests: each interest is lowercased and
# split on whitespace, so "Big Data" counts as "big" and "data".
all_interest_text = " ".join(phrase.lower() for _, phrase in interests)
words_and_counts = Counter(all_interest_text.split())


# Print the words that occur more than once, most frequent first.
for word, count in words_and_counts.most_common():
    if count <= 1:
        continue
    print(word, count)

big 3
data 3
java 3
python 3
learning 3
hadoop 2
hbase 2
cassandra 2
scikit-learn 2
r 2
statistics 2
regression 2
probability 2
machine 2
neural 2
networks 2


# Print the words that occur more than twice, most frequent first.
for word, count in words_and_counts.most_common():
    if count <= 2:
        continue
    print(word, count)

big 3
data 3
java 3
python 3
learning 3


def count_common_interests():
    """The return value is a dict of the following form:
        { interest: number_of_people }"""
    # NOTE: exercise stub. There is no body yet, so a call returns None.


def common_interests(n):
    """The return value is a dict of the following form:
        { interest: number_of_people }
        where number_of_people is at least n."""
    # NOTE: exercise stub. There is no body yet, so a call returns None.

1장 들어가기¶

핵심 인물 찾기¶

알아두기¶

알아두기: 조건제시법¶

알아두기¶

알아두기¶

연결 중심성(degree centrality)¶

알아두기¶

친구 추천¶

친구 공유¶

알아두기¶

공통 관심사¶

리스트(`list`) 대신에 사전(`dict`) 자료형 활용해야 하는 경우¶

알아두기¶

연봉과 경력¶

알아두기¶

알아두기¶

연습문제¶

참조¶

유료 계정¶

영어 단어¶

관심사 찾기¶

연습문제¶

연습문제¶

1장 들어가기¶

핵심 인물 찾기¶

알아두기¶

알아두기: 조건제시법¶

알아두기¶

알아두기¶

연결 중심성(degree centrality)¶

알아두기¶

친구 추천¶

친구 공유¶

알아두기¶

공통 관심사¶

리스트(list) 대신에 사전(dict) 자료형 활용해야 하는 경우¶

알아두기¶

연봉과 경력¶

알아두기¶

알아두기¶

연습문제¶

참조¶

유료 계정¶

영어 단어¶

관심사 찾기¶

연습문제¶

연습문제¶

리스트(`list`) 대신에 사전(`dict`) 자료형 활용해야 하는 경우¶