From 15778b3753af61687f3fd1ba27bcc39f5157d23b Mon Sep 17 00:00:00 2001
From: Nikita Babbar <115877450+NIKITA320495@users.noreply.github.com>
Date: Mon, 20 May 2024 13:47:53 +0530
Subject: [PATCH 1/5] Add files via upload

commit -m"created a webpage for analysis
> co-authored-by: NIKITA320495 nikitababb036@gmail.com
> co-authored-by: Leena2403 leenagoyal2403@gmail.com"
---
 functions.py | 316 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 316 insertions(+)
 create mode 100644 functions.py

diff --git a/functions.py b/functions.py
new file mode 100644
index 0000000..13ceff1
--- /dev/null
+++ b/functions.py
@@ -0,0 +1,316 @@
+import streamlit as st
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+import pycountry
+import numpy as np
+from scipy.stats import norm
+import random
+from scipy.stats import norm
+import re
+
+data = pd.read_csv('df2020.csv')
+df2018 = pd.read_csv('df2018.csv')
+full_data2018 = pd.read_csv('../Data/survey_results_public_2018.csv')
+full_data2019=pd.read_csv('../Data/survey_results_public_2019.csv')
+full_df2020 = pd.read_csv('../Data/survey_results_public_2020.csv')
+df2019 = pd.read_csv('df2019.csv')
+df2020 = data[(data['SalaryUSD'] < 200000)]
+
+# features for job satisfaction
+results = pd.read_csv("results.csv")
+
+
+#######################################
+# VISUALISATION STARTS
+#######################################
+
+######-Nikita-########
+
+def plot_boxplot(data, x, y, title):
+    fig = go.Figure()
+    for group_name, group_data in data.groupby(x):
+        fig.add_trace(go.Box(y=group_data[y], name=group_name))
+    fig.update_layout(title=title, xaxis_title=x, yaxis_title=y)
+    st.plotly_chart(fig)
+
+#########################################################################    
+
+def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700):
+    df_counts = df[column_name].value_counts().head(top_n).reset_index()
+    df_counts.columns = [column_name, 'Count']
+    
+    fig = px.bar(df_counts, x=column_name, y='Count', 
+                 labels={column_name: column_name, 'Count': 'Number of Developers'},
+                 color=column_name, color_discrete_sequence=px.colors.qualitative.Pastel)
+    
+    fig.update_layout(xaxis_title=column_name, yaxis_title='Number of Developers')
+    fig.update_layout(height=height, width=width)
+
+    return st.plotly_chart(fig)
+
+
+def plot_pie_plotly(df, column_name,top_n=10,  height=400, width=400 ):
+    participation_rate = df[column_name].value_counts().keys().tolist()[:top_n]
+    count = df[column_name].value_counts().tolist()[:top_n]
+
+    fig_pie = go.Figure(data=[go.Pie(labels=participation_rate, values=count)])
+    fig_pie.update_layout(title='Top {} Distribution'.format(column_name))
+    fig_pie.update_layout(height=height, width=width)
+
+    st.plotly_chart(fig_pie)    
+
+def plot_value_counts_plotly(df, column_name):
+    colors = ['lightseagreen', 'lightgreen', 'lightyellow', 'lightcoral', 'lightsalmon', 'lavender']
+
+    counts = df[column_name].value_counts()
+    fig = go.Figure(go.Bar(x=counts.index, y=counts.values, marker_color=random.choice(colors)))
+    fig.update_layout(title=f'Value Counts for {column_name}', xaxis_title='Response', yaxis_title='Count')
+    return fig
+
+
+def generate_normal_distribution_plots(df, column, top_n=10):
+    countries = df[column].value_counts().sort_values(ascending=False)[:top_n].index.tolist()
+
+    for country in countries:
+        temp_salaries = df.loc[df[column] == country, 'SalaryUSD']
+
+        #normal distribution curve
+        x_values = np.linspace(temp_salaries.min(), temp_salaries.max(), 100)
+        y_values = norm.pdf(x_values, temp_salaries.mean(), temp_salaries.std())
+
+        fig = go.Figure(data=go.Scatter(x=x_values, y=y_values))
+        
+        # mean line
+        fig.add_shape(type="line",
+                      x0=temp_salaries.mean(), y0=0,
+                      x1=temp_salaries.mean(), y1=norm.pdf(temp_salaries.mean(), temp_salaries.mean(), temp_salaries.std()),
+                      line=dict(color="red", width=2, dash="dash"))
+
+        fig.update_layout(title='Normal Distribution of Annual Salaries in {}'.format(country),
+                          xaxis_title="Annual Salary in USD",
+                          yaxis_title="Density")
+        fig.update_layout(height=400, width=370)
+
+        # st.plotly_chart(fig)
+        yield fig
+
+
+def plot_age_distribution(df, column_name):
+    df['Age_range'] = np.where((df[column_name] >= 15) & (df[column_name] <= 19), '15 - 19 years', 'Age_unknown')
+    df['Age_range'] = np.where((df[column_name] >= 20) & (df[column_name] <= 24), '20 - 24 years', df['Age_range'])
+    df['Age_range'] = np.where((df[column_name] >= 25) & (df[column_name] <= 29), '25 - 29 years', df['Age_range'])
+    df['Age_range'] = np.where((df[column_name] >= 30) & (df[column_name] <= 34), '30 - 34 years', df['Age_range'])
+    df['Age_range'] = np.where((df[column_name] >= 35) & (df[column_name] <= 39), '35 - 39 years', df['Age_range'])
+    df['Age_range'] = np.where((df[column_name] >= 40) & (df[column_name] <= 45), '40 - 45 years', df['Age_range'])
+    df['Age_range'] = np.where((df[column_name] >= 46), '46 and above years', df['Age_range'])
+
+    df_age = df.groupby(['Age_range']).size().reset_index(name='Count')
+    df_age.sort_values(by=['Count'], ascending=False, inplace=True)
+
+    # Plotly bar chart
+    fig = go.Figure(data=go.Bar(
+        x=df_age['Count'],
+        y=df_age['Age_range'],
+        orientation='h'
+    ))
+
+    # Update layout
+    fig.update_layout(
+        xaxis_title='Count',
+        yaxis_title='Age Range',
+        yaxis=dict(autorange="reversed")
+    )
+
+    st.plotly_chart(fig)
+
+def counts(df, column_name, year):
+    language_counts = df[column_name].str.split(';', expand=True).stack().value_counts().to_frame(name=year)
+    language_counts[column_name] = language_counts.index
+    language_counts.reset_index(drop=True, inplace=True)
+    language_counts = language_counts[[column_name, year]]
+    return language_counts
+
+def compare_column_and_plot(column):
+    languagedesire_2018 = counts(df2018, column, '2018')
+    languagedesire_2019 = counts(df2019, column, '2019')
+    languagedesire_2020 = counts(df2020, column, '2020')
+
+    # Merge language counts for both years
+    languagedesire_all = pd.merge(languagedesire_2018, languagedesire_2019, on=column, how='outer')
+    languagedesire_all = pd.merge(languagedesire_all, languagedesire_2020, on=column, how='outer')
+
+    
+    # Fill NaN values with 0 and convert counts to integers
+    languagedesire_all.fillna(0, inplace=True)
+    languagedesire_all['2018'] = languagedesire_all['2018'].astype(int)
+    languagedesire_all['2019'] = languagedesire_all['2019'].astype(int)
+    languagedesire_all['2020'] = languagedesire_all['2020'].astype(int)
+    
+    
+    languagedesire_all.set_index(column, inplace=True)
+
+    languagedesire19_20 = languagedesire_all.div(languagedesire_all.sum())
+
+    st.write(languagedesire19_20.head(5))
+    fig = go.Figure()
+
+    for column in languagedesire19_20.columns:
+        fig.add_trace(go.Bar(x=languagedesire19_20.index, y=languagedesire19_20[column], name=column))
+
+    fig.update_layout(
+        xaxis_title=column,
+        yaxis_title='Percentages',
+        font=dict(size=14),
+        barmode='group',
+        height=600,
+        width=800
+    )
+
+
+    st.plotly_chart(fig)
+
+def generate_choropleth(df, column_name):
+    grouped_df = df.groupby('Country').size().reset_index(name='Respondents')
+
+    # ISO country code from the country name
+    def get_country_code(name):
+        try:
+            return pycountry.countries.lookup(name).alpha_3
+        except LookupError:
+            return None
+
+    # Adding country code column
+    grouped_df['Country_code'] = grouped_df['Country'].apply(get_country_code)
+
+    #choropleth map
+    fig = px.choropleth(grouped_df,
+                        locations="Country_code",
+                        color=column_name,
+                        hover_name="Country",
+                        projection="natural earth",
+                        color_continuous_scale='Peach',
+                        range_color=[0, 10000],
+                        labels={column_name: 'Respondents'}
+                        )
+    fig.update_layout(height=600, width=900)
+    return st.plotly_chart(fig)
+
+def gender_vs_top5countries(df):
+    all_data = df.groupby(['Country', 'Gender']).size().reset_index(name='Count')
+    all_data['Total'] = all_data.groupby('Country')['Count'].transform('sum')
+    all_data['Percentage'] = all_data['Count'] / all_data['Total'] * 100
+
+
+    top_countries = all_data.groupby('Country')['Total'].max().nlargest(5).index
+    top_data = all_data[all_data['Country'].isin(top_countries)]
+
+    # men and women data
+    men_data = top_data[top_data['Gender'] == 'Man']
+    women_data = top_data[top_data['Gender'] == 'Woman']
+
+    fig = go.Figure()
+
+    #bars for 'Men'
+    fig.add_trace(go.Bar(x=men_data['Country'], y=men_data['Percentage'], name='Men', marker_color='darkblue'))
+
+    #bars for 'Women'
+    fig.add_trace(go.Bar(x=women_data['Country'], y=women_data['Percentage'], name='Women', marker_color='#5E96E9'))
+
+    fig.update_layout(
+        title='Gender vs Top 5 Countries in 2019',
+        xaxis_title='Top 5 Countries',
+        yaxis_title='Percentage',
+        barmode='group'
+    )
+
+    return fig
+def heighest_paying_2019():
+    ds = df2019[df2019['DevType'].str.contains('Data scientist') == True ]
+    ds_mean_salary = ds.groupby('Country')['SalaryUSD'].mean().reset_index(name='Mean')
+    ds_mean_salary.sort_values(by=['Mean'], ascending=False, inplace=True)
+    ds_mean_salary = ds_mean_salary[(ds_mean_salary['Mean'] <= 280000)]
+    Top_mean_salary = ds_mean_salary[:10]
+    
+    fig = px.bar(Top_mean_salary, x='Mean', y='Country', orientation='h',
+                 labels={'Mean': 'Average Salary in US$', 'Country': 'Country'},
+                 title='The Top 10 highest paying data scientist countries in 2019')
+    
+    fig.update_layout(yaxis={'categoryorder':'total ascending'}, 
+                      title={'x':0.5, 'xanchor': 'center', 'yanchor': 'top'})
+    st.plotly_chart(fig)
+def heighest_paying(df):
+    ds = df[df['DevType'].str.contains('Data scientist') == True ]
+    ds_mean_salary = ds.groupby('Country')['SalaryUSD'].mean().reset_index(name='Mean')
+    ds_mean_salary.sort_values(by=['Mean'], ascending=False, inplace=True)
+    ds_mean_salary = ds_mean_salary[(ds_mean_salary['Mean'] <= 280000)]
+    Top_mean_salary = ds_mean_salary[:10]
+    
+    fig = px.bar(Top_mean_salary, x='Mean', y='Country', orientation='h',
+                 labels={'Mean': 'Average Salary in US$', 'Country': 'Country'},
+                 title='The Top 10 highest paying data scientist countries ')
+    
+    fig.update_layout(yaxis={'categoryorder':'total ascending'}, 
+                      title={'x':0.5, 'xanchor': 'center', 'yanchor': 'top'})
+    st.plotly_chart(fig)
+def plot_value_counts_plotly(column_name, df, column):
+    values = df[column_name].value_counts()
+    fig = go.Figure(data=[go.Bar(x=values.index, y=values.values, marker_color=random.choice(['lightseagreen', 'lightgreen', 'lightyellow', 'lightcoral', 'lightsalmon', 'lavender']))])
+    fig.update_layout(title=f'Value Counts for {column_name}', xaxis_title='Response', yaxis_title='Count')
+    column.plotly_chart(fig)
+
+def ai_graphs():
+    st.title('AI Survey Responses')
+    df = full_data2018[['AIDangerous', 'AIInteresting', 'AIResponsible', 'AIFuture']]
+
+    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
+
+    short_mapping = {
+        'Algorithms making important decisions': 'Algorithms',
+        'Artificial intelligence surpassing human intelligence ("the singularity")': 'AI Singularity',
+        'Evolving definitions of "fairness" in algorithmic versus human decisions': 'Fairness Evolution',
+        "Increasing automation of jobs": 'Automation',
+        "The developers or the people creating the AI": 'Developers',
+        "A governmental or other regulatory body": 'Government/Regulatory',
+        "Prominent industry leaders": 'Industry Leaders',
+        "Nobody": 'No Responsibility',
+        "I'm excited about the possibilities more than worried about the dangers.": 'Excited about AI Future',
+        "I'm worried about the dangers more than I'm excited about the possibilities.": 'Worried about AI Future',
+        "I don't care about it, or I haven't thought about it.": 'Indifferent about AI Future'
+    }
+
+    df.replace(short_mapping, inplace=True)
+
+    col1, col2 = st.columns(2)
+
+    plot_value_counts_plotly('AIDangerous', df, col1)
+    plot_value_counts_plotly('AIInteresting', df, col1)
+    plot_value_counts_plotly('AIResponsible', df, col2)
+    plot_value_counts_plotly('AIFuture', df, col2)   
+
+
+def result_plot(data):
+    new_index = data.Rates.sort_values(ascending=False).index
+    sorted_results = data.reindex(new_index)
+
+    filtered_results = sorted_results[np.abs(sorted_results.Rates) > 0.1]
+
+    #Plotly figure
+    fig = px.bar(
+        filtered_results,
+        x='Rates',
+        y='Columns',
+        orientation='h',
+        labels={'Rates': 'Negative and Positive Features', 'Columns': 'Features'},
+    )
+
+    fig.update_layout(
+        xaxis_title='Negative and Positive Features',
+        yaxis_title='Features',
+        title_font_size=25,
+        xaxis_title_font_size=25,
+        yaxis_title_font_size=25,
+        height=800,
+    )
+
+    st.plotly_chart(fig, use_container_width=True)
\ No newline at end of file

From ef1d36ceb547b95b78647a2076e031373b3313c4 Mon Sep 17 00:00:00 2001
From: Nikita Babbar <115877450+NIKITA320495@users.noreply.github.com>
Date: Mon, 20 May 2024 22:40:25 +0530
Subject: [PATCH 2/5] readme.md

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 875c4ad..0ddf79e 100644
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,6 @@
 # Stack Overflow Analysis Guidelines
 
-## 👨‍💻 Demo Video
+## 👨‍💻 Demo Video :
 
 [Watch the demo video](https://user-images.githubusercontent.com/30715153/168960157-e9448ea4-206c-44c0-bbd5-5e4770c0411f.mp4)
 

From f75babb997c73fd27af04351615f5dd3020cf684 Mon Sep 17 00:00:00 2001
From: Nikita Babbar <115877450+NIKITA320495@users.noreply.github.com>
Date: Mon, 20 May 2024 22:42:56 +0530
Subject: [PATCH 3/5] update readme.md

---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 0ddf79e..875c4ad 100644
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,6 @@
 # Stack Overflow Analysis Guidelines
 
-## 👨‍💻 Demo Video :
+## 👨‍💻 Demo Video
 
 [Watch the demo video](https://user-images.githubusercontent.com/30715153/168960157-e9448ea4-206c-44c0-bbd5-5e4770c0411f.mp4)
 

From d267a4d2ccca4aec838ee15bdd2d9eaa392714cf Mon Sep 17 00:00:00 2001
From: Nikita Babbar <115877450+NIKITA320495@users.noreply.github.com>
Date: Mon, 20 May 2024 22:43:53 +0530
Subject: [PATCH 4/5] Update readme.md

updated readme
---
 readme.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readme.md b/readme.md
index 875c4ad..5d76071 100644
--- a/readme.md
+++ b/readme.md
@@ -1,6 +1,6 @@
 # Stack Overflow Analysis Guidelines
 
-## 👨‍💻 Demo Video
+## 👨‍💻 Demo Video 
 
 [Watch the demo video](https://user-images.githubusercontent.com/30715153/168960157-e9448ea4-206c-44c0-bbd5-5e4770c0411f.mp4)
 

From 024294cbded1705f7c79671f6c9d26dff5c10d36 Mon Sep 17 00:00:00 2001
From: Nikita Babbar <115877450+NIKITA320495@users.noreply.github.com>
Date: Mon, 20 May 2024 22:50:47 +0530
Subject: [PATCH 5/5] Delete functions.py

---
 functions.py | 316 ---------------------------------------------------
 1 file changed, 316 deletions(-)
 delete mode 100644 functions.py

diff --git a/functions.py b/functions.py
deleted file mode 100644
index 13ceff1..0000000
--- a/functions.py
+++ /dev/null
@@ -1,316 +0,0 @@
-import streamlit as st
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-import pycountry
-import numpy as np
-from scipy.stats import norm
-import random
-from scipy.stats import norm
-import re
-
-data = pd.read_csv('df2020.csv')
-df2018 = pd.read_csv('df2018.csv')
-full_data2018 = pd.read_csv('../Data/survey_results_public_2018.csv')
-full_data2019=pd.read_csv('../Data/survey_results_public_2019.csv')
-full_df2020 = pd.read_csv('../Data/survey_results_public_2020.csv')
-df2019 = pd.read_csv('df2019.csv')
-df2020 = data[(data['SalaryUSD'] < 200000)]
-
-# features for job satisfaction
-results = pd.read_csv("results.csv")
-
-
-#######################################
-# VISUALISATION STARTS
-#######################################
-
-######-Nikita-########
-
-def plot_boxplot(data, x, y, title):
-    fig = go.Figure()
-    for group_name, group_data in data.groupby(x):
-        fig.add_trace(go.Box(y=group_data[y], name=group_name))
-    fig.update_layout(title=title, xaxis_title=x, yaxis_title=y)
-    st.plotly_chart(fig)
-
-#########################################################################    
-
-def plot_bar_plotly(df, column_name, top_n=10, height=450, width=700):
-    df_counts = df[column_name].value_counts().head(top_n).reset_index()
-    df_counts.columns = [column_name, 'Count']
-    
-    fig = px.bar(df_counts, x=column_name, y='Count', 
-                 labels={column_name: column_name, 'Count': 'Number of Developers'},
-                 color=column_name, color_discrete_sequence=px.colors.qualitative.Pastel)
-    
-    fig.update_layout(xaxis_title=column_name, yaxis_title='Number of Developers')
-    fig.update_layout(height=height, width=width)
-
-    return st.plotly_chart(fig)
-
-
-def plot_pie_plotly(df, column_name,top_n=10,  height=400, width=400 ):
-    participation_rate = df[column_name].value_counts().keys().tolist()[:top_n]
-    count = df[column_name].value_counts().tolist()[:top_n]
-
-    fig_pie = go.Figure(data=[go.Pie(labels=participation_rate, values=count)])
-    fig_pie.update_layout(title='Top {} Distribution'.format(column_name))
-    fig_pie.update_layout(height=height, width=width)
-
-    st.plotly_chart(fig_pie)    
-
-def plot_value_counts_plotly(df, column_name):
-    colors = ['lightseagreen', 'lightgreen', 'lightyellow', 'lightcoral', 'lightsalmon', 'lavender']
-
-    counts = df[column_name].value_counts()
-    fig = go.Figure(go.Bar(x=counts.index, y=counts.values, marker_color=random.choice(colors)))
-    fig.update_layout(title=f'Value Counts for {column_name}', xaxis_title='Response', yaxis_title='Count')
-    return fig
-
-
-def generate_normal_distribution_plots(df, column, top_n=10):
-    countries = df[column].value_counts().sort_values(ascending=False)[:top_n].index.tolist()
-
-    for country in countries:
-        temp_salaries = df.loc[df[column] == country, 'SalaryUSD']
-
-        #normal distribution curve
-        x_values = np.linspace(temp_salaries.min(), temp_salaries.max(), 100)
-        y_values = norm.pdf(x_values, temp_salaries.mean(), temp_salaries.std())
-
-        fig = go.Figure(data=go.Scatter(x=x_values, y=y_values))
-        
-        # mean line
-        fig.add_shape(type="line",
-                      x0=temp_salaries.mean(), y0=0,
-                      x1=temp_salaries.mean(), y1=norm.pdf(temp_salaries.mean(), temp_salaries.mean(), temp_salaries.std()),
-                      line=dict(color="red", width=2, dash="dash"))
-
-        fig.update_layout(title='Normal Distribution of Annual Salaries in {}'.format(country),
-                          xaxis_title="Annual Salary in USD",
-                          yaxis_title="Density")
-        fig.update_layout(height=400, width=370)
-
-        # st.plotly_chart(fig)
-        yield fig
-
-
-def plot_age_distribution(df, column_name):
-    df['Age_range'] = np.where((df[column_name] >= 15) & (df[column_name] <= 19), '15 - 19 years', 'Age_unknown')
-    df['Age_range'] = np.where((df[column_name] >= 20) & (df[column_name] <= 24), '20 - 24 years', df['Age_range'])
-    df['Age_range'] = np.where((df[column_name] >= 25) & (df[column_name] <= 29), '25 - 29 years', df['Age_range'])
-    df['Age_range'] = np.where((df[column_name] >= 30) & (df[column_name] <= 34), '30 - 34 years', df['Age_range'])
-    df['Age_range'] = np.where((df[column_name] >= 35) & (df[column_name] <= 39), '35 - 39 years', df['Age_range'])
-    df['Age_range'] = np.where((df[column_name] >= 40) & (df[column_name] <= 45), '40 - 45 years', df['Age_range'])
-    df['Age_range'] = np.where((df[column_name] >= 46), '46 and above years', df['Age_range'])
-
-    df_age = df.groupby(['Age_range']).size().reset_index(name='Count')
-    df_age.sort_values(by=['Count'], ascending=False, inplace=True)
-
-    # Plotly bar chart
-    fig = go.Figure(data=go.Bar(
-        x=df_age['Count'],
-        y=df_age['Age_range'],
-        orientation='h'
-    ))
-
-    # Update layout
-    fig.update_layout(
-        xaxis_title='Count',
-        yaxis_title='Age Range',
-        yaxis=dict(autorange="reversed")
-    )
-
-    st.plotly_chart(fig)
-
-def counts(df, column_name, year):
-    language_counts = df[column_name].str.split(';', expand=True).stack().value_counts().to_frame(name=year)
-    language_counts[column_name] = language_counts.index
-    language_counts.reset_index(drop=True, inplace=True)
-    language_counts = language_counts[[column_name, year]]
-    return language_counts
-
-def compare_column_and_plot(column):
-    languagedesire_2018 = counts(df2018, column, '2018')
-    languagedesire_2019 = counts(df2019, column, '2019')
-    languagedesire_2020 = counts(df2020, column, '2020')
-
-    # Merge language counts for both years
-    languagedesire_all = pd.merge(languagedesire_2018, languagedesire_2019, on=column, how='outer')
-    languagedesire_all = pd.merge(languagedesire_all, languagedesire_2020, on=column, how='outer')
-
-    
-    # Fill NaN values with 0 and convert counts to integers
-    languagedesire_all.fillna(0, inplace=True)
-    languagedesire_all['2018'] = languagedesire_all['2018'].astype(int)
-    languagedesire_all['2019'] = languagedesire_all['2019'].astype(int)
-    languagedesire_all['2020'] = languagedesire_all['2020'].astype(int)
-    
-    
-    languagedesire_all.set_index(column, inplace=True)
-
-    languagedesire19_20 = languagedesire_all.div(languagedesire_all.sum())
-
-    st.write(languagedesire19_20.head(5))
-    fig = go.Figure()
-
-    for column in languagedesire19_20.columns:
-        fig.add_trace(go.Bar(x=languagedesire19_20.index, y=languagedesire19_20[column], name=column))
-
-    fig.update_layout(
-        xaxis_title=column,
-        yaxis_title='Percentages',
-        font=dict(size=14),
-        barmode='group',
-        height=600,
-        width=800
-    )
-
-
-    st.plotly_chart(fig)
-
-def generate_choropleth(df, column_name):
-    grouped_df = df.groupby('Country').size().reset_index(name='Respondents')
-
-    # ISO country code from the country name
-    def get_country_code(name):
-        try:
-            return pycountry.countries.lookup(name).alpha_3
-        except LookupError:
-            return None
-
-    # Adding country code column
-    grouped_df['Country_code'] = grouped_df['Country'].apply(get_country_code)
-
-    #choropleth map
-    fig = px.choropleth(grouped_df,
-                        locations="Country_code",
-                        color=column_name,
-                        hover_name="Country",
-                        projection="natural earth",
-                        color_continuous_scale='Peach',
-                        range_color=[0, 10000],
-                        labels={column_name: 'Respondents'}
-                        )
-    fig.update_layout(height=600, width=900)
-    return st.plotly_chart(fig)
-
-def gender_vs_top5countries(df):
-    all_data = df.groupby(['Country', 'Gender']).size().reset_index(name='Count')
-    all_data['Total'] = all_data.groupby('Country')['Count'].transform('sum')
-    all_data['Percentage'] = all_data['Count'] / all_data['Total'] * 100
-
-
-    top_countries = all_data.groupby('Country')['Total'].max().nlargest(5).index
-    top_data = all_data[all_data['Country'].isin(top_countries)]
-
-    # men and women data
-    men_data = top_data[top_data['Gender'] == 'Man']
-    women_data = top_data[top_data['Gender'] == 'Woman']
-
-    fig = go.Figure()
-
-    #bars for 'Men'
-    fig.add_trace(go.Bar(x=men_data['Country'], y=men_data['Percentage'], name='Men', marker_color='darkblue'))
-
-    #bars for 'Women'
-    fig.add_trace(go.Bar(x=women_data['Country'], y=women_data['Percentage'], name='Women', marker_color='#5E96E9'))
-
-    fig.update_layout(
-        title='Gender vs Top 5 Countries in 2019',
-        xaxis_title='Top 5 Countries',
-        yaxis_title='Percentage',
-        barmode='group'
-    )
-
-    return fig
-def heighest_paying_2019():
-    ds = df2019[df2019['DevType'].str.contains('Data scientist') == True ]
-    ds_mean_salary = ds.groupby('Country')['SalaryUSD'].mean().reset_index(name='Mean')
-    ds_mean_salary.sort_values(by=['Mean'], ascending=False, inplace=True)
-    ds_mean_salary = ds_mean_salary[(ds_mean_salary['Mean'] <= 280000)]
-    Top_mean_salary = ds_mean_salary[:10]
-    
-    fig = px.bar(Top_mean_salary, x='Mean', y='Country', orientation='h',
-                 labels={'Mean': 'Average Salary in US$', 'Country': 'Country'},
-                 title='The Top 10 highest paying data scientist countries in 2019')
-    
-    fig.update_layout(yaxis={'categoryorder':'total ascending'}, 
-                      title={'x':0.5, 'xanchor': 'center', 'yanchor': 'top'})
-    st.plotly_chart(fig)
-def heighest_paying(df):
-    ds = df[df['DevType'].str.contains('Data scientist') == True ]
-    ds_mean_salary = ds.groupby('Country')['SalaryUSD'].mean().reset_index(name='Mean')
-    ds_mean_salary.sort_values(by=['Mean'], ascending=False, inplace=True)
-    ds_mean_salary = ds_mean_salary[(ds_mean_salary['Mean'] <= 280000)]
-    Top_mean_salary = ds_mean_salary[:10]
-    
-    fig = px.bar(Top_mean_salary, x='Mean', y='Country', orientation='h',
-                 labels={'Mean': 'Average Salary in US$', 'Country': 'Country'},
-                 title='The Top 10 highest paying data scientist countries ')
-    
-    fig.update_layout(yaxis={'categoryorder':'total ascending'}, 
-                      title={'x':0.5, 'xanchor': 'center', 'yanchor': 'top'})
-    st.plotly_chart(fig)
-def plot_value_counts_plotly(column_name, df, column):
-    values = df[column_name].value_counts()
-    fig = go.Figure(data=[go.Bar(x=values.index, y=values.values, marker_color=random.choice(['lightseagreen', 'lightgreen', 'lightyellow', 'lightcoral', 'lightsalmon', 'lavender']))])
-    fig.update_layout(title=f'Value Counts for {column_name}', xaxis_title='Response', yaxis_title='Count')
-    column.plotly_chart(fig)
-
-def ai_graphs():
-    st.title('AI Survey Responses')
-    df = full_data2018[['AIDangerous', 'AIInteresting', 'AIResponsible', 'AIFuture']]
-
-    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
-
-    short_mapping = {
-        'Algorithms making important decisions': 'Algorithms',
-        'Artificial intelligence surpassing human intelligence ("the singularity")': 'AI Singularity',
-        'Evolving definitions of "fairness" in algorithmic versus human decisions': 'Fairness Evolution',
-        "Increasing automation of jobs": 'Automation',
-        "The developers or the people creating the AI": 'Developers',
-        "A governmental or other regulatory body": 'Government/Regulatory',
-        "Prominent industry leaders": 'Industry Leaders',
-        "Nobody": 'No Responsibility',
-        "I'm excited about the possibilities more than worried about the dangers.": 'Excited about AI Future',
-        "I'm worried about the dangers more than I'm excited about the possibilities.": 'Worried about AI Future',
-        "I don't care about it, or I haven't thought about it.": 'Indifferent about AI Future'
-    }
-
-    df.replace(short_mapping, inplace=True)
-
-    col1, col2 = st.columns(2)
-
-    plot_value_counts_plotly('AIDangerous', df, col1)
-    plot_value_counts_plotly('AIInteresting', df, col1)
-    plot_value_counts_plotly('AIResponsible', df, col2)
-    plot_value_counts_plotly('AIFuture', df, col2)   
-
-
-def result_plot(data):
-    new_index = data.Rates.sort_values(ascending=False).index
-    sorted_results = data.reindex(new_index)
-
-    filtered_results = sorted_results[np.abs(sorted_results.Rates) > 0.1]
-
-    #Plotly figure
-    fig = px.bar(
-        filtered_results,
-        x='Rates',
-        y='Columns',
-        orientation='h',
-        labels={'Rates': 'Negative and Positive Features', 'Columns': 'Features'},
-    )
-
-    fig.update_layout(
-        xaxis_title='Negative and Positive Features',
-        yaxis_title='Features',
-        title_font_size=25,
-        xaxis_title_font_size=25,
-        yaxis_title_font_size=25,
-        height=800,
-    )
-
-    st.plotly_chart(fig, use_container_width=True)
\ No newline at end of file