import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_profiling import ProfileReport


# For dealing with xlsx format. 
!pip install openpyxl

Requirement already satisfied: openpyxl in c:\users\rosha\appdata\local\programs\python\python39\lib\site-packages (3.0.7)
Requirement already satisfied: et-xmlfile in c:\users\rosha\appdata\local\programs\python\python39\lib\site-packages (from openpyxl) (1.1.0)


df = pd.read_excel("./airlines_dataset.xlsx")
df.head()


df.tail()


df.shape

(10683, 11)


df.info()

# Every column is string object except the price

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10683 entries, 0 to 10682
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Airline          10683 non-null  object
 1   Date_of_Journey  10683 non-null  object
 2   Source           10683 non-null  object
 3   Destination      10683 non-null  object
 4   Route            10682 non-null  object
 5   Dep_Time         10683 non-null  object
 6   Arrival_Time     10683 non-null  object
 7   Duration         10683 non-null  object
 8   Total_Stops      10682 non-null  object
 9   Additional_Info  10683 non-null  object
 10  Price            10683 non-null  int64 
dtypes: int64(1), object(10)
memory usage: 918.2+ KB


df.describe(include='all')


print('All airlines:',df.Airline.unique(),'\n')
# Names of every airlines

print('Total airlines count:',len(df.Airline.unique()),'\n')
# Total unique airlines count

print(df.Airline.describe(),'\n')
# Description of the column consisting of most frequent airways

print(df.Airline.isnull().unique(),'\n')
# Checking if there are any null values in the column

print(df.Airline.value_counts())
# Each airline with the number of records it has

All airlines: ['IndiGo' 'Air India' 'Jet Airways' 'SpiceJet' 'Multiple carriers' 'GoAir'
 'Vistara' 'Air Asia' 'Vistara Premium economy' 'Jet Airways Business'
 'Multiple carriers Premium economy' 'Trujet'] 

Total airlines count: 12 

count           10683
unique             12
top       Jet Airways
freq             3849
Name: Airline, dtype: object 

[False] 

Jet Airways                          3849
IndiGo                               2053
Air India                            1752
Multiple carriers                    1196
SpiceJet                              818
Vistara                               479
Air Asia                              319
GoAir                                 194
Multiple carriers Premium economy      13
Jet Airways Business                    6
Vistara Premium economy                 3
Trujet                                  1
Name: Airline, dtype: int64


df.Airline.value_counts().plot.pie(figsize=(9,9),labeldistance=None, legend=True)

<AxesSubplot:ylabel='Airline'>


# Plotting Price with respect to Airline
sns.catplot(y="Price", x="Airline", data=df.sort_values("Price", ascending=False), kind="boxen", height=6,aspect=3)

<seaborn.axisgrid.FacetGrid at 0x253a1e3bd60>


 date = pd.to_datetime(df.Date_of_Journey) 
#Converting Date_of_Journey object into a datetime64 object
date_only = date.dt.date 
# Keeping the date part only as the time by default is 00:00:00
print(date_only.sort_values(ascending=True).iloc[[0,1,df.shape[0]/2,-2,-1]],'\n')
# Printing top 2, middle and bottom 2 rows

959     2019-01-03
6336    2019-01-03
3292    2019-05-24
4997    2019-12-06
1081    2019-12-06
Name: Date_of_Journey, dtype: object


print("Earliest date is:", date_only.min())
print("Last date is:", date_only.max())

# The lower bound and the upper bound of data are January 3 to December 6, 2019.

Earliest date is: 2019-01-03
Last date is: 2019-12-06


print(date_only.value_counts(dropna=False).sort_index())

# Finding total flights in each day of the year

2019-01-03    199
2019-01-04    257
2019-01-05    277
2019-01-06    342
2019-03-03    315
2019-03-04    110
2019-03-05     90
2019-03-06    333
2019-03-15    162
2019-03-18    156
2019-03-21    423
2019-03-24    323
2019-03-27    299
2019-04-15     89
2019-04-18     67
2019-04-21     82
2019-04-24     92
2019-04-27     94
2019-05-15    405
2019-05-18    504
2019-05-21    497
2019-05-24    286
2019-05-27    382
2019-06-03    403
2019-06-04    100
2019-06-05    282
2019-06-06    503
2019-06-15    328
2019-06-18    105
2019-06-21    109
2019-06-24    351
2019-06-27    355
2019-09-03    302
2019-09-04    125
2019-09-05    484
2019-09-06    495
2019-12-03    142
2019-12-04     63
2019-12-05    259
2019-12-06    493
Name: Date_of_Journey, dtype: int64


date_only.value_counts().sort_index(ascending=False).plot(kind='barh',figsize=(10,10))

# Most flights were on May 18 (Saturday), June 6 (Thursday), September 6 (Friday) and December 6 (Friday).

<AxesSubplot:>


# Since the data consists of the year 2019 and it is repeated throughout the column, the year can be removed. 

# Renaming the column
df.rename(columns={"Date_of_Journey": "Date_of_Journey_2019"}, inplace=True)


# Removing the redundant date
df.Date_of_Journey_2019 = df.Date_of_Journey_2019.str.slice(stop=-5)

df


# Separating Journey Day
df['Journey_Day'] = pd.to_datetime(df.Date_of_Journey_2019, format="%d/%m").dt.day


# Separating Journey Month
df['Journey_Month'] = pd.to_datetime(df.Date_of_Journey_2019, format="%d/%m").dt.month


# dropping journey column
df.drop(columns=['Date_of_Journey_2019'],axis=1,inplace=True)

df


print(df.Source.unique(),'\n')
# Unique sources for airlines departure

print(df.Source.value_counts(),'\n')
# Departure source count from most to least

print(df.Source.isnull().unique(),'\n')
# Checking if there are NaN values in source

print(df.Destination.unique(),'\n')
# Unique destinations for airlines arrival

print(df.Destination.value_counts(),'\n')
# Arrival destination count from most to least

print(df.Destination.isnull().unique(),'\n')
# Checking if there are NaN values in destination

['Banglore' 'Kolkata' 'Delhi' 'Chennai' 'Mumbai'] 

Delhi       4537
Kolkata     2871
Banglore    2197
Mumbai       697
Chennai      381
Name: Source, dtype: int64 

[False] 

['New Delhi' 'Banglore' 'Cochin' 'Kolkata' 'Delhi' 'Hyderabad'] 

Cochin       4537
Banglore     2871
Delhi        1265
New Delhi     932
Hyderabad     697
Kolkata       381
Name: Destination, dtype: int64 

[False]


delhi_count = (df.Destination.values == ['Delhi']).sum()
print("Delhi count: ", delhi_count,'\n')
# Total instances of delhi data in destination

new_delhi_count = (df.Destination.values == ['New Delhi']).sum()
print("New Delhi count: ", new_delhi_count,'\n')
# Total instances of new delhi data in destination

# Total delhi instances
print("Total delhi count: ",delhi_count+new_delhi_count)
# Delhi would still come to the third to the most visited destinations after Cochin and Banglore.

Delhi count:  1265 

New Delhi count:  932 

Total delhi count:  2197


# Plotting price with respect to source
sns.catplot(y="Price", x="Source", data=df.sort_values("Price", ascending=False), kind="boxen", height=6,aspect=3)

<seaborn.axisgrid.FacetGrid at 0x2539a234d30>


len(df.Route.unique())
# All the plane flew in 129 unique routes

129


df.Route[0:10]
# Checking 10 instances of routes

0                BLR → DEL
1    CCU → IXR → BBI → BLR
2    DEL → LKO → BOM → COK
3          CCU → NAG → BLR
4          BLR → NAG → DEL
5                CCU → BLR
6          BLR → BOM → DEL
7          BLR → BOM → DEL
8          BLR → BOM → DEL
9          DEL → BOM → COK
Name: Route, dtype: object


df.Total_Stops[0:10]
# Checking 10 instances of total stops

0    non-stop
1     2 stops
2     2 stops
3      1 stop
4      1 stop
5    non-stop
6      1 stop
7      1 stop
8      1 stop
9      1 stop
Name: Total_Stops, dtype: object


print(df.Route.isnull().unique(),'\n')
# Checking if there are NaN values in route

# There ARE NaN values!!!

[False  True]


print(df.Total_Stops.isnull().unique(),'\n')
# Checking if there are NaN values in steps

# There ARE NaN values!!!

[False  True]


# Replacing stops string values with an integer

df.Total_Stops.replace(to_replace=['non-stop','1 stop','2 stops', '3 stops','4 stops'],value=[0,1,2,3,4], inplace=True)


stops = df.Total_Stops
stops.unique()

array([ 0.,  2.,  1.,  3., nan,  4.])


# Counting the nan values

stops.isnull().value_counts()

False    10682
True         1
Name: Total_Stops, dtype: int64


# Finding the index of nan values

print(np.argwhere(np.isnan(np.array(stops))))

df.iloc[9039]

[[9039]]

Airline               Air India
Source                    Delhi
Destination              Cochin
Route                       NaN
Dep_Time                  09:45
Arrival_Time       09:25 07 May
Duration                23h 40m
Total_Stops                 NaN
Additional_Info         No info
Price                      7480
Journey_Day                   6
Journey_Month                 5
Name: 9039, dtype: object


# Finding the most common routes and stops on the same airline with same sources, destinations and durations.

df[['Route','Total_Stops']].where((df.Source=='Delhi') & (df.Destination=='Cochin') &(df.Airline=='Air India') & (df.Duration=='23h 40m')).dropna().set_index('Route').value_counts()

# Since the most common route is DEL → HYD → MAA → COK with total_stops 2, the values shall be replaced now.

Total_Stops
2.0            14
1.0            12
dtype: int64


# Filling the values of routes
df.iloc[9039,3] = 'DEL → HYD → MAA → COK'

# Filling the value of stops
df.iloc[9039,7] = 2


# The value is replaced and no other nan values exist.

print(df.iloc[9039],'\n')

print(df.Total_Stops.isnull().value_counts())

Airline                        Air India
Source                             Delhi
Destination                       Cochin
Route              DEL → HYD → MAA → COK
Dep_Time                           09:45
Arrival_Time                09:25 07 May
Duration                         23h 40m
Total_Stops                          2.0
Additional_Info                  No info
Price                               7480
Journey_Day                            6
Journey_Month                          5
Name: 9039, dtype: object 

False    10683
Name: Total_Stops, dtype: int64


# Converting the float values to integers

df.Total_Stops = df.Total_Stops.apply(np.int64)
df.Total_Stops

0        0
1        2
2        2
3        1
4        1
        ..
10678    0
10679    0
10680    0
10681    0
10682    2
Name: Total_Stops, Length: 10683, dtype: int64


 df.head(4)


df.Additional_Info.value_counts(normalize=True).mul(100).round(8)

No info                         78.114762
In-flight meal not included     18.552841
No check-in baggage included     2.995413
1 Long layover                   0.177853
Change airports                  0.065525
Business class                   0.037443
No Info                          0.028082
2 Long layover                   0.009361
1 Short layover                  0.009361
Red-eye flight                   0.009361
Name: Additional_Info, dtype: float64


df.drop(['Additional_Info'], axis = 1, inplace = True)


df.head(3)


# Checking if any nan values on departure time
df.Dep_Time.isna().unique()

array([False])


# Formatting data into a more structured format
df['Dep_Hour'] = pd.to_datetime(df.Dep_Time).dt.hour

df['Dep_Min'] = pd.to_datetime(df.Dep_Time).dt.minute

# df.drop(['Dep_Time'], axis=1, inplace=True)

# dep_time =  pd.to_datetime(df.Dep_Time).dt.time
# print(dep_time)

# df.Dep_Time = dep_time


df.head()


# df.Arrival_Time


df['Arrival_Hour'] = pd.to_datetime(df.Arrival_Time).dt.hour
df['Arrival_Min'] = pd.to_datetime(df.Arrival_Time).dt.minute
# df.drop(['Arrival_Time'],axis=1, inplace=True)
# arrival_time =  pd.to_datetime(df.Arrival_Time).dt.time
# df.Arrival_Time = arrival_time


df.tail()


df["Dep_Time"]

0        22:20
1        05:50
2        09:25
3        18:05
4        16:50
         ...  
10678    19:55
10679    20:45
10680    08:20
10681    11:30
10682    10:55
Name: Dep_Time, Length: 10683, dtype: object


# Duration can be extracted like this.

dep_time =  pd.to_datetime(df.Dep_Time).dt.time
df.Dep_Time = dep_time

arrival_time =  pd.to_datetime(df.Arrival_Time).dt.time
df.Arrival_Time = arrival_time

from datetime import datetime, date

def time_between(df):
    return datetime.combine(date.today(), df["Arrival_Time"]) - datetime.combine(date.today(), df["Dep_Time"])

duration = df.apply(time_between, axis=1).astype('str')
duration

0        -1 days +02:50:00
1          0 days 07:25:00
2        -1 days +19:00:00
3          0 days 05:25:00
4          0 days 04:45:00
               ...        
10678      0 days 02:30:00
10679      0 days 02:35:00
10680      0 days 03:00:00
10681      0 days 02:40:00
10682      0 days 08:20:00
Length: 10683, dtype: object


df['Duration_hours']=duration.str.slice(start=-8,stop=-6)
df['Duration_mins']=duration.str.slice(start=-5,stop=-3)

df


# Dropping redundant columns

df.drop(columns=['Duration','Dep_Time','Arrival_Time'],inplace=True)

df


# Since route with the total_stops is redundant, the route column can be removed. 
df.drop(columns=['Route'],inplace=True)

df


sns.displot(df, x=df.Price,height=8.27, aspect=11.7/8.27)
# Visualizing the distribution of price

# The data is heavily distributed between 0 to 30,000.

<seaborn.axisgrid.FacetGrid at 0x253accc6730>


df.duplicated().value_counts()

# Total of 223 duplicated columns

False    10460
True       223
dtype: int64


df.drop_duplicates(inplace=True)


df.reset_index(drop=True, inplace=True)


df.duplicated().value_counts()

# No any data is duplicated.

False    10460
dtype: int64

df


df.Airline

0             IndiGo
1          Air India
2        Jet Airways
3             IndiGo
4             IndiGo
            ...     
10455       Air Asia
10456      Air India
10457    Jet Airways
10458        Vistara
10459      Air India
Name: Airline, Length: 10460, dtype: object


# Generating dummy variables and dropping the first to avoid the dummy variable trap

Airline = pd.get_dummies(df.Airline, drop_first=True,prefix='Airline')
Airline


df.Source.unique()

array(['Banglore', 'Kolkata', 'Delhi', 'Chennai', 'Mumbai'], dtype=object)


Source = pd.get_dummies(df.Source, drop_first=True,prefix='Source')
Source


# Similar with the destination column
Destination = pd.get_dummies(df.Destination, drop_first=True,prefix='Destination')
Destination


# All the encoded columns are now appended into the dataset

df = pd.concat([df,Airline,Source,Destination], axis=1)


df.drop(columns=['Airline','Source','Destination'], axis=1, inplace=True)

df


sns.boxplot(x=df.Price)

<AxesSubplot:xlabel='Price'>


# Finding outliers in the price column using Z-score
price_arr = np.array(df.Price)


outliers_index = []

def detect_outliers(data):
    threshold = 3 #3 standard deviation
    mean = np.mean(data)
    std = np.std(data)

    for i,value in enumerate(data):
        z_score = (value-mean)/std
        if np.abs(z_score)>threshold:
            outliers_index.append(i)
    return outliers_index


outliers_index = detect_outliers(price_arr)
print(outliers_index)

[123, 396, 486, 510, 597, 628, 657, 784, 825, 935, 945, 958, 974, 1194, 1244, 1339, 1420, 1462, 1474, 1625, 1650, 1778, 1909, 2044, 2087, 2096, 2483, 2543, 2604, 2621, 2677, 2904, 3010, 3088, 3231, 3372, 3505, 3667, 3974, 4476, 4627, 4779, 4960, 5080, 5312, 5378, 5597, 5636, 5645, 5654, 5673, 5680, 5789, 5916, 6240, 6332, 6497, 6509, 6526, 6901, 7253, 7258, 7279, 7437, 7454, 7515, 7608, 7619, 7649, 7794, 7909, 7967, 8325, 8344, 8408, 8466, 8715, 8796, 8813, 8844, 8873, 9038, 9081, 9455, 9536, 9785, 9862, 9920, 9965, 9987, 9992, 10146, 10157, 10176, 10229, 10299]


df.iloc[123].Price

27430


# Dropping all the outliers
df.drop(outliers_index,axis=0,inplace=True)


# Previous outlier data on the column has been deleted!

df.iloc[123].Price

3540


 sns.boxplot(x=df.Price)

<AxesSubplot:xlabel='Price'>


df.columns

Index(['Total_Stops', 'Price', 'Journey_Day', 'Journey_Month', 'Dep_Hour',
       'Dep_Min', 'Arrival_Hour', 'Arrival_Min', 'Duration_hours',
       'Duration_mins', 'Airline_Air India', 'Airline_GoAir', 'Airline_IndiGo',
       'Airline_Jet Airways', 'Airline_Jet Airways Business',
       'Airline_Multiple carriers',
       'Airline_Multiple carriers Premium economy', 'Airline_SpiceJet',
       'Airline_Trujet', 'Airline_Vistara', 'Airline_Vistara Premium economy',
       'Source_Chennai', 'Source_Delhi', 'Source_Kolkata', 'Source_Mumbai',
       'Destination_Cochin', 'Destination_Delhi', 'Destination_Hyderabad',
       'Destination_Kolkata', 'Destination_New Delhi'],
      dtype='object')


# Rearranging the columns

X = df.loc[:,['Journey_Day', 'Journey_Month', 'Dep_Hour',
       'Dep_Min', 'Arrival_Hour', 'Arrival_Min', 'Duration_hours',
       'Duration_mins', 'Airline_Air India', 'Airline_GoAir', 'Airline_IndiGo',
       'Airline_Jet Airways', 'Airline_Jet Airways Business',
       'Airline_Multiple carriers',
       'Airline_Multiple carriers Premium economy', 'Airline_SpiceJet',
       'Airline_Trujet', 'Airline_Vistara', 'Airline_Vistara Premium economy',
       'Source_Chennai', 'Source_Delhi', 'Source_Kolkata', 'Source_Mumbai',
       'Destination_Cochin', 'Destination_Delhi', 'Destination_Hyderabad',
       'Destination_Kolkata', 'Destination_New Delhi','Total_Stops']]


X.tail()


X_prime = df.loc[:,['Journey_Day', 'Journey_Month', 'Dep_Hour',
       'Dep_Min', 'Arrival_Hour', 'Arrival_Min', 'Duration_hours',
       'Duration_mins','Total_Stops']]


# Checking correlation
plt.figure(figsize=(10,10))
sns.heatmap(X_prime.corr(),annot=True)
plt.show()

# Highly correlated values can be dropped from here.


y = df.Price


# Important feature selection using ExtraTreesRegressor

from sklearn.ensemble import ExtraTreesRegressor
selection = ExtraTreesRegressor()
selection.fit(X,y)

ExtraTreesRegressor()


# Trees regressor has found the important features

print(selection.feature_importances_)

[8.84505832e-02 7.30460088e-02 2.40616732e-02 2.24943039e-02
 2.28831276e-02 1.99045743e-02 1.33983891e-01 1.71266483e-02
 1.73809256e-02 2.70220294e-03 1.93073292e-02 1.55322108e-01
 0.00000000e+00 2.29486866e-02 1.15775356e-03 4.21383542e-03
 2.09388912e-04 7.20350652e-03 1.33075561e-04 6.91186561e-04
 1.60152289e-02 8.95431280e-03 6.55324079e-03 1.87667878e-02
 1.95896673e-02 8.43652587e-03 6.04024879e-04 1.42815753e-02
 2.73577827e-01]


# Plotting the values for better visualization
plt.figure(figsize=(12,8))
feature_imp = pd.Series(selection.feature_importances_, index=X.columns)
feature_imp.nlargest(20).plot(kind='barh')
plt.show()


profile = ProfileReport(df,title="Airlines Profiling Report")


profile

Airline Data Analysis by Roshan Parajuli

Short introduction about dataset:

Airline

Date_of_journey

Source and Destination

Routes and Stops

Additional_Info

Departure Time, Arrival Time and Duration

Price

Removing duplicate data

Handling categorical data

Finding outliers

Feature selection

Profiling report

Data Wrangling

Data Cleaning

Data Integration

Data Reduction

Data Transformation

	Airline	Date_of_Journey	Source	Destination	Route	Dep_Time	Arrival_Time	Duration	Total_Stops	Additional_Info	Price
0	IndiGo	24/03/2019	Banglore	New Delhi	BLR → DEL	22:20	01:10 22 Mar	2h 50m	non-stop	No info	3897
1	Air India	1/05/2019	Kolkata	Banglore	CCU → IXR → BBI → BLR	05:50	13:15	7h 25m	2 stops	No info	7662
2	Jet Airways	9/06/2019	Delhi	Cochin	DEL → LKO → BOM → COK	09:25	04:25 10 Jun	19h	2 stops	No info	13882
3	IndiGo	12/05/2019	Kolkata	Banglore	CCU → NAG → BLR	18:05	23:30	5h 25m	1 stop	No info	6218
4	IndiGo	01/03/2019	Banglore	New Delhi	BLR → NAG → DEL	16:50	21:35	4h 45m	1 stop	No info	13302

	Airline	Date_of_Journey	Source	Destination	Route	Dep_Time	Arrival_Time	Duration	Total_Stops	Additional_Info	Price
10678	Air Asia	9/04/2019	Kolkata	Banglore	CCU → BLR	19:55	22:25	2h 30m	non-stop	No info	4107
10679	Air India	27/04/2019	Kolkata	Banglore	CCU → BLR	20:45	23:20	2h 35m	non-stop	No info	4145
10680	Jet Airways	27/04/2019	Banglore	Delhi	BLR → DEL	08:20	11:20	3h	non-stop	No info	7229
10681	Vistara	01/03/2019	Banglore	New Delhi	BLR → DEL	11:30	14:10	2h 40m	non-stop	No info	12648
10682	Air India	9/05/2019	Delhi	Cochin	DEL → GOI → BOM → COK	10:55	19:15	8h 20m	2 stops	No info	11753

	Airline	Date_of_Journey	Source	Destination	Route	Dep_Time	Arrival_Time	Duration	Total_Stops	Additional_Info	Price
count	10683	10683	10683	10683	10682	10683	10683	10683	10682	10683	10683.000000
unique	12	44	5	6	128	222	1343	368	5	10	NaN
top	Jet Airways	18/05/2019	Delhi	Cochin	DEL → BOM → COK	18:55	19:00	2h 50m	1 stop	No info	NaN
freq	3849	504	4537	4537	2376	233	423	550	5625	8345	NaN
mean	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	9087.064121
std	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	4611.359167
min	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	1759.000000
25%	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	5277.000000
50%	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	8372.000000
75%	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	12373.000000
max	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	79512.000000

	Airline	Date_of_Journey_2019	Source	Destination	Route	Dep_Time	Arrival_Time	Duration	Total_Stops	Additional_Info	Price
0	IndiGo	24/03	Banglore	New Delhi	BLR → DEL	22:20	01:10 22 Mar	2h 50m	non-stop	No info	3897
1	Air India	1/05	Kolkata	Banglore	CCU → IXR → BBI → BLR	05:50	13:15	7h 25m	2 stops	No info	7662
2	Jet Airways	9/06	Delhi	Cochin	DEL → LKO → BOM → COK	09:25	04:25 10 Jun	19h	2 stops	No info	13882
3	IndiGo	12/05	Kolkata	Banglore	CCU → NAG → BLR	18:05	23:30	5h 25m	1 stop	No info	6218
4	IndiGo	01/03	Banglore	New Delhi	BLR → NAG → DEL	16:50	21:35	4h 45m	1 stop	No info	13302
...	...	...	...	...	...	...	...	...	...	...	...
10678	Air Asia	9/04	Kolkata	Banglore	CCU → BLR	19:55	22:25	2h 30m	non-stop	No info	4107
10679	Air India	27/04	Kolkata	Banglore	CCU → BLR	20:45	23:20	2h 35m	non-stop	No info	4145
10680	Jet Airways	27/04	Banglore	Delhi	BLR → DEL	08:20	11:20	3h	non-stop	No info	7229
10681	Vistara	01/03	Banglore	New Delhi	BLR → DEL	11:30	14:10	2h 40m	non-stop	No info	12648
10682	Air India	9/05	Delhi	Cochin	DEL → GOI → BOM → COK	10:55	19:15	8h 20m	2 stops	No info	11753

	Airline_Air India	Airline_GoAir	Airline_IndiGo	Airline_Jet Airways	Airline_Jet Airways Business	Airline_Multiple carriers	Airline_Multiple carriers Premium economy	Airline_SpiceJet	Airline_Trujet	Airline_Vistara	Airline_Vistara Premium economy
0	0	0	1	0	0	0	0	0	0	0	0
1	1	0	0	0	0	0	0	0	0	0	0
2	0	0	0	1	0	0	0	0	0	0	0
3	0	0	1	0	0	0	0	0	0	0	0
4	0	0	1	0	0	0	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...
10455	0	0	0	0	0	0	0	0	0	0	0
10456	1	0	0	0	0	0	0	0	0	0	0
10457	0	0	0	1	0	0	0	0	0	0	0
10458	0	0	0	0	0	0	0	0	0	1	0
10459	1	0	0	0	0	0	0	0	0	0	0