Click here to Skip to main content
15,945,119 members
Please Sign up or sign in to vote.
1.00/5 (1 vote)
See more:
this is my code to scrape the player data from this link
http://howstat.com/cricket/Statistics/IPL/PlayerList.asp
Python
from bs4 import BeautifulSoup
import pandas as pd
import requests as rq
import numpy as np
def remove(string):
    ns=""
    for i in string:
        if(not i.isspace()):
            ns+=i
    return ns
def remove_char(str):
    ns=""
    bad_chars = [':']
    for i in str:
        if i not in bad_chars:
            ns+=i
    return ns
def player_stat(url):
    get_url=rq.get(url)
    soup = BeautifulSoup(get_url.text,"html.parser")
    
    try:
        table = soup.find('table',attrs= {'border': '0', 'width': '270' , 'cellpadding': '4', 'class': 'desktop'})
        table2 = soup.find('table', attrs= {'border': '0', 'width': '600', 'cellpadding': '0', 'cellspacing': '0'})
        
        name=table2.find('td', attrs={'width': '125'})
        
        nm= name.text
        res= " ".join(nm.split())
        play={}
        namedic = {'Name': res}
        play.update(namedic)
        
        rows = table.findAll('tr')
        
        for row in rows:
            cells= row.findAll('td')
            try:
                a=" ".join(cells[0].text.split())
                b=" ".join(cells[1].text.split())
                a = remove_char(a)
                print(a)
                if(not b):
                    b  = 0
                if b!= 'N/A':
                    updict = {a : b}
                else:
                    updict = {a : np.nan}
            except:
                updict = {a : "na"}
            play.update(updict)
            
        return play
    except:
        return()
base_url = "http://howstat.com/cricket/Statistics/IPL/PlayerOverview.asp?PlayerID="

result = pd.DataFrame()
num=0
player_id=['3916', '4002', '4331', '5863', '3694', '3180', '4756', '4191', '2159', '4104', '4658', '4059', '4539', '5859', '4131', '4271', '4826', '4138', '3236', '4205', '3983', '4134', '4136', '4106', '5019', '4940', '5858', '4155', '4119', '4120', '4777', '5970', '4933', '4121', '4170', '4107', '4556', '3788', '4749', '4146', '4673', '3982', '2113', '6538', '3959', '3608', '3929', '3900', '3057', '2999', '5660', '3739', '4663', '4659', '6528', '4655', '3951', '4705', '4384', '4054', '4211', '4040', '4409', '4159', '4428', '4089', '4041', '4075', '3757', '4152', '4055', '4061', '4511', '3631', '2996', '3460', '4158', '3329', '2116', '3924', '3911', '3908', '3166', '3703', '6517', '4062', '3888', '4726', '4394', '4169', '4063', '4557', '4670', '4380', '4030', '1979', '4091', '4092', '3351', '4157', '4042', '3136', '4779', '4043', '6507', '3766', '3067', '2973', '5924', '4093', '4249', '4001', '3909', '4787', '4693', '3995', '4739', '3973', '4056', '4176', '5332', '6537', '3936', '3988', '3927', '4696', '3241', '6533', '3724', '5849', '4207', '2668', '4064', '3832', '3243', '2197', '3740', '4180', '4190', '4195', '2060', '3847', '4171', '4945', '4559', '3208', '4337', '4074', '3125', '6082', '3930', '4185', '5955', '3643', '4609', '2223', '3845', '1977', '2148', '2263', '4162', '4931', '3107', '4188', '2059', '5866', '6203', '4032', '3159', '2217', '3615', '4161', '2080', '2192', '4206', '4139', '4076', '3569', '4378', '4094', '4668', '5853', '3633', '4212', '4307', '2707', '3887', '4558', '2138', '4045', '4095', '4935', '4077', '4209', '3638', '3662', '3831', '1976', '3799', '4386', '3679', '4273', '3680', '4257', '4681', '3455', '3330', '2068', '3998', '4404', '3273', '3595', '3160', '3850', '4217', '3823', '4057', '4544', '4387', '5971', '4122', '4198', '3644', '4033', '2208', '4751', '4196', '5857', '4130', '5774', '5974', '4135', '0872', '2104', '4179', '3244', '3334', '4238', '4575', '2053', '4029', '2213', '2043', '3746', '4150', '3056', '4149', '3340', '4303', '3210', '2209', '5856', '2743', '4034', '4168', '4545', '4204', '2258', '6512', '4408', '2245', '3842', '4669', '4144', '5846', '4731', '4202', '4151', '3600', '4685', '3147', '4652', '4066', '3991', '3514', '6579', '0962', '4398', '4053', '5848', '4757', '2991', '3657', '2079', '2205', '2139', '3939', '4538', '4692', '4541', '3781', '4108', '4193', '4244', '3171', '4145', '4079', '4694', '4203', '4096', '3178', '4046', '4080', '6527', '6534', '4772', '4701', '3912', '3561', '1098', '3478', '2995', '3628', '3969', '3984', '4825', '3008', '3685', '3632', '1133', '3735', '3736', '4097', '3545', '3787', '5949', '5969', '4780', '3783', '4567', '3839', '2262', '3108', '4186', '3345', '4850', '3767', '2964', '3245', '3101', '3665', '3993', '4675', '4773', '4753', '4026', '3416', '3162', '3451', '3989', '4140', '4714', '3864', '4208', '3985', '1234', '4081', '4414', '4411', '4035', '4036', '4766', '4392', '4123', '4936', '4173', '3697', '4164', '3922', '4183', '4664', '4543', '3704', '3986', '4038', '2168', '2970', '4201', '4636', '4200', '3531', '4929', '2137', '3789', '3574', '2811', '4949', '4948', '3725', '4082', '4083', '4399', '4552', '3790', '4542', '4943', '4124', '4189', '3642', '4311', '4310', '4125', '3352', '4147', '3035', '5968', '4197', '3156', '3550', '6613', '5980', '4167', '3854', '4742', '4194', '4175', '3994', '3747', '3049', '5860', '4647', '3238', '3331', '3465', '2039', '3691', '2041', '4587', '5850', '3164', '4605', '4937', '5979', '4109', '4930', '4110', '4589', '2201', '4939', '3826', '3974', '4339', '3889', '4137', '3287', '5509', '4133', '4216', '3149', '6570', '4400', '4111', '3696', '4484', '4942', '4154', '5851', '4112', '4085', '4069', '4113', '4650', '3996', '4199', '3611', '4000', '3573', '4321', '4775', '4325', '3836', '4838', '3532', '4098', '3759', '4405', '6535', '4047', '4656', '5978', '3132', '4141', '3106', '4114', '3228', '5847', '4099', '2255', '4048', '4126', '4374', '2226', '4058', '4667', '4429', '3907', '4116', '4174', '4172', '2220', '4049', '4070', '4767', '2878', '4732', '6544', '3928', '2974', '4132', '5861', '2152', '5976', '4178', '4177', '3426', '4562', '4027', '3470', '4554', '6539', '4115', '4028', '3923', '3474', '4759', '5662', '4768', '4377', '4160', '2124', '2879', '6520', '4755', '4729', '4769', '2880', '4100', '2882', '3447', '4101', '4555', '6523', '4665', '3332', '4657', '6509', '3354', '3157', '3014', '4748', '3756', '4243', '2888', '3502', '4127', '6508', '3542', '4512', '3327', '2893', '4184', '4156', '4626', '3830', '3242', '4389', '3883', '6607', '2899', '4401', '4210', '4086', '4192', '4213', '4050', '4071', '4549', '4128', '2907', '2975', '3319', '4166', '4118', '4072', '3122', '3348', '3407', '1735', '4379', '4393', '6178', '3824', '3700', '4822', '4143', '3126', '6522', '3530', '3833', '6510', '4679', '4102', '4648', '3743', '4383', '3699', '3100', '6204', '3838', '4181', '4087', '3355', '2000', '4051', '4817', '3658', '3339', '3129', '4938', '3288', '2090', '4163', '4117', '3626', '4165', '4513', '3782', '5865', '4678', '3463', '4052', '3910', '4088', '1856', '3637', '4684', '3017', '3325', '4228', '4420', '3817', '3846', '4421', '3499', '4142', '4390', '5977', '4073', '3786', '4103', '4153', '4148', '2211', '2949', '4527', '4129', '2095']
for x in player_id:
    url=base_url+str(x)
    stats = player_stat(url)
    if stats:
        df1=pd.Dataframe(stats, index=[num])
        num=num+1
        
pd.set_option('display.max_columns', None)
results = result.drop(['Batting', 'Fielding', 'Bowling'], axis=0)
results = result
results.drop(results.columns[16],axis=1, inplace=True)
result1=results
result1.fillna("0", inplace=True)
result.head()
print(len(result1.columns))
print(len(result1))
data= result.to_csv('ssssample.csv', index=False)

but I get this error: "['Batting' 'Fielding' 'Bowling'] not found in axis"

can someone tell me how to clear this error or how to modify this code to make it scrape the players data from the website

What I have tried:

i tried to resolve the problem but i couldnt
Posted
Updated 4-Apr-23 22:19pm
v2
Comments
Richard Deeming 5-Apr-23 4:21am    
I've removed the link from your question, since we get a lot of "cricket score" spammers here, and you don't want to look like one of them. :)
Prasanna Ravi JR 9-Apr-23 7:00am    
oh ok

1 solution

results.drop(results.columns[16],axis=1, inplace=True)

This drop appears to be causing the problem. If you look at the link below I think it will solve your problem.


https://stackoverflow.com/questions/73509378/getting-keyerror-not-found-in-axis-when-trying-to-drop-a-column-from-datafram[^]
 
Share this answer
 
Comments
Prasanna Ravi JR 9-Apr-23 7:00am    
thank you

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900