January
5th,
2013
I was using the Advanced Search in Github the other day and thought: what if I could use this in a terminal? So I started trying out some things in Python, which led to the following script.
Some comments about the script

- It is an interactive script that you run from a terminal; the main options are (n) for a new search and (s) to show a script snippet. You first search for a keyword and optionally give a programming language and the number of result pages to parse. See an example at the end of this post ...
- It takes these args and builds the right search URL (base URL = https://github.com/search?q=) and uses html2text.theinfo.org to strip out the HTML (I tried the remote version here; for local use just download and import the html2text Python module, see my last post for an example).
- It filters the relevant html with re.split(r"seconds\)|## Breakdown", html) - this is based on what html2text makes of github's html markup.
- When choosing (s) and then a number of the search result the method "show_script_context" imports the raw script and shows the line that matches the search string with 8 lines before and after (like grep -A8 -B8 would do)
- You can use Conque to run this in a split window in Vim which allows you to copy output to the script you are working on.
The code
See below and on github:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Bob Belderbos / written: Dec 2012
# Purpose: have an interactive github cli search app
#
import re, sys, urllib, pprint
# import html2text # -- to use local version
class GithubSearch(object):
    """Command line wrapper around Github's Advanced Search.

    https://github.com/search

    Instantiating the class immediately starts the interactive menu (see
    ``show_menu``). The code runs on both Python 2 and Python 3.

    Attributes:
        searchTerm: the search term in query form (lower-cased, spaces
            replaced by "+"), as it is placed in the search URL.
        matchTerm: the same term with its spaces intact; used to locate
            matching lines inside a downloaded script.
        scripts: raw script URLs of the results printed so far; search
            result number N is ``scripts[N - 1]``.
        counter: number of results printed for the current search.
    """

    GITHUB_SEARCH_URL = "https://github.com/search?q="
    HTML2TEXT_URL = "http://html2text.theinfo.org/?url="
    RAW_BASE_URL = "https://raw.github.com"
    DEFAULT_SEARCH_PAGES = 3
    CONTEXT_LINES = 8
    BANNER_WIDTH = 125

    MENU = """
(N)ew search
(S)how more context (github script)
(Q)uit
Enter choice: """

    # The result listing sits between these two markers in the text that
    # html2text produces for a search page.
    _RESULTS_SPLIT = re.compile(r"seconds\)|## Breakdown")
    # Header of one result: "... (<script path>) (<language>) ..."
    _RESULT_HEADER = re.compile(r".*\((.*?)\)\s+\((.*?)\).*")

    def __init__(self):
        """Set up the instance state and start the interactive menu."""
        self.searchTerm = ""
        self.matchTerm = ""
        self.scripts = []
        self.counter = 0
        self.show_menu()

    # -- small helpers -----------------------------------------------------

    @staticmethod
    def _read_input(prompt):
        """Prompt the user and return the typed line (Python 2 and 3)."""
        try:
            reader = raw_input  # noqa: F821 -- Python 2 only
        except NameError:
            reader = input  # Python 3
        return reader(prompt)

    @staticmethod
    def _quote_plus(text):
        """URL-encode *text* with the stdlib ``quote_plus``."""
        try:
            from urllib.parse import quote_plus  # Python 3
        except ImportError:
            from urllib import quote_plus  # Python 2
        return quote_plus(text)

    @staticmethod
    def _open_url(url):
        """Open *url* with the stdlib ``urlopen`` and return the response."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        return urlopen(url)

    @staticmethod
    def _to_text(data):
        """Return *data* as ``str``; bytes (Python 3) are decoded as UTF-8."""
        if isinstance(data, str):
            return data
        return data.decode("utf-8", "replace")

    @staticmethod
    def _parse_page_count(text, default):
        """Return the page count typed by the user, or *default*.

        The whole answer is parsed (so "12" means twelve pages); anything
        that is not a whole number of at least 1 yields *default*.
        """
        try:
            count = int(text.strip())
        except ValueError:
            return default
        return count if count >= 1 else default

    @classmethod
    def _raw_url(cls, path):
        """Turn a "/user/repo/tree/<rev>/<file>" path into a raw script URL.

        Only the "tree" segment that directly follows the repository name is
        removed, so later path parts such as "subtree/" stay untouched.
        """
        parts = path.split("/")
        if len(parts) > 3 and parts[3] == "tree":
            del parts[3]
        return cls.RAW_BASE_URL + "/".join(parts)

    @staticmethod
    def _context_window(lines, index, num_context_lines):
        """Return ``(line_number, text)`` pairs surrounding ``lines[index]``.

        Line numbers are 1-based. The window holds up to *num_context_lines*
        lines on each side of the match and is clamped to the file
        boundaries, so a match near the top does not produce a negative
        slice start.
        """
        start = max(0, index - num_context_lines)
        stop = min(len(lines), index + num_context_lines + 1)
        return [(pos + 1, lines[pos]) for pos in range(start, stop)]

    def _build_query_url(self, page, lang):
        """Return the html2text URL that renders one github search page."""
        search_url = "%s%s&p=%s&ref=searchbar&type=Code&l=%s" % (
            self.GITHUB_SEARCH_URL, self.searchTerm, page, lang)
        # The complete github URL is a parameter of the html2text URL, so it
        # is encoded as a whole.
        return self.HTML2TEXT_URL + self._quote_plus(search_url)

    # -- menu actions ------------------------------------------------------

    def show_menu(self):
        """Show a menu to interactively use this program.

        Loops until the user quits. Quitting (or EOF / Ctrl-C at the prompt)
        ends the program through ``sys.exit("Goodbye!")``.
        """
        actions = {"n": self.new_search, "s": self.show_script_context}
        while True:
            try:
                choice = self._read_input(self.MENU).strip().lower()
            except (EOFError, KeyboardInterrupt):
                choice = "q"
            print("\nYou picked: [%s]" % choice)
            if choice == "q":
                sys.exit("Goodbye!")
            # Exact lookup: a substring test against "nsq" would also accept
            # "", "ns" and "sq".
            if choice not in actions:
                print("This is an invalid option, try again")
                continue
            actions[choice]()

    def new_search(self):
        """Ask for the search parameters, then fetch and print the results."""
        try:
            term = self._read_input("Enter search term: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            sys.exit("Error handling this search term, exiting ...")
        if not term:
            print("No search term entered, nothing to search for.")
            return

        # reset script url tracking list and counter
        self.scripts = []
        self.counter = 0
        self.matchTerm = term
        self.searchTerm = term.replace(" ", "+")

        try:
            lang = self._read_input(
                "Filter on programming language (press Enter to include all): "
            ).strip().lower()
        except EOFError:
            lang = ""
        try:
            answer = self._read_input(
                "Number of search pages to process (default = %i): "
                % self.DEFAULT_SEARCH_PAGES)
        except EOFError:
            answer = ""
        num_search_pages = self._parse_page_count(
            answer, self.DEFAULT_SEARCH_PAGES)

        # get the search results
        for page in range(1, num_search_pages + 1):
            try:
                results = self.get_search_results(page, lang)
            except IOError as err:
                print("Could not fetch search results page %i: %s" % (page, err))
                break
            if len(results) < 2:
                # Neither marker was found, so there is no result section.
                print("No search results found on page %i." % page)
                break
            for result in results[1].split("##"):  # results are divided by ##
                self.parse_search_result(result)

    def get_search_results(self, page, lang):
        """Query github's advanced search through html2text.

        Args:
            page: 1-based number of the result page to fetch.
            lang: programming language filter ("" includes all languages).

        Returns:
            The page text split on the result-section markers; when both
            markers are present, item 1 holds the search results.

        RFE: have a branch to use html2text local copy if present, vs.
        remote if not.
        """
        response = self._open_url(self._build_query_url(page, lang))
        try:
            html = self._to_text(response.read())
        finally:
            response.close()
        return self._RESULTS_SPLIT.split(html)

    def parse_search_result(self, result):
        """Print one search result and remember its raw script URL.

        The first two lines of *result* are treated as the header carrying
        the script path and the language. Chunks without a recognisable
        header are skipped.
        """
        lines = result.split("\n")
        header = self._RESULT_HEADER.match("".join(lines[0:2]))
        if header is None:
            return
        self.counter += 1
        url = self._raw_url(header.group(1))
        self.print_banner(header.group(2), url)
        self.scripts.append(url)  # keep track of script links
        for line in lines[2:]:
            # ignore pagination markup
            if "github.com" in line or "https://git" in line or "[Next" in line:
                continue
            if line.strip() == "":
                continue
            print(line)

    def print_banner(self, lang, url):
        """Print the result number, language and script URL under a ruler."""
        print("\n" + "+" * self.BANNER_WIDTH)
        print("(%i) %s / src: %s" % (self.counter, lang, url))

    def show_script_context(self, script_num=""):
        """Show the lines around each match in one of the found scripts.

        Args:
            script_num: number of the search result as printed in its
                banner. When left at the default "" the user is prompted.

        Returns:
            False when nothing could be shown (no results yet, invalid
            number, download problem, empty script); None otherwise.
        """
        if not self.scripts:
            print("There are no search results yet, so cannot show any scripts yet.")
            return False

        if script_num == "":
            try:
                script_num = self._read_input("Enter search result number: ")
            except EOFError:
                return False
        try:
            number = int(str(script_num).strip())
        except ValueError:
            print("That is not a valid search result number.")
            return False
        # Reject 0 and negatives explicitly: they would otherwise index the
        # list from its end.
        if not 1 <= number <= len(self.scripts):
            print("Please pick a number between 1 and %i." % len(self.scripts))
            return False
        script = self.scripts[number - 1]  # list index is 1 less than counter

        try:
            response = self._open_url(script)
        except IOError as err:
            print("Could not download the script: %s" % err)
            return False
        try:
            if response.getcode() != 200:
                print("The requested script did not give a 200 return code")
                return False
            lines = [self._to_text(raw) for raw in response.readlines()]
        finally:
            response.close()
        if not lines:
            print("Did not get content back from script, maybe it is gone?")
            return False

        # Match on the term as typed; the "+"-joined query form never occurs
        # in a script for multi-word searches.
        term = getattr(self, "matchTerm", "") or self.searchTerm
        needle = term.lower()
        if not needle:
            print("There is no search term to look for.")
            return False

        num_context_lines = self.CONTEXT_LINES
        print("\nExtracting more context for search term <%s> ..." % term)
        print("Showing %i lines before and after the match in the original "
              "script hosted here:\n%s\n" % (num_context_lines, script))
        for index, line in enumerate(lines):
            if needle not in line.lower():
                continue
            print("\n... %s found at line %i ..." % (term, index + 1))
            window = self._context_window(lines, index, num_context_lines)
            for line_number, text in window:
                # the arrow makes the matching lines stand out
                marker = " --->" if needle in text.lower() else ""
                print("%i%s %s" % (line_number, marker, text.rstrip("\r\n")))
# Instantiate: GithubSearch.__init__ starts the interactive menu loop, so only
# do this when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    github = GithubSearch()
See it in action
$ vi github_search.py
(N)ew search
(S)how more context (github script)
(Q)uit
Enter choice: N
You picked: [n]
Enter search term: os.system
Filter on programming language (press Enter to include all): python
Number of search pages to process (default = 3):
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(1) Python / src: https://raw.github.com/fhopecc/stxt/1a14c802362047af4c9f6d5ec2312a57cbc9bca6/task/setup_win.py
import os
_os.system_(
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(2) Python / src: https://raw.github.com/fhopecc/stxt/325dc6e2cbfecc9d071264f71aee7b156a8a6970/task/shutdown.py
import os
_os.system_('shutdown -s -f')
..
..
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(9) Python / src: https://raw.github.com/rob0r/snmpnetif/b6228f3ba6c55a7f8119af3a1bd4c014f5533b9b/snmpnetif.py
(True):
try:
# clear the screen
if os.name == 'nt': clearscreen = _os.system_
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(10) Python / src: https://raw.github.com/trey0/geocamShare/98029ffb1d26784346f7a2e5984048e8764df116/djangoWsgi.py
.mkstemp('djangoWsgiSourceMe.txt')
os.close(fd)
_os.system_('bash -c "(source %s/sourceme.sh && printenv > %s
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(11) Python / src: https://raw.github.com/jphcoi/MLMTC/48029dd647dc17173ed94693deccbb8d7bb42ed6/map_builder/CFpipe.py
_sys
try:
_os.system_(command_sys)
except:
'----------------------------------detection de communaut
(N)ew search
(S)how more context (github script)
(Q)uit
Enter choice: s
You picked: [s]
Enter search result number: 9
Extracting more context for search term <os.system> ...
Showing 8 lines before and after the match in the original script hosted here:
https://raw.github.com/rob0r/snmpnetif/b6228f3ba6c55a7f8119af3a1bd4c014f5533b9b/snmpnetif.py
... os.system found at line 250 ...
242 ifidx = self.ifactive()
243
244 # get active interface names
245 ifnames = self.ifnames(ifidx)
246
247 while(True):
248 try:
249 # clear the screen
250 ---> if os.name == 'nt': clearscreen = os.system('cls')
251 ---> if os.name == 'posix': clearscreen = os.system('clear')
252
253 # print the device name and uptime
254 print(devicename)
255 print('Device uptime: {0}n').format(self.devuptime())
256
257 # print stats if the first loop has run
..
(N)ew search
(S)how more context (github script)
(Q)uit
Enter choice: n
You picked: [n]
Enter search term: grep
Filter on programming language (press Enter to include all): perl
Number of search pages to process (default = 3):
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(1) Perl / src: https://raw.github.com/xPapers/xPapers/1fe2bf177e3d37f2024d00601340627a8ded85ad/lib/xPapers/Cat.pm
->catCount($me->catCount-1);
$me->save;
$me->clear_cache;
# detach
$me->cat_memberships([_grep_
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(2) Perl / src: https://raw.github.com/roethigj/Lx-Office-Anpassungen/e06afb2fc94573bc4a305a41e95a8b7a812e2db0/SL/IS.pm
->{TEMPLATE_ARRAYS}->{$_} }, "") } _grep_({ $_ ne "description" } @arrays));
}
$form
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
(3) Perl / src: https://raw.github.com/roethigj/Lx-Office-Anpassungen/e06afb2fc94573bc4a305a41e95a8b7a812e2db0/SL/OE.pm
->{sort} && _grep_($form->{sort}, keys(%allowed_sort_columns))) {
$sortorder = $allowed
(N)ew search
(S)how more context (github script)
(Q)uit
Enter choice: s
You picked: [s]
Enter search result number: 3
Extracting more context for search term <grep> ...
Showing 8 lines before and after the match in the original script hosted here:
https://raw.github.com/roethigj/Lx-Office-Anpassungen/e06afb2fc94573bc4a305a41e95a8b7a812e2db0/SL/OE.pm
... grep found at line 210 ...
202 "ordnumber" => "o.ordnumber",
203 "quonumber" => "o.quonumber",
204 "name" => "ct.name",
205 "employee" => "e.name",
206 "salesman" => "e.name",
207 "shipvia" => "o.shipvia",
208 "transaction_description" => "o.transaction_description"
209 );
210 ---> if ($form->{sort} && grep($form->{sort}, keys(%allowed_sort_columns))) {
211 $sortorder = $allowed_sort_columns{$form->{sort}} . " ${sortdir}";
212 }
213 $query .= qq| ORDER by | . $sortorder;
214
215 my $sth = $dbh->prepare($query);
216 $sth->execute(@values) ||
217 $form->dberror($query . " (" . join(", ", @values) . ")");
... grep found at line 1135 ...
1127 my $sameitem = "";
1128 foreach $item (sort { $a->[1] cmp $b->[1] } @partsgroup) {
1129 $i = $item->[0];
1130
1131 if ($item->[1] ne $sameitem) {
1132 push(@{ $form->{TEMPLATE_ARRAYS}->{description} }, qq|$item->[1]|);
1133 $sameitem = $item->[1];
1134
1135 ---> map({ push(@{ $form->{TEMPLATE_ARRAYS}->{$_} }, "") } grep({ $_ ne "description" } @arrays));
1136 }
1137
1138 $form->{"qty_$i"} = $form->parse_amount($myconfig, $form->{"qty_$i"});
1139
1140 if ($form->{"id_$i"} != 0) {
1141
1142 # add number, description and qty to $form->{number}, ....
..
(N)ew search
(S)how more context (github script)
(Q)uit
Enter choice: q
You picked: [q]
Goodbye!
shell returned 1
