把参考文献列表搬过来

把此前整理的参考文献搬过来,方便大家将来查找:

学术论文相关

2013年-2014年

Notes: Ref, NoTag

@article{Picariello:2014ve,
  author  = {Picariello, F and Rapuano, S and Villano, U},
  title   = {{A portable measurement system for power profiling of processing units}},
  journal = {Measurement},
  year    = {2014},
}
Notes: Recommend, UIBench, Download

@misc{Patil:2014vs,
  author       = {Patil, Chetan Arvind and Memik, Gokhan},
  title        = {{UIBench}: User Interaction Benchmark Suite for Architectural Simulators},
  howpublished = {Technical report},
  year         = {2014},
  month        = mar,
  url          = {http://chetanpatil.info/wp-content/uploads/2013/12/Version2_TechnicalReport_UIBench_User_Interaction_Benchmark_Suite_For_Architectural_Simulators.pdf},
}
Notes: Ref, NoTag

@article{Leite:2014vq,
  author  = {Leite, A and Tadonki, C and Eisenbeis, C and de Melo, A},
  title   = {{A Fine-grained Approach for Power Consumption Analysis and Prediction}},
  journal = {Procedia Computer Science},
  year    = {2014},
}
Notes: Ref, NoTag

@misc{ChenHan:vt,
  author       = {Nowatzki, Tony and Menon, Jaikrishnan and Ho, Chen-Han and Sankaralingam, Karthikeyan},
  title        = {{gem5}, {GPGPUSim}, {McPAT}, {GPUWattch}, ``Your favorite simulator here'' Considered Harmful},
  howpublished = {research.cs.wisc.edu},
}
Notes: Ext, NoTag

@article{Lang:2014fo,
  author  = {Lang, Jens and R{\"u}nger, Gudula},
  title   = {An execution time and energy model for an energy-aware execution of a conjugate gradient method with {CPU/GPU} collaboration},
  journal = {Journal of Parallel and Distributed Computing},
  year    = {2014},
  month   = jun,
}
Notes: VIP, gem5Error’ARM14, Download

@inproceedings{Gutierrez:2014jw,
  author    = {Gutierrez, A and Pusdesris, J and Dreslinski, R G and Mudge, T and Sudanthi, C and Emmons, C D and Hayenga, M and Paver, N},
  title     = {Sources of error in full-system simulation},
  booktitle = {2014 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2014},
  pages     = {13--22},
  annote    = {这篇文章的关键在于指出了gem5在全系统仿真环境下,与ARM Versatile TC2开发板的性能差异:其中,SPEC2006的运行时平均绝对误差在13%左右;二PARSEC的运行时平均绝对误差在16%左右。
文章对比了当前大量使用的多种支持全系统仿真的仿真器,包括Flexus、gem5、GEMS、MARSS、OVPSim、PTLsim以及simics等。
这篇文章的对比对象是A15双核处理器,因此包含了A15双核的一些微结构参数,特别是系统中DDR的参数。文章还指出:gem5的默认配置的TLB是64-entry的,而A15是1个32-entry的指令TLB+2个分离的32-entry数据TLB。分支预测单元两者完全一样,都是bi-mode分支预测器。此外,由于A15的fetch缓冲仅有16B,而不是一个cache行大小(gem5),因此gem5会大幅低估指令cache访问的次数,需要修改并保证仅有critical的16B返回。另外还有cache跨行的问题需要处理。
Gem5的存储系统采用MOESI一致性协议,与A15非常类似,但L1的容量应该设置为32kB(默认是64kB),L2 cache设置为1MB(默认2MB),关联度修改为16(默认为8),A15采用的是随机L2替换策略,而gem5采用的是LRU,这个需要手工实现。
此外,gem5不支持I-cache无效指令,需要添加。
对于DRAM部分,采用simpleDRAM模型,但按照Micro调整参数,并采用20ns作为未知延时的经验参数。
关闭gem5和A15的预取器,因为A15有一种专门探测和加速流访存的的硬件流加速器(针对类似于memcpy等操作)。
},
}
Notes: Recommend, AndroidProfing’Trans14, Download

@article{Tu:2014ff,
  author  = {Tu, Chia-Heng and Hsu, Hui-Hsin and Chen, Jen-Hao and Chen, Chun-Han and Hung, Shih-Hao},
  title   = {Performance and power profiling for emulated {Android} systems},
  journal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year    = {2014},
  volume  = {19},
  number  = {2},
  month   = mar,
}
Notes: Ext, NoTag

@inproceedings{Muthukaruppan:2014wn,
  author    = {Muthukaruppan, T Somu and Pathania, A},
  title     = {Price theory based power management for heterogeneous multi-cores},
  booktitle = {Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS '14)},
  year      = {2014},
}
Notes: VIP, MobyBench’ISPASS14, Download

@inproceedings{Huang:ty,
  author    = {Huang, Y and Zha, Z and Chen, M and Zhang, L},
  title     = {{Moby}: A Mobile Benchmark Suite for Architectural Simulators},
  booktitle = {2014 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2014},
}
Notes: Recommend, AndroidPower’14, Download

@inproceedings{Rethinagiri:2014kl,
  author    = {Rethinagiri, Santhosh Kumar and Palomar, Oscar and Ben Atitallah, Rabie and Niar, Smail and Unsal, Osman and Kestelman, Adrian Cristal},
  title     = {System-level power estimation tool for embedded processor based platforms},
  booktitle = {Proceedings of the 6th Workshop on Rapid Simulation and Performance Evaluation: Methods and Tools (RAPIDO '14)},
  year      = {2014},
  pages     = {5--8},
  month     = jan,
}
Notes: Ext, NoTag

@misc{LopezNovoa:2014vi,
  author = {Lopez-Novoa, U and Mendiburu, A and Miguel-Alonso, J},
  title  = {A Survey of Performance Modeling and Simulation Techniques for Accelerator-based Computing},
  year   = {2014},
}
Notes: Ref, NoTag

@inproceedings{Sartor:2013ib,
  author    = {Sartor, Anderson Luiz and Correa, Ulisses Brisolara and Filho, Antonio Carlos Schneider Beck},
  title     = {{AndroProf}: A Profiling Tool for the {Android} Platform},
  booktitle = {2013 III Brazilian Symposium on Computing Systems Engineering (SBESC)},
  year      = {2013},
  pages     = {23--28},
}
Notes: Ref, BottleGraph
@inproceedings{DuBois:2013de,
  author    = {Du Bois, Kristof and Sartor, Jennifer B and Eyerman, Stijn and Eeckhout, Lieven},
  title     = {Bottle graphs: visualizing scalability bottlenecks in multi-threaded applications},
  booktitle = {OOPSLA '13: Proceedings of the 2013 ACM SIGPLAN international conference on Object oriented programming systems languages {\&} applications},
  year      = {2013},
  pages     = {355--372},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = oct,
}
Notes: VIP, ARMPCA, Download
@inproceedings{Sunwoo:2013dl,
  author    = {Sunwoo, Dam and Wang, W and Ghosh, M and Sudanthi, C and Blake, G and Emmons, C D and Paver, N C},
  title     = {A structured approach to the simulation, analysis and characterization of smartphone applications},
  booktitle = {2013 IEEE International Symposium on Workload Characterization (IISWC)},
  year      = {2013},
  pages     = {113--122},
}
Notes: Ext, NoTag

@misc{Wang:2013wm,
  author = {Wang, W and Zwolinski, M},
  title  = {An improved instruction-level power model for {ARM11} microprocessor},
  year   = {2013},
}
Notes: VIP, DarkDie, Download

@misc{Wang:2013vo,
  author       = {Wang, L and Skadron, K},
  title        = {Dark vs. Dim Silicon and Near-Threshold Computing Extended Results},
  howpublished = {cs.virginia.edu},
  year         = {2013},
}
Notes: VIP, CASE’13, Download

@inproceedings{Pricopi:2013fv,
  author    = {Pricopi, M and Muthukaruppan, T S and Venkataramani, V and Mitra, T and Vishin, S},
  title     = {Power-performance modeling on asymmetric multi-cores},
  booktitle = {2013 International Conference on Compilers, Architecture and Synthesis for Embedded Systems (CASES)},
  year      = {2013},
  pages     = {1--10},
  publisher = {IEEE},
}
Notes: Recommend, CriticalThread’ISCA13, Download

@inproceedings{DuBois:2013eg,
  author    = {Du Bois, Kristof and Eyerman, Stijn and Sartor, Jennifer B and Eeckhout, Lieven},
  title     = {Criticality stacks: identifying critical threads in parallel programs using synchronization behavior},
  booktitle = {ISCA '13: Proceedings of the 40th Annual International Symposium on Computer Architecture},
  year      = {2013},
  pages     = {511--522},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = jun,
}
Notes: Ref, NoTag

@inproceedings{Carlson:2013co,
  author    = {Carlson, T E and Heirman, W and Eeckhout, L},
  title     = {Sampled simulation of multi-threaded applications},
  booktitle = {2013 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2013},
  pages     = {2--12},
}
Notes: Ref, ARMv8Sim

@inproceedings{Jiang:2013vt,
  author    = {Jiang, Tao and Zhang, Lele and Hou, Rui and Zhang, Yi and Zhang, Qianlong and Chai, Lin and Han, Jing and Zhang, Wuxiang and Wang, Cong and Zhang, Lixin},
  title     = {The {ARMv8} simulator},
  booktitle = {Proceedings of the 27th International ACM Conference on Supercomputing (ICS '13)},
  year      = {2013},
  pages     = {477--478},
  month     = jun,
}
Notes: Recommend, gem5DVFS’13, Download

@inproceedings{Spiliopoulos:2013eq,
  author    = {Spiliopoulos, V and Bagdia, A and Hansson, A and Aldworth, P and Kaxiras, S},
  title     = {Introducing {DVFS}-Management in a Full-System Simulator},
  booktitle = {2013 IEEE 21st International Symposium on Modeling, Analysis {\&} Simulation of Computer and Telecommunication Systems (MASCOTS)},
  year      = {2013},
  pages     = {535--545},
}
Notes: Ref, NoTag

@inproceedings{Chen:2013en,
  author    = {Chen, Hung-Shuen and Chiou, Jr-Yuan and Yang, Cheng-Yan and Wu, Yi-jui and Hwang, Wei-chung and Hung, Hao-Chien and Liao, Shih-Wei},
  title     = {Design and implementation of high-level compute on {Android} systems},
  booktitle = {2013 IEEE 11th Symposium on Embedded Systems for Real-time Multimedia (ESTIMedia)},
  year      = {2013},
  pages     = {96--104},
}
Notes: VIP, RevisitRISCvsCISC, Download

@inproceedings{Blem:2013bx,
  author    = {Blem, E and Menon, J and Sankaralingam, K},
  title     = {Power struggles: Revisiting the {RISC} vs. {CISC} debate on contemporary {ARM} and x86 architectures},
  booktitle = {2013 IEEE 19th International Symposium on High Performance Computer Architecture (HPCA2013)},
  year      = {2013},
  pages     = {1--12},
}
Notes: VIP, ARMPCA’13, Download

@inproceedings{Pandiyan:2013hy,
  author    = {Pandiyan, D and Lee, Shin-Ying and Wu, C J},
  title     = {Performance, energy characterizations and architectural implications of an emerging mobile platform benchmark suite -- {MobileBench}},
  booktitle = {2013 IEEE International Symposium on Workload Characterization (IISWC)},
  year      = {2013},
  pages     = {133--142},
}
Notes: Ref, NoTag, Download

@inproceedings{Song:2013df,
  author    = {Song, Shuaiwen and Su, Chunyi and Rountree, B and Cameron, K W},
  title     = {A Simplified and Accurate Model of Power-Performance Efficiency on Emergent {GPU} Architectures},
  booktitle = {2013 IEEE International Symposium on Parallel {\&} Distributed Processing (IPDPS)},
  year      = {2013},
  pages     = {673--686},
}
Notes: Recommend, Weight, Download

@article{Eyerman:2013tq,
  author  = {Eyerman, Stijn and Eeckhout, Lieven},
  title   = {{Restating the case for weighted-IPC metrics to evaluate multiprogram workload performance}},
  journal = {IEEE Computer Architecture Letters},
  year    = {2013},
  volume  = {99},
  number  = {2},
  pages   = {1},
}
Notes: Ext, NoTag

@article{AlBahra:2013bw,
  author  = {Al Bahra, Samy},
  title   = {{Nonblocking Algorithms and Scalable Multicore Programming}},
  journal = {Queue},
  year    = {2013},
  volume  = {11},
  number  = {5},
  month   = may,
}
Notes: Ref, NoTag

@inproceedings{Peng:2013eo,
  author    = {Peng, Manman and Hu, Yan},
  title     = {A Power Model Combined of Architectural Level and Gate Level for Multicore Processors},
  booktitle = {2013 12th IEEE International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom)},
  year      = {2013},
  pages     = {1652--1655},
}
Notes: VIP, AndroidCharcter, Download

@inproceedings{Ko:2013er,
  author    = {Ko, Jeonguk and Shim, Hyungjoon and Kim, Dongjin and Jeong, Youn-Sik and Cho, Seong-je and Park, Minkyu and Han, Sangchul and Kim, Seong Baeg},
  title     = {Measuring similarity of {Android} applications via reversing and {K-gram} birthmarking},
  booktitle = {RACS '13: Proceedings of the 2013 Research in Adaptive and Convergent Systems},
  year      = {2013},
  pages     = {336--341},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = oct,
}
Notes: Ref, NoTag

@misc{Zhang:2013vq,
  author       = {Zhang, L},
  title        = {Power, Performance Modeling and Optimization for Mobile System and Applications},
  howpublished = {deepblue.lib.umich.edu},
  year         = {2013},
}
Notes: Ref, NoTag

@misc{Lee:2013wu,
  author = {Lee, J H and Meng, J and Kim, H},
  title  = {{SESH} framework: A Space Exploration Framework for {GPU} Application and Hardware Codesign},
  year   = {2013},
}
Notes: VIP, McPAT2Muticore’TACO13, Download

@article{Li:2013ey,
  author  = {Li, Sheng and Ahn, Jung Ho and Strong, Richard D and Brockman, Jay B and Tullsen, Dean M and Jouppi, Norman P},
  title   = {The {McPAT} Framework for Multicore and Manycore Architectures: Simultaneously Modeling Power, Area, and Timing},
  journal = {ACM Transactions on Architecture and Code Optimization (TACO)},
  year    = {2013},
  volume  = {10},
  number  = {1},
  pages   = {5--29},
  month   = apr,
}
Notes: Ref, GPU2McPAT

@techreport{Lim:2013ts,
  author      = {Lim, J and Lakshminarayana, N and Kim, H},
  title       = {Power Modeling for {GPU} Architecture using {McPAT}},
  institution = {Georgia Institute of Technology},
  year        = {2013},
}
Notes: Recommend, DySER, Download

@misc{Ho:vn,
  author          = {Ho, C H and Govindaraju, V and Nowatzki, T and Marzec, Z},
  title           = {Performance Evaluation of a {DySER} {FPGA} Prototype System Spanning the Compiler, Microarchitecture, and Hardware Implementation},
  scraped-journal = {Energy (mJ)},
}

2011年-2012年

Notes: Ext, NoTag

@article{Sim:2012kt,
  author  = {Sim, Jaewoong and Dasgupta, Aniruddha and Kim, Hyesoon and Vuduc, Richard},
  title   = {A performance analysis framework for identifying potential benefits in {GPGPU} applications},
  journal = {ACM SIGPLAN Notices},
  year    = {2012},
  volume  = {47},
  number  = {8},
  pages   = {11--22},
  month   = sep,
}
Notes: Ref, NoTag

@article{Tu:2012gi,
  author  = {Tu, Chia-Heng and Hung, Shih-Hao and Tsai, Tung-Chieh},
  title   = {{MCEmu}: A Framework for Software Development and Performance Analysis of Multicore Systems},
  journal = {ACM Transactions on Design Automation of Electronic Systems (TODAES)},
  year    = {2012},
  volume  = {17},
  number  = {4},
  pages   = {1--25},
  month   = oct,
}
Notes: VIP, InorderModel, Download

@inproceedings{Breughe:2012ha,
  author    = {Breughe, M and Eyerman, S and Eeckhout, L},
  title     = {A mechanistic performance model for superscalar in-order processors},
  booktitle = {2012 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2012},
  pages     = {14--24},
}
Notes: Recommend, DalvikPerf, Download

@inproceedings{Azimzadeh:2012jv,
  author    = {Azimzadeh, E and Sameki, M and Goudarzi, M},
  title     = {Performance analysis of {Android} underlying virtual machine in mobile phones},
  booktitle = {2012 IEEE International Conference on Consumer Electronics -- Berlin (ICCE-Berlin)},
  year      = {2012},
  pages     = {292--295},
  publisher = {IEEE},
}
Notes: Recommend, AMBench, Download

@misc{Lee:2012tl,
  author = {Lee, C and Kim, E and Kim, H},
  title  = {The {AM-Bench}: An {Android} Multimedia Benchmark Suite},
  year   = {2012},
}
Notes: VIP, gem5Accuracy’12, Download

@inproceedings{Butko:2012hl,
  author    = {Butko, A and Garibotti, R and Ost, L and Sassatelli, G},
  title     = {Accuracy evaluation of {GEM5} simulator system},
  booktitle = {2012 7th International Workshop on Reconfigurable and Communication-Centric Systems-on-Chip (ReCoSoC)},
  year      = {2012},
  pages     = {1--7},
  annote    = {这篇论文也是我必须引用的,配合ISPASS’14那篇,形成对GEM5仿真系统准确性的双重证明。这篇论文首先同样综述了多种不同的,支持全系统仿真的仿真平台,包括:Simics、PTLsim、SimpleScalar、OVPsim以及GEM5,指出Simics的问题在于非CA以及商业授权;PTLsim的问题在于仅支持x86且维护社区垃圾;SimpleScalar的问题在于:维护越来越差;OVPsim的问题在于非CA。
然后,论文给出其对比硬件为Nova A9500,是一颗双核A9,主频在1GHz,配备DSP以及一些ASIP核和Mali-400的SoC芯片,其DVFS支持从200MHz到1GHz,但这篇论文会锁频在1GHz,因为难以对物理传感器信息做建模。A9500的存储层次包括:32+32的L1、512KB的L2、64-bit的DDR(400MHz)。
论文选择的bench比较多,有CMU的Sphinux3、CSU的人脸识别、Tachyon的ray tracing、MSSG的MPEG-2 EN、MSSG的MPEG-2 DE等,此外还选择了SPLASH-2,还有stream。
实验结果表明:SPLASH-2中LU和radix的误差在1.39%-17.94%;stream中copy和scale误差较小,但sum和triad误差较大(误差1倍左右),论文认为原因在于存储在DDR的位置不同(不同的页),通过大幅降低物理系统的带宽可以验证。
},
}
Notes: VIP, ISCA12, Download

@inproceedings{Nair:2012gn,
  author    = {Nair, Arun Arvind and Eyerman, Stijn and Eeckhout, Lieven and John, Lizy Kurian},
  title     = {A first-order mechanistic model for architectural vulnerability factor},
  booktitle = {ISCA '12: Proceedings of the 39th Annual International Symposium on Computer Architecture},
  year      = {2012},
  publisher = {ACM},
  month     = jun,
}
Notes: Recommend, SpeedupStack’ISPASS12, Download

@inproceedings{Eyerman:2012fu,
  author    = {Eyerman, S and Du Bois, K and Eeckhout, L},
  title     = {Speedup stacks: Identifying scaling bottlenecks in multi-threaded applications},
  booktitle = {2012 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2012},
  pages     = {145--155},
}
Notes: Ref, AndroidBench

@article{Kim:2012vv,
  author  = {Kim, J M and Kim, J S},
  title   = {{Androbench: Benchmarking the storage performance of android-based mobile devices}},
  journal = {Frontiers in Computer Education},
  year    = {2012},
}
Notes: VIP, GentDVFS’11, Download

@article{Eyerman:2011ib,
  author  = {Eyerman, Stijn and Eeckhout, Lieven},
  title   = {Fine-grained {DVFS} using on-chip regulators},
  journal = {ACM Transactions on Architecture and Code Optimization (TACO)},
  year    = {2011},
  volume  = {8},
  number  = {1},
  pages   = {1--24},
  month   = apr,
  annote  = {这篇论文认为当前有限的DVFS研究得出了彼此矛盾的结论。早期的研究指出:DVFS对于较大的时间尺度(timescales)更加有效,且其切换的开销非常大,因此不应该经常切换。然而,近期的针对片上电压调节器的研究指出:细粒度的DVFS具有非常好的能耗收益(几百个cycle为粒度)。这篇论文探索了时间尺度和scaling速度we问题,发现:粗粒度的DVFS不受时间尺度以及scaling速度的影响,然而,细粒度的DVFS却可以在存储密集型应用上获得巨大的能耗收益。因此,本文提出了一种细粒度的微结构驱动型DVFSj机制,可以通过片上调节器在每次独立片外仿存s时下调电压与频率。},
}
Notes: VIP, HybridModel, Download

@article{Dubach:2011bm,
  author  = {Dubach, C and Jones, T M and O'Boyle, M F P},
  title   = {An Empirical Architecture-Centric Approach to Microarchitectural Design Space Exploration},
  journal = {IEEE Transactions on Computers},
  year    = {2011},
  volume  = {60},
  number  = {10},
  pages   = {1445--1458},
  annote  = {这是一篇代表性的处理器经验模型论文,发在了2011年的TC上,此前该论文在MICOR’07上发表过。核心是基于机器学习技术,快速准确的预测任意微架构配置上执行任意应用的性能和能耗。该方法使用此前离线获得的数据,可以在32次仿真内获得任意新应用,在整个微架构配置空间的性能。其误差率仅为7%,相关系数为0.95。
本文认为解析模型太难构建,且随着系统复杂度的上升,其构建的难度将显著上升。而现有的机器学习方案则存在2方面的不足:1)对每个新应用都必须重新训练和构建预测期,本文则通过捕获架构行为而非程序自身的行为,规避这个问题;2)需要大量的训练,而本文仅需要少量仿真(response)。
本文针对13种微结构参数做研究,包括:处理器宽度、ROB容量、IQ容量、LSQ容量、RF容量、RF的读端口数目、RF的写端口数目、Gshare预测期容量、BTB容量、分支允许?、L1-ICache容量、L1-Dcache容量以及L2 Ucache容量等。
首先指出解析模型分为机理模型和经验模型等两大类,其中机理模型是基于对目标系统的基本理解,也就是白盒方案。而经验模型则是通过统计以及机器学习的方法训练数据(回归模型或者神经网络等),属于黑盒模型。本文是基于这两者构建的混合灰盒模型,其中回归模型负责对未知参数进行建模。本文针对P4、Core2和Core i7进行建模,benchmark选择为SEPC2000和2006。误差率大约在9%-13%之间。此外,本文还支持混合模型的鲁棒性更好。
这篇论文给出了非常精炼但丰富的relate works。
CPI栈可以在真实硬件上构建。
在具体分析混合模型的部分,论文首先给出总的cycle数目公式,由如下部分构成:在宽度为D的处理器上dispatch N条有效微操作的(CISC应该考虑微码)数目;L1 I-cache、I-TLB以及L2 Icache的缺失开销;分支预测失败的开销;长延时load缺失以及D-TLB的开销。其中平均仿存演示应该等于独立仿存延时除以MLP修正因子;资源冲突导致的dispatch开销(完蛋了)。
本论文指出:此前的灰盒研究都假设仿存时间是确定值,同时假设处理器是平衡的,因此不考虑资源stall,此外还需要通过大量的仿真都得资源stall以及MLP的情况。
模型的推导过程是:用PMU抓事件次数,计算单次事件开销,然后扔到回归模型中求取未知参数
分支确定时间:该时间用于执行引导到错误分支指令的关键路径指令,该值依赖于:1)区间长度,越长的区间就意味着ROB中有越多的指令,也就意味着越长的依赖路径;2)ILP,ILP越大意味着引导到错误分支指令的依赖路径越短,而ILP与平均指令执行延时相关,越多的长延时指令就会导致分支确定时间越长。因此本文将浮点指令以及L1-Dcache缺失指令作为模型的输入。
MLP修正因子:反应仿存延时的不确定性,影响仿存延时最重要的因素是MLP。MLP的量取决于长延时load缺失的数目,他们之间越独立就意味着越有可能同时位于ROB中,从而展示出MLP。
资源stall:资源stall是源于dispatch stall或者流水线后端无法跟上前段的速率(没有缺失事件时)。dispatch stall的原因可能是ROB或者IQ满了,这又是由于ILP太少导致的,或者长延时指令的依赖链太长。
},
}
Notes: VIP, ISPASS11, Download

@inproceedings{Eyerman:2011cf,
  author    = {Eyerman, S and Hoste, K and Eeckhout, L},
  title     = {Mechanistic-empirical processor performance modeling for constructing {CPI} stacks on real hardware},
  booktitle = {2011 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)},
  year      = {2011},
  pages     = {216--226},
  publisher = {IEEE},
  annote    = {首先指出解析模型分为机理模型和经验模型等两大类,其中机理模型是基于对目标系统的基本理解,也就是白盒方案。而经验模型则是通过统计以及机器学习的方法训练数据(回归模型或者神经网络等),属于黑盒模型。本文是基于这两者构建的混合灰盒模型,其中回归模型负责对未知参数进行建模。本文针对P4、Core2和Core i7进行建模,benchmark选择为SEPC2000和2006。误差率大约在9%-13%之间。此外,本文还支持混合模型的鲁棒性更好。
这篇论文给出了非常精炼但丰富的relate works。
CPI栈可以在真实硬件上构建。
在具体分析混合模型的部分,论文首先给出总的cycle数目公式,由如下部分构成:在宽度为D的处理器上dispatch N条有效微操作的(CISC应该考虑微码)数目;L1 I-cache、I-TLB以及L2 Icache的缺失开销;分支预测失败的开销;长延时load缺失以及D-TLB的开销。其中平均仿存演示应该等于独立仿存延时除以MLP修正因子;资源冲突导致的dispatch开销(完蛋了)。
本论文指出:此前的灰盒研究都假设仿存时间是确定值,同时假设处理器是平衡的,因此不考虑资源stall,此外还需要通过大量的仿真都得资源stall以及MLP的情况。
模型的推导过程是:用PMU抓事件次数,计算单次事件开销,然后扔到回归模型中求取未知参数
分支确定时间:该时间用于执行引导到错误分支指令的关键路径指令,该值依赖于:1)区间长度,越长的区间就意味着ROB中有越多的指令,也就意味着越长的依赖路径;2)ILP,ILP越大意味着引导到错误分支指令的依赖路径越短,而ILP与平均指令执行延时相关,越多的长延时指令就会导致分支确定时间越长。因此本文将浮点指令以及L1-Dcache缺失指令作为模型的输入。
MLP修正因子:反应仿存延时的不确定性,影响仿存延时最重要的因素是MLP。MLP的量取决于长延时load缺失的数目,他们之间越独立就意味着越有可能同时位于ROB中,从而展示出MLP。
资源stall:资源stall是源于dispatch stall或者流水线后端无法跟上前段的速率(没有缺失事件时)。dispatch stall的原因可能是ROB或者IQ满了,这又是由于ILP太少导致的,或者长延时指令的依赖链太长。
},
}
Notes: VIP, BBench’ARM11, Download

@inproceedings{Gutierrez:2011fp,
  author    = {Gutierrez, A and Dreslinski, R G and Wenisch, T F and Mudge, T and Saidi, A and Emmons, C and Paver, N},
  title     = {Full-system analysis and characterization of interactive smartphone applications},
  booktitle = {2011 IEEE International Symposium on Workload Characterization (IISWC)},
  year      = {2011},
  pages     = {81--90},
  annote    = {这篇论文也是必须引用的论文之一,提出了bbench,并指出:真实世界的交互式应用的特征与spec2006完全不同,特别是指令cache、指令TLB以及分支预测器的性能},
}
Notes: VIP, gem5, Download

@article{Binkert:2011bd,
  author  = {Binkert, Nathan and Beckmann, Bradford and Black, Gabriel and Reinhardt, Steven K and Saidi, Ali and Basu, Arkaprava and Hestness, Joel and Hower, Derek R and Krishna, Tushar and Sardashti, Somayeh and Sen, Rathijit and Sewell, Korey and Shoaib, Muhammad and Vaish, Nilay and Hill, Mark D and Wood, David A},
  title   = {The {gem5} simulator},
  journal = {ACM SIGARCH Computer Architecture News},
  year    = {2011},
  volume  = {39},
  number  = {2},
  pages   = {1--7},
  month   = aug,
}
Notes: Ref, NoTag

@inproceedings{Rethinagiri:2011eg,
  author    = {Rethinagiri, S K and Atitallah, R B and Dekeyser, J},
  title     = {A system level power consumption estimation for {MPSoC}},
  booktitle = {2011 International Symposium on System-on-Chip (SoC)},
  year      = {2011},
  pages     = {56--61},
}
Notes: VIP, Sniper’SC11, Download

@inproceedings{Carlson:2011kb,
  author    = {Carlson, Trevor E and Heirman, Wim and Eeckhout, Lieven},
  title     = {{Sniper}: exploring the level of abstraction for scalable and accurate parallel multi-core simulation},
  booktitle = {SC '11: Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis},
  year      = {2011},
  pages     = {1--12},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = nov,
}
Notes: Ref, NoTag

@inproceedings{Clemons:2011hj,
  author    = {Clemons, J and Zhu, Haishan and Savarese, S and Austin, T},
  title     = {{MEVBench}: A mobile computer vision benchmarking suite},
  booktitle = {2011 IEEE International Symposium on Workload Characterization (IISWC)},
  year      = {2011},
  pages     = {91--102},
}
Notes: Ref, NoTag

@article{Hsieh:2011fx,
  author  = {Hsieh, Ming-yu and Rodrigues, Arun and Riesen, Rolf and Thompson, Kevin and Song, William},
  title   = {{A framework for architecture-level power, area, and thermal simulation and its application to network-on-chip design exploration}},
  journal = {SIGMETRICS Performance Evaluation Review},
  year    = {2011},
  volume  = {38},
  number  = {4},
  pages   = {63},
  month   = mar,
}

2009年-2010年

Notes: Ref, NoTag

@inproceedings{Eyerman:2010gx,
  author    = {Eyerman, Stijn and Eeckhout, Lieven},
  title     = {Modeling critical sections in {Amdahl's} law and its implications for multicore design},
  booktitle = {ISCA '10: Proceedings of the 37th annual international symposium on Computer architecture},
  year      = {2010},
  pages     = {362--370},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = jun,
  annote    = {这篇论文的主旨是证明并行计算的性能不仅仅受制于串行指令,还受制于关键段的同步。论文发现:并行计算性能中关键段的影响可以由完全串行部分与完全并行部分建模。其中串行部分可以由进入关键段的概率以及冲突概率确定。论文还指出三点对多核设计有价值的见解:1)异构多核的性能收益小于对称多核;2)较少且较大的小核可以提供更好的性能;3)在大核上执行关键段可以获得显著的加速比,但性能对关键段竞争预测器的准确度更加敏感。},
}
Notes: Ext, VMWare

@article{Barr:2010eg,
  author  = {Barr, Ken and Bungale, Prashanth and Deasy, Stephen and Gyuris, Viktor and Hung, Perry and Newell, Craig and Tuch, Harvey and Zoppis, Bruno},
  title   = {The {VMware} mobile virtualization platform: is that a hypervisor in your pocket?},
  journal = {ACM SIGOPS Operating Systems Review},
  year    = {2010},
  volume  = {44},
  number  = {4},
  pages   = {124--135},
  month   = dec,
}
Notes: Ref, NoTag

@inproceedings{Eyerman:2010dh,
  author    = {Eyerman, Stijn and Eeckhout, Lieven},
  title     = {Probabilistic job symbiosis modeling for {SMT} processor scheduling},
  booktitle = {ASPLOS XV: Proceedings of the fifteenth edition of ASPLOS on Architectural support for programming languages and operating systems},
  year      = {2010},
  publisher = {ACM},
  month     = mar,
}
Notes: Ref, NoTag

@inproceedings{Yan:2010ha,
  author    = {Yan, Wei and Liu, Jia and Lin, Chuang},
  title     = {A Hybrid Modeling Approach to Microarchitecture Design Space Exploring},
  booktitle = {2010 9th International Conference on Grid and Cooperative Computing (GCC)},
  year      = {2010},
  pages     = {110--117},
  publisher = {IEEE},
}
Notes: Recommend, NoTag, Download

@article{Hong:2010ie,
  author  = {Hong, Sunpyo and Kim, Hyesoon},
  title   = {An integrated {GPU} power and performance model},
  journal = {ACM SIGARCH Computer Architecture News},
  year    = {2010},
  volume  = {38},
  number  = {3},
  pages   = {280--289},
  month   = jun,
}
Notes: Ref, MatlabANN

@manual{Beale:2010ua,
  author = {Beale, M H and Hagan, M T and Demuth, H B},
  title  = {Neural Network Toolbox 7},
  note   = {User's guide},
  year   = {2010},
}
Notes: Ref, NoTag

@article{Eyerman:2010hi,
  author  = {Eyerman, S and Eeckhout, L},
  title   = {A Counter Architecture for Online {DVFS} Profitability Estimation},
  journal = {IEEE Transactions on Computers},
  year    = {2010},
  volume  = {59},
  number  = {11},
  pages   = {1576--1583},
}
Notes: Recommend, Sniper1, Download

@inproceedings{Genbrugge:2010je,
  author    = {Genbrugge, D and Eyerman, S and Eeckhout, L},
  title     = {Interval simulation: Raising the level of abstraction in architectural simulation},
  booktitle = {2010 IEEE 16th International Symposium on High Performance Computer Architecture (HPCA)},
  year      = {2010},
  pages     = {1--12},
  annote    = {这篇论文就开始实现从区间理论模型向sniper仿真器d的转化了。在摘要中,论文就提出通过将core级仿真提升到interval级,可以大幅降低仿真的时间开销(1个数量级),与M5的仿真相比,基于区间模型的精度在4.6%左右;此外,论文还认为区间模型的实现复杂度较低。
论文认为在多核仿真过程中提升抽象级最大的挑战是:如何配合协同执行的线程,因为存在线程间的同步、cache一致性、共享资源以及主存访问等问题。
本文的挑战以及贡献在于:预测每条指令的时序,而非解析模型中采用的平均性能,因为需要考虑模型同步精度、cachey一致性、共享资源竞争等问题;此外,本文还对缺失事件重叠效应做了描述,也就是二阶效应。相对于早期论文中,通过离线profiling获得关键路径长度的方法而言,本论文采用“旧窗口法”,在仿真过程中确定分支确定时间、窗口排空时间以及有效dispatch率。
论文还明确了一个问题:长延时D-cache缺失(以及D-TLB缺失)的开销严格意义上应该等于该load指令堵住ROB,从而导致dispatch终止到缺失事件解决为止,但可以近似看成仿存延时。然而一旦存在多个彼此独立的长延时load缺失,则可以挖掘MLP,当然并发度是取决于硬件的支持。
此外,这篇论文也开始讨论结构冲突问题。也就是当出现依赖指令链、L1 Dcache缺失、长延时FU指令或者store指令时,ROB、IQ、PR、WB等结构可能被填满,从而导致资源stall,最终导致dispatch的终止。其开销取决于堵住ROB的那条指令。
论文指出:指令窗口对应ROB,用于确定被长延时load缺失隐藏的缺失事件。
},
}
Notes: Ext, NoTag

@article{Anand:2010ic,
  author  = {Anand, M and Fischmeister, S and Hur, Y and Kim, Jesung and Lee, Insup},
  title   = {Generating Reliable Code from Hybrid-Systems Models},
  journal = {IEEE Transactions on Computers},
  year    = {2010},
  volume  = {59},
  number  = {9},
  pages   = {1281--1294},
}
Notes: Recommend, GentSMT’09, Download

@inproceedings{Eyerman:2009bn,
  author    = {Eyerman, Stijn and Eeckhout, Lieven},
  title     = {Per-thread cycle accounting in {SMT} processors},
  booktitle = {ASPLOS XIV: Proceedings of the 14th international conference on Architectural support for programming languages and operating systems},
  year      = {2009},
  pages     = {133--144},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = feb,
  annote    = {这篇论文对SMT处理器提出一种cycle计算架构,用于估计每个线程独立执行的时间(当其在SMT处理器上并发执行时)。这是通过统计多线程执行过程中,每个cycle处于base、缺失或者等待cycle实现的。单线程独立执行的时间就可以通过估计base以及缺失事件cycle数目得到,而等待cycle部分则表示为由于SMT执行方式引入的cycle损失数目。这个cycle统计单元的硬件开销大约1KB存储器,误差率在7.2%-11.7%左右。
},
}
Notes: Recommend, GPUanalyticalModel, Download

@inproceedings{Hong:2009gs,
  author    = {Hong, Sunpyo and Kim, Hyesoon},
  title     = {An analytical model for a {GPU} architecture with memory-level and thread-level parallelism awareness},
  booktitle = {ISCA '09: Proceedings of the 36th annual international symposium on Computer architecture},
  year      = {2009},
  pages     = {152},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = jun,
}
Notes: Ref, NoTag

@article{Rawassizadeh:2009ky,
  author  = {Rawassizadeh, Reza},
  title   = {Mobile Application Benchmarking Based on the Resource Usage Monitoring},
  journal = {International Journal of Mobile Computing and Multimedia Communications (IJMCMC)},
  year    = {2009},
  volume  = {1},
  number  = {4},
  pages   = {64--75},
}
Notes: Ref, 3DIC

@book{Xie:2009wh,
  author    = {Xie, Yuan and Cong, Jingsheng Jason and Sapatnekar, Sachin},
  title     = {{Three-Dimensional Integrated Circuit Design}},
  series    = {EDA, Design and Microarchitectures},
  publisher = {Springer},
  year      = {2009},
  month     = dec,
}
Notes: VIP, A8vsAtom, Download

@techreport{RobertsHoffman:2009wx,
  author      = {Roberts-Hoffman, K and Hegde, P},
  title       = {{ARM} {Cortex-A8} vs. {Intel} {Atom}: Architectural and benchmark comparisons},
  institution = {University of Texas at Dallas},
  address     = {Dallas},
  year        = {2009},
}
Notes: VIP, TOCS’09, Download

@article{Eyerman:2009gp,
  author  = {Eyerman, Stijn and Eeckhout, Lieven and Karkhanis, Tejas and Smith, James E},
  title   = {A mechanistic performance model for superscalar out-of-order processors},
  journal = {ACM Transactions on Computer Systems (TOCS)},
  year    = {2009},
  volume  = {27},
  number  = {2},
  month   = may,
}
Notes: Recommend, MLP’TACO09, Download

@article{Eyerman:2009ej,
  author  = {Eyerman, Stijn and Eeckhout, Lieven},
  title   = {Memory-level parallelism aware fetch policies for simultaneous multithreading processors},
  journal = {ACM Transactions on Architecture and Code Optimization (TACO)},
  year    = {2009},
  volume  = {6},
  number  = {1},
  pages   = {1--33},
  month   = mar,
}
Notes: Recommend, McPAT, Download

@inproceedings{Li:2009da,
  author    = {Li, Sheng and Ahn, Jung Ho and Strong, R D and Brockman, J B and Tullsen, D M and Jouppi, N P},
  title     = {{McPAT}: An integrated power, area, and timing modeling framework for multicore and manycore architectures},
  booktitle = {42nd Annual IEEE/ACM International Symposium on Microarchitecture (MICRO-42)},
  year      = {2009},
  pages     = {469--480},
}
Notes: Recommend, PredicModel, Download

@inproceedings{Dubach:2009ik,
  author    = {Dubach, C and Jones, T M and O'Boyle, M F P},
  title     = {Rapid early-stage microarchitecture design using predictive models},
  booktitle = {IEEE International Conference on Computer Design (ICCD 2009)},
  year      = {2009},
  pages     = {297--304},
  publisher = {IEEE},
}
Notes: Ref, EEMBCCharacter, Download

@article{Poovey:2009fn,
  author  = {Poovey, J A and Conte, T M and Levy, M and Gal-On, S},
  title   = {A Benchmark Characterization of the {EEMBC} Benchmark Suite},
  journal = {IEEE Micro},
  year    = {2009},
  volume  = {29},
  number  = {5},
  pages   = {18--29},
}
Notes: VIP, McPAT, Download

@techreport{Li:2009va,
  author      = {Li, S and Ahn, J and Brockman, J B and Jouppi, N P},
  title       = {{McPAT} 1.0: An integrated power, area, and timing modeling framework for multicore architectures},
  institution = {HP Labs},
  year        = {2009},
}

2007年-2008年

Notes: Ext, NoTag

@misc{Strydis:2008wi,
  author = {Strydis, C and Kachris, C},
  title  = {{ImpBench}: A novel benchmark suite for biomedical, microelectronic implants},
  year   = {2008},
}
Notes: Recommend, SerialInsts’08, Download

@inproceedings{Wells:2008ir,
  author    = {Wells, P M and Sohi, G S},
  title     = {Serializing instructions in system-intensive workloads: {Amdahl's} Law strikes again},
  booktitle = {IEEE 14th International Symposium on High Performance Computer Architecture (HPCA 2008)},
  year      = {2008},
  pages     = {264--275},
  publisher = {IEEE},
  annote    = {这篇论文专门讨论串行指令对于系统性能的影响。首先定义了SI使用的原因,包括:写控制寄存器、复杂依赖关系等,处理器为了避免不必要的复杂度,所以会串行化流水线,以顺序方式处理这些指令。目前SPARC V9、X86-64以及PowerPC会比较常用SI,其对性能具有非常严重的影响,比如SPARC ISA下OS部分的代码会因为SI,导致8-45%的性能损失},
}
Notes: Ref, NoTag

@inproceedings{OuldAhmedVall:2008ku,
  author    = {Ould-Ahmed-Vall, E M and Doshi, K A and Yount, C and Woodlee, J},
  title     = {Characterization of {SPEC} {CPU2006} and {SPEC} {OMP2001}: Regression Models and their Transferability},
  booktitle = {IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS 2008)},
  year      = {2008},
  pages     = {179--190},
}
Notes: Ref, NoTag

@inproceedings{Hoste:2008jb,
  author    = {Hoste, K and Eeckhout, L},
  title     = {Characterizing the Unique and Diverse Behaviors in Existing and Emerging General-Purpose and Domain-Specific Benchmark Suites},
  booktitle = {IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS 2008)},
  year      = {2008},
  pages     = {157--168},
}
Notes: Ref, SPEC06onIntel

@article{Prakash:2008vf,
  author  = {Prakash, Tribuvan Kumar and Peng, Lu},
  title   = {Performance characterization of {SPEC} {CPU2006} benchmarks on {Intel} {Core 2 Duo} processor},
  journal = {ISAST Trans. Comput. Softw. Eng},
  year    = {2008},
  volume  = {2},
  number  = {1},
  pages   = {36--41},
}
Notes: Recommend, Karkhanis’07, Download

@article{Karkhanis:2007vf,
  author  = {Karkhanis, T S and Smith, J E},
  title   = {{Automated design of application specific superscalar processors: an analytical approach}},
  journal = {ACM SIGARCH Computer Architecture News},
  year    = {2007},
}
Notes: VIP, CPIStack’Micor07, Download

@article{Eyerman:2007fs,
  author  = {Eyerman, S and Eeckhout, L and Karkhanis, T and Smith, J E},
  title   = {A Top-Down Approach to Architecting {CPI} Component Performance Counters},
  journal = {IEEE Micro},
  year    = {2007},
  volume  = {27},
  number  = {1},
  pages   = {84--93},
  annote  = {这篇论文是Eyerman早期的一片关于区间解析模型的论文,发表在07年的IEEE Micro上,这也应该是最有价值的一片详细阐述区间模型中缺失事件类型及其开销的论文了。
},
}
Notes: Recommend, NoTag, Download

@inproceedings{Dubach:2007dk,
  author        = {Dubach, C and Jones, T M and O'Boyle, M F P},
  title         = {Microarchitectural Design Space Exploration Using an Architecture-Centric Approach},
  booktitle     = {40th Annual {IEEE/ACM} International Symposium on Microarchitecture ({MICRO} 2007)},
  year          = {2007},
  pages         = {262--271},
  internal-note = {Same work as Dubach:2007ec; both keys are kept so existing citations still resolve}
}
Notes: Ref, HeLei

@article{Long:2007ki,
  author  = {Long, Changbo and Simonson, L J and Liao, Weiping and He, Lei},
  title   = {Microarchitecture Configurations and Floorplanning Co-Optimization},
  journal = {{IEEE} Transactions on Very Large Scale Integration ({VLSI}) Systems},
  year    = {2007},
  volume  = {15},
  number  = {7},
  pages   = {830--841},
  month   = jul
}
Notes: Recommend, SPEC06Tool, Download

@article{Spradling:2007he,
  author  = {Spradling, Cloyce D},
  title   = {{SPEC} {CPU2006} benchmark tools},
  journal = {ACM SIGARCH Computer Architecture News},
  year    = {2007},
  volume  = {35},
  number  = {1},
  pages   = {130--134},
  month   = mar
}
Notes: VIP, M5, Download

@article{Binkert:2006dv,
  author  = {Binkert, N L and Dreslinski, R G and Hsu, L R and Lim, K T and Saidi, A G and Reinhardt, S K},
  title   = {The {M5} Simulator: Modeling Networked Systems},
  journal = {{IEEE} Micro},
  year    = {2006},
  volume  = {26},
  number  = {4},
  pages   = {52--60}
}
Notes: Ref, NoTag

@article{孙帆:2007wu,
  author        = {{孙帆} and {施学勤}},
  title         = {基于{MATLAB}的{BP}神经网络设计},
  journal       = {计算机与数字工程},
  year          = {2007},
  volume        = {35},
  number        = {8},
  pages         = {124--126},
  internal-note = {Second author was exported as "勤, 施 学", a mis-split of the single name 施学勤; each name is braced so it is kept whole}
}
Notes: Recommend, NoTag, Download

@inproceedings{Dubach:2007ec,
  author        = {Dubach, Christophe and Jones, Timothy and O'Boyle, Michael},
  title         = {Microarchitectural Design Space Exploration Using an Architecture-Centric Approach},
  booktitle     = {{MICRO} 40: Proceedings of the 40th Annual {IEEE/ACM} International Symposium on Microarchitecture},
  year          = {2007},
  pages         = {262--271},
  publisher     = {{IEEE} Computer Society},
  month         = dec,
  internal-note = {Same work as Dubach:2007dk (pages taken from that entry); both keys are kept so existing citations still resolve}
}
Notes: VIP, MIWorkCharacter’Micro07, Download

@article{Hoste:2007fy,
  author  = {Hoste, K and Eeckhout, L},
  title   = {Microarchitecture-Independent Workload Characterization},
  journal = {{IEEE} Micro},
  year    = {2007},
  volume  = {27},
  number  = {3},
  pages   = {63--72}
}
Notes: Recommend, RegressModel, Download

@inproceedings{Lee:2007da,
  author    = {Lee, B C and Brooks, D M},
  title     = {Illustrative Design Space Studies with Microarchitectural Regression Models},
  booktitle = {{IEEE} 13th International Symposium on High Performance Computer Architecture ({HPCA} 2007)},
  year      = {2007},
  pages     = {340--351}
}
Notes: Ref, PTLsim

@inproceedings{Yourst:2007ed,
  author    = {Yourst, M T},
  title     = {{PTLsim}: A Cycle Accurate Full System x86-64 Microarchitectural Simulator},
  booktitle = {{IEEE} International Symposium on Performance Analysis of Systems \& Software ({ISPASS} 2007)},
  year      = {2007},
  pages     = {23--34}
}

2005年-2006年

Notes: Recommend, NoTag, Download

@inproceedings{Ipek:2006hg,
  author        = {{\.I}pek, Engin and McKee, Sally A and Caruana, Rich and de Supinski, Bronis R and Schulz, Martin},
  title         = {Efficiently exploring architectural design spaces via predictive modeling},
  booktitle     = {{ASPLOS XII}: Proceedings of the 12th international conference on Architectural support for programming languages and operating systems},
  year          = {2006},
  pages         = {195},
  publisher     = {ACM},
  address       = {New York, New York, USA},
  month         = nov,
  internal-note = {Accent was exported as an invalid curly-quote escape; the surname is written here with a dotted capital I. Only the first page is recorded}
}
Notes: Ref, NoTag

@article{常晓丽:2006wu,
  author  = {{常晓丽}},
  title   = {基于{Matlab}的{BP}神经网络设计},
  journal = {机械工程与自动化},
  year    = {2006},
  number  = {4},
  pages   = {36--37}
}
Notes: Recommend, PMUonIntel, Download

@inproceedings{Eyerman:2006cq,
  author    = {Eyerman, Stijn and Eeckhout, Lieven and Karkhanis, Tejas and Smith, James E},
  title     = {A performance counter architecture for computing accurate {CPI} components},
  booktitle = {{ASPLOS XII}: Proceedings of the 12th international conference on Architectural support for programming languages and operating systems},
  year      = {2006},
  pages     = {175},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = nov
}
Notes: Ref, Papabench

@article{Nemer:2006vl,
  author        = {Nemer, F and Cass{\'e}, H and Sainrat, P and Bahsoun, J P},
  title         = {{Papabench}: a free real-time benchmark},
  journal       = {OpenAccess Series in Informatics},
  year          = {2006},
  internal-note = {Journal was exported truncated as "... Series in Informatics"; restored name should be verified}
}
Notes: Ref, SimFlex

@article{Wenisch:2006bc,
  author  = {Wenisch, T F and Wunderlich, R E and Ferdman, M and Ailamaki, A and Falsafi, B and Hoe, J C},
  title   = {{SimFlex}: Statistical Sampling of Computer System Simulation},
  journal = {{IEEE} Micro},
  year    = {2006},
  volume  = {26},
  number  = {4},
  pages   = {18--31}
}
Notes: VIP, BranchMissOverheads, Download

@inproceedings{Eyerman:2006ji,
  author    = {Eyerman, S and Smith, J E and Eeckhout, L},
  title     = {Characterizing the branch misprediction penalty},
  booktitle = {{IEEE} International Symposium on Performance Analysis of Systems and Software ({ISPASS} 2006)},
  year      = {2006},
  pages     = {48--58},
  annote    = {1. This paper illustrates that the overhead of a branch misprediction can be substantially larger than the front-end pipeline length due to the branch resolution time.
2. This paper argues that for a balanced superscalar processor, the issue buffer and ROB size should be adequate to achieve the maximum issue rate.}
}
Notes: Recommend, NoTag, Download

@inproceedings{Joseph:2006ju,
  author    = {Joseph, P J and Vaswani, Kapil and Thazhuthaveetil, Matthew J},
  title     = {A Predictive Performance Model for Superscalar Processors},
  booktitle = {{MICRO} 39: Proceedings of the 39th Annual {IEEE/ACM} International Symposium on Microarchitecture},
  year      = {2006},
  pages     = {161--170},
  publisher = {{IEEE} Computer Society},
  month     = dec
}
Notes: Recommend, MICAearly’TOCS06, Download

@article{Joshi:2006dw,
  author  = {Joshi, A and Phansalkar, Aashish and Eeckhout, L and John, L K},
  title   = {Measuring benchmark similarity using inherent program characteristics},
  journal = {{IEEE} Transactions on Computers},
  year    = {2006},
  volume  = {55},
  number  = {6},
  pages   = {769--782}
}
Notes: Recommend, NoTag, Download

@inproceedings{Eyerman:2006jm,
  author    = {Eyerman, S and Eeckhout, L and De Bosschere, K},
  title     = {Efficient Design Space Exploration of High Performance Embedded Out-of-Order Processors},
  booktitle = {Proceedings of Design, Automation and Test in Europe ({DATE} '06)},
  year      = {2006},
  pages     = {1--6},
  publisher = {IEEE}
}
Notes: Ref, SPEC06dis

@article{Henning:2006th,
  author  = {Henning, J L},
  title   = {{SPEC} {CPU2006} benchmark descriptions},
  journal = {ACM SIGARCH Computer Architecture News},
  year    = {2006}
}
Notes: Recommend, InherentSimilarity, Download

@inproceedings{Hoste:2006ji,
  author    = {Hoste, Kenneth and Phansalkar, Aashish and Eeckhout, Lieven and Georges, Andy and John, Lizy K and De Bosschere, Koen},
  title     = {Performance prediction based on inherent program similarity},
  booktitle = {{PACT} '06: Proceedings of the 15th international conference on Parallel architectures and compilation techniques},
  year      = {2006},
  pages     = {114--122},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = sep
}
Notes: Ref, IntelPin

@inproceedings{Luk:2005hw,
  author    = {Luk, Chi-Keung and Cohn, Robert and Muth, Robert and Patil, Harish and Klauser, Artur and Lowney, Geoff and Wallace, Steven and Reddi, Vijay Janapa and Hazelwood, Kim},
  title     = {Pin: building customized program analysis tools with dynamic instrumentation},
  booktitle = {{PLDI} '05: Proceedings of the 2005 {ACM} {SIGPLAN} conference on Programming language design and implementation},
  year      = {2005},
  pages     = {190--200},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = jun
}
Notes: Ref, HeLei

@inproceedings{Simonson:2005fj,
  author        = {Simonson, Lucanus J and He, Lei},
  title         = {Micro-architecture Performance Estimation by Formula},
  booktitle     = {Embedded Computer Systems: Architectures, Modeling, and Simulation},
  year          = {2005},
  pages         = {192--201},
  internal-note = {Venue was exported as a journal with the truncated name "Embedded Computer Systems: Architectures"; restored proceedings title should be verified}
}
Notes: Ext, NoTag

@book{Kuhn:2005wk,
  author    = {Kuhn, Michael},
  title     = {Manual for the implementation of neural networks in {MATLAB}},
  publisher = {GRIN Verlag},
  year      = {2005},
  month     = dec
}
Notes: Ext, NoTag

@book{康耀红:2005wc,
  author        = {{康耀红} and {周开利}},
  title         = {神经网络模型及其 {MATLAB} 仿真程序设计},
  publisher     = {清华大学出版社},
  year          = {2005},
  internal-note = {Original had both names in one comma-separated author, which parses as a single person. Retyped from article (no journal given) to book; publisher and author order should be verified}
}
Notes: VIP, Simpoint3.0, Download

@article{Hamerly:2005un,
  author        = {Hamerly, G and Perelman, E and Lau, J and Calder, B},
  title         = {{Simpoint} 3.0: Faster and more flexible program phase analysis},
  journal       = {Journal of Instruction-Level Parallelism},
  year          = {2005},
  internal-note = {Journal was exported truncated as "Journal of Instruction Level ..."; restored name should be verified}
}

2005年以前

Notes: Ref, NoTag

@article{罗成汉:2004wu,
  author  = {{罗成汉}},
  title   = {基于 {MATLAB} 神经网络工具箱的 {BP} 网络实现},
  journal = {计算机仿真},
  year    = {2004}
}
Notes: VIP, ISCA’04, Download

@inproceedings{Karkhanis:2004eg,
  author    = {Karkhanis, T S and Smith, J E},
  title     = {A first-order superscalar processor model},
  booktitle = {Proceedings of the 31st Annual International Symposium on Computer Architecture},
  year      = {2004},
  pages     = {338--349},
  publisher = {IEEE},
  annote    = {这是提出区间一阶超标量处理器模型的第一篇论文,非常重要}
}
Notes: VIP, MLP, Download

@inproceedings{Chou:2004fm,
  author    = {Chou, Yuan and Fahs, B and Abraham, S},
  title     = {Microarchitecture optimizations for exploiting memory-level parallelism},
  booktitle = {Proceedings of the 31st Annual International Symposium on Computer Architecture},
  year      = {2004},
  pages     = {76--87},
  publisher = {IEEE}
}
Notes: Ref, NoTag

@inproceedings{Wunderlich:2003hq,
  author    = {Wunderlich, R E and Wenisch, T F and Falsafi, B and Hoe, J C},
  title     = {{SMARTS}: accelerating microarchitecture simulation via rigorous statistical sampling},
  booktitle = {Proceedings of the 30th Annual International Symposium on Computer Architecture},
  year      = {2003},
  pages     = {84--95}
}
Notes: Ext, NoTag

@inproceedings{Chen:2002ts,
  author        = {Chen, G and Kandemir, M and Vijaykrishnan, N},
  title         = {{Pennbench}: A benchmark suite for embedded {Java}},
  booktitle     = {{IEEE} International Workshop on Workload Characterization},
  year          = {2002},
  internal-note = {Venue was exported as a journal truncated to "... Characterization"; restored workshop name should be verified}
}
Notes: Recommend, SimpleScalar, Download

@article{Austin:2002hq,
  author  = {Austin, T and Larson, E and Ernst, D},
  title   = {{SimpleScalar}: an infrastructure for computer system modeling},
  journal = {Computer},
  year    = {2002},
  volume  = {35},
  number  = {2},
  pages   = {59--67}
}
Notes: VIP, PipeDepth, Download

@inproceedings{Hartstein:2002jt,
  author    = {Hartstein, A and Puzak, T R},
  title     = {The optimum pipeline depth for a microprocessor},
  booktitle = {Proceedings of the 29th Annual International Symposium on Computer Architecture},
  year      = {2002},
  pages     = {7--13},
  publisher = {{IEEE} Computer Society}
}
Notes: VIP, Mibench, Download

@inproceedings{Guthaus:2001ig,
  author        = {Guthaus, M R and Ringenberg, J S and Ernst, D and Austin, T M and Mudge, T and Brown, R B},
  title         = {{MiBench}: A free, commercially representative embedded benchmark suite},
  booktitle     = {Proceedings of the Fourth Annual {IEEE} International Workshop on Workload Characterization ({WWC-4})},
  publisher     = {IEEE},
  year          = {2001},
  month         = dec,
  internal-note = {Exported as a book, but this is a workshop paper; the booktitle was added from memory and should be verified}
}
Notes: Recommend, BranchTransitionRate, Download

@inproceedings{Haungs:2000dr,
  author    = {Haungs, M and Sallee, P and Farrens, M},
  title     = {Branch transition rate: a new metric for improved branch classification analysis},
  booktitle = {Proceedings of the Sixth International Symposium on High-Performance Computer Architecture ({HPCA-6})},
  year      = {2000},
  pages     = {241--250},
  publisher = {{IEEE} Computer Society}
}
Notes: Recommend, Wattch

@inproceedings{Brooks:2000gi,
  author    = {Brooks, D and Tiwari, V and Martonosi, M},
  title     = {{Wattch}: a framework for architectural-level power analysis and optimizations},
  booktitle = {Proceedings of the 27th International Symposium on Computer Architecture},
  year      = {2000},
  pages     = {83--94},
  publisher = {ACM}
}
Notes: Ref, ModelChallenges

@article{Bose:1999jv,
  author  = {Bose, P and Conte, T M and Austin, T M},
  title   = {Challenges in processor modeling and validation [Guest Editors' introduction]},
  journal = {{IEEE} Micro},
  year    = {1999},
  volume  = {19},
  number  = {3},
  pages   = {9--14}
}
Notes: VIP, ASLOPS’99, Download

@inproceedings{Michaud:1999kd,
  author    = {Michaud, P and Seznec, A and Jourdan, S},
  title     = {Exploring instruction-fetch bandwidth requirement in wide-issue superscalar processors},
  booktitle = {Proceedings of the 1999 International Conference on Parallel Architectures and Compilation Techniques},
  year      = {1999},
  pages     = {2--10},
  publisher = {{IEEE} Computer Society}
}
Notes: Ref, NoTag

@article{Black:1998ey,
  author  = {Black, B and Shen, J P},
  title   = {Calibration of microprocessor performance models},
  journal = {Computer},
  year    = {1998},
  volume  = {31},
  number  = {5},
  pages   = {59--65}
}
Notes: Ref, NoTag

@article{Bose:1998jz,
  author  = {Bose, P and Conte, T M},
  title   = {Performance analysis and its impact on design},
  journal = {Computer},
  year    = {1998},
  volume  = {31},
  number  = {5},
  pages   = {41--49}
}
Notes: VIP, CISCvsRISC, Download

@article{Bhandarkar:1997fl,
  author  = {Bhandarkar, Dileep},
  title   = {{RISC} versus {CISC}: a tale of two chips},
  journal = {ACM SIGARCH Computer Architecture News},
  year    = {1997},
  volume  = {25},
  number  = {1},
  pages   = {1--12},
  month   = mar
}
Notes: Ref, NoTag

@article{Chen:1996gj,
  author  = {Chen, I-Cheng K and Coffey, John T and Mudge, Trevor N},
  title   = {Analysis of branch prediction via data compression},
  journal = {ACM SIGPLAN Notices},
  year    = {1996},
  volume  = {30},
  number  = {5},
  pages   = {128--137},
  month   = oct
}
Notes: Ref, NoTag

@inproceedings{Noonburg:1994bv,
  author    = {Noonburg, Derek B and Shen, John P},
  title     = {Theoretical modeling of superscalar processor performance},
  booktitle = {{MICRO} 27: Proceedings of the 27th annual international symposium on Microarchitecture},
  year      = {1994},
  pages     = {52--62},
  publisher = {ACM},
  address   = {New York, New York, USA},
  month     = nov
}
Notes: Ref, NoTag

@article{Smith:1992ue,
  author        = {Smith, A J and Saavedra, R H},
  title         = {Analysis of Benchmark Characteristics and Benchmark Performance Prediction},
  journal       = {ACM Transactions on Computer Systems},
  year          = {1992},
  internal-note = {Journal was exported truncated as "ACM Transactions on Computer ..."; restored name, year and author order should be verified}
}
Notes: Ref, NoTag

@article{Hill:1989hv,
  author  = {Hill, M D and Smith, A J},
  title   = {Evaluating associativity in {CPU} caches},
  journal = {{IEEE} Transactions on Computers},
  year    = {1989},
  volume  = {38},
  number  = {12},
  pages   = {1612--1630},
  month   = dec
}
Notes: Ref, IW’72

@article{Riseman:1972bh,
  author  = {Riseman, E M and Foster, Caxton C},
  title   = {The Inhibition of Potential Parallelism by Conditional Jumps},
  journal = {{IEEE} Transactions on Computers},
  year    = {1972},
  number  = {12},
  pages   = {1405--1411}
}

专业书籍相关

Notes: Ref, NoTag

@book{Kritikakou:2014vv,
  author    = {Kritikakou, Angeliki and Catthoor, Francky and Goutis, Costas},
  title     = {Scalable and Near-Optimal Design Space Exploration for Embedded Systems},
  publisher = {Springer},
  year      = {2014},
  month     = mar
}
Notes: Ref, Validation

@book{Qin:2012uv,
  author        = {Qin, Xiaoke and Mishra, Prabhat and Koo, Heon-Mo},
  title         = {System-Level Validation: High-Level Modeling and Directed Test Generation Techniques},
  publisher     = {Springer Science \& Business Media},
  year          = {2012},
  month         = aug,
  internal-note = {The export stored the subtitle in the series field; it is merged into the title here}
}
Notes: VIP, CompEvaluation, Download

@article{Eeckhout:2010iq,
  author  = {Eeckhout, Lieven},
  title   = {Computer Architecture Performance Evaluation Methods},
  journal = {Synthesis Lectures on Computer Architecture},
  year    = {2010},
  volume  = {5},
  number  = {1},
  pages   = {1--145},
  month   = dec
}
Notes: Ref, NoTag

@book{Baer:2010ux,
  author        = {Baer, Jean-Loup},
  title         = {Microprocessor Architecture: From Simple Pipelines to Chip Multiprocessors},
  publisher     = {Cambridge University Press},
  year          = {2010},
  month         = jan,
  internal-note = {The export stored the subtitle in the series field; it is merged into the title here}
}
Notes: Ref, NoTag

@book{Keckler:2009wb,
  author    = {Keckler, Stephen W and Olukotun, Kunle and Hofstee, H Peter},
  title     = {Multicore Processors and Systems},
  publisher = {Springer},
  year      = {2009},
  month     = aug
}
Notes: Ref, NoTag

@book{Bansal:2009tl,
  author    = {Bansal, Raj Kumar and Goel, Ashok and Sharma, Manoj Kumar},
  title     = {{MATLAB} and Its Applications in Engineering},
  publisher = {Pearson Education India},
  year      = {2009}
}
Notes: Ext, NoTag

@book{Kaeli:2009vi,
  author        = {Kaeli, David and Sachs, Kai},
  title         = {Computer Performance Evaluation and Benchmarking: {SPEC} Benchmark Workshop 2009, {Austin}, {TX}, {USA}, {January} 25, 2009, Proceedings},
  publisher     = {Springer},
  year          = {2009},
  month         = jan,
  internal-note = {The export stored the subtitle in the series field; it is merged into the title here. The listed names may be editors rather than authors; verify}
}
Notes: Ref, ANNToolBox

@booklet{Demuth:2007wg,
  author = {Demuth, Howard and Beale, Mark and Hagan, Martin T},
  title  = {Neural Network Toolbox},
  year   = {2007}
}
Notes: Ref, NoTag

@book{Sivanandam:2006vj,
  author    = {Sivanandam, S N and Deepa, S N},
  title     = {Introduction to Neural Networks Using {Matlab} 6.0},
  publisher = {Tata McGraw-Hill Education},
  year      = {2006}
}
Notes: Ref, RISC

@book{Dandamudi:2005wp,
  author        = {Dandamudi, Sivarama P},
  title         = {Guide to {RISC} Processors: for Programmers and Engineers},
  publisher     = {Springer},
  year          = {2005},
  month         = dec,
  internal-note = {The export stored the subtitle in the series field; it is merged into the title here}
}
Notes: Ext, NoTag

@inproceedings{Bader:2005ck,
  author        = {Bader, D A and Li, Yue and Li, Tao and Sachdeva, V},
  title         = {{BioPerf}: a benchmark suite to evaluate high-performance computer architecture on bioinformatics applications},
  booktitle     = {{IEEE} International Symposium on Workload Characterization ({IISWC})},
  publisher     = {IEEE},
  year          = {2005},
  internal-note = {Exported as a book, but this is a symposium paper; the booktitle was added from memory and should be verified}
}
Notes: Ref, NoTag

@article{Sherwood:2002fa,
  author        = {Sherwood, Timothy and Perelman, Erez and Hamerly, Greg and Calder, Brad},
  title         = {Automatically characterizing large scale program behavior},
  journal       = {ACM SIGPLAN Notices},
  publisher     = {ACM},
  year          = {2002},
  volume        = {37},
  month         = oct,
  internal-note = {The export repeated the four authors four times and typed the entry as a book; the journal name was added from memory and should be verified}
}
Notes: Ref, NoTag

@book{Thumann:2001uh,
  author    = {Thumann, Albert and Mehta, D Paul},
  title     = {Handbook of Energy Engineering},
  edition   = {Sixth},
  publisher = {CRC Press},
  year      = {2001},
  month     = jan
}

硕博论文

Notes: Recommend, CacheCoherence, Download

@mastersthesis{Wiener:2012tf,
  author        = {Wiener, U},
  title         = {Modeling and Analysis of a Cache Coherent Interconnect},
  school        = {Eindhoven University of Technology},
  year          = {2012},
  note          = {alexandria.tue.nl},
  internal-note = {The export used the repository hostname as the journal. The entry sits under the theses heading, so it is retyped as a thesis; school and degree level should be verified}
}
Notes: VIP, AndroidBench, Download

@article{Jenkins:2012ff,
  author        = {Jenkins, I R},
  title         = {{Android} Benchmarking For Architectural Research},
  journal       = {Transactions on Design Automation of Electronic Systems ({TODAES})},
  year          = {2012},
  volume        = {19},
  number        = {2},
  internal-note = {The entry was cut off in the source (unclosed parenthesis and missing closing brace); the venue data looks unreliable and should be verified}
}

Advertisements

发表评论

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / 更改 )

Twitter picture

You are commenting using your Twitter account. Log Out / 更改 )

Facebook photo

You are commenting using your Facebook account. Log Out / 更改 )

Google+ photo

You are commenting using your Google+ account. Log Out / 更改 )

Connecting to %s